From 4f1e23945406395698e886f1e78527883d88db64 Mon Sep 17 00:00:00 2001
From: yanghaoran
Date: Fri, 11 Feb 2022 16:57:41 +0800
Subject: [PATCH] upgrade Ascend package 11 Feb 22

---
 inc/external/acl/acl_base.h                   |  14 +
 inc/external/acl/acl_op_compiler.h            |  51 ++
 inc/external/acl/acl_prof.h                   |  74 +++
 inc/external/acl/acl_rt.h                     |  18 +
 inc/external/acl/error_codes/ge_error_codes.h |  65 +--
 inc/external/acl/error_codes/rt_error_codes.h |   8 +-
 inc/external/acl/ops/acl_dvpp.h               |   8 +-
 inc/external/ge/ge_api_error_codes.h          |  67 +--
 inc/external/ge/ge_api_types.h                | 278 +++++------
 inc/external/ge/ge_ir_build.h                 |   8 +-
 inc/external/runtime/rt_error_codes.h         |   2 +-
 inc/framework/common/debug/ge_log.h           |  16 +-
 inc/framework/common/debug/log.h              |  12 +-
 inc/framework/common/file_constant_util.h     |  10 +-
 inc/framework/common/fmk_error_codes.h        |   4 +-
 inc/framework/common/ge_inner_error_codes.h   |  81 ++--
 inc/framework/common/ge_types.h               |  42 +-
 inc/framework/common/helper/model_helper.h    |   4 +-
 inc/framework/common/helper/om_file_helper.h  |  42 +-
 inc/framework/common/l2_cache_optimize.h      |  90 +---
 inc/framework/common/op/attr_value_util.h     | 140 +-----
 inc/framework/common/op/ge_op_utils.h         |  87 +---
 inc/framework/common/op_types.h               |  20 +-
 inc/framework/common/profiling/ge_profiling.h |   6 +-
 inc/framework/common/profiling_definitions.h  | 173 +++++++
 inc/framework/common/scope_guard.h            |   6 +-
 inc/framework/common/string_util.h            |  39 +-
 inc/framework/common/taskdown_common.h        |  37 +-
 inc/framework/common/types.h                  | 307 ++----------
 inc/framework/common/util.h                   | 209 ++++----
 inc/framework/engine/dnnengine.h              |  15 +-
 inc/framework/executor/ge_executor.h          | 126 +++--
 inc/framework/generator/ge_generator.h        |  18 +-
 inc/framework/memory/memory_api.h             |   4 -
 inc/framework/memory/memory_assigner.h        |   2 +-
 inc/framework/omg/omg.h                       |   8 +-
 inc/framework/omg/omg_inner_types.h           |  40 +-
 inc/framework/omg/parser/model_parser.h       |   7 +
 inc/framework/omg/parser/op_parser.h          |   8 +-
 inc/framework/omg/parser/parser_factory.h     |   6 +-
 inc/framework/omg/parser/parser_inner_ctx.h   |   6 +-
 inc/framework/omg/version.h                   |   4 +-
 metadef                                       |   2 +-
 .../aicpu/aicpu_schedule/aicpu_op_type_list.h | 114 ++--
 .../inc/aicpu/common/aicpu_task_struct.h      |  47 ++
 third_party/fwkacllib/inc/cce/aicpu_engine.h  |   1 -
 .../fwkacllib/inc/cce/fwk_adpt_struct.h       |  16 +-
 third_party/fwkacllib/inc/hccl/base.h         |  19 +-
 third_party/fwkacllib/inc/hccl/hcom.h         |  66 ---
 third_party/fwkacllib/inc/mmpa/mmpa_api.h     |  24 +-
 .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h   |  24 +-
 .../inc/mmpa/sub_inc/mmpa_typedef_linux.h     |   3 +
 .../inc/mmpa/sub_inc/mmpa_typedef_win.h       | 169 +++----
 .../fwkacllib/inc/mmpa/sub_inc/mmpa_win.h     |  24 +-
 third_party/fwkacllib/inc/ops/OWNERS          |  65 +++
 third_party/fwkacllib/inc/ops/array_ops.h     | 117 ++++-
 third_party/fwkacllib/inc/ops/ctc_ops.h       |  12 +-
 third_party/fwkacllib/inc/ops/data_flow_ops.h |  45 ++
 third_party/fwkacllib/inc/ops/deep_md.h       | 156 +++++-
 .../inc/ops/elewise_calculation_ops.h         |  14 +-
 third_party/fwkacllib/inc/ops/image_ops.h     |  20 +-
 third_party/fwkacllib/inc/ops/math_ops.h      |   2 +-
 .../inc/ops/matrix_calculation_ops.h          |  83 +++-
 .../fwkacllib/inc/ops/nn_batch_norm_ops.h     |  68 +++
 .../fwkacllib/inc/ops/nn_calculation_ops.h    |  46 +-
 third_party/fwkacllib/inc/ops/nn_detect_ops.h |  98 +++-
 third_party/fwkacllib/inc/ops/nn_norm_ops.h   |  96 +++-
 .../fwkacllib/inc/ops/nn_pooling_ops.h        |   6 +-
 .../fwkacllib/inc/ops/nonlinear_fuc_ops.h     |  29 +-
 .../fwkacllib/inc/ops/npu_loss_scale_ops.h    |  27 ++
 third_party/fwkacllib/inc/ops/ocr_ops.h       |   1 +
 third_party/fwkacllib/inc/ops/random_ops.h    | 161 ++++++-
 third_party/fwkacllib/inc/ops/randomdsa_ops.h | 139 ++++++
 third_party/fwkacllib/inc/ops/reduce_ops.h    | 123 +++++
 third_party/fwkacllib/inc/ops/rnn.h           |  38 ++
 third_party/fwkacllib/inc/ops/selection_ops.h | 130 ++++-
 .../fwkacllib/inc/ops/split_combination_ops.h |  24 +
 third_party/fwkacllib/inc/ops/vector_search.h |  39 +-
 third_party/fwkacllib/inc/runtime/base.h      |  13 +-
 third_party/fwkacllib/inc/runtime/config.h    |  24 +-
 third_party/fwkacllib/inc/runtime/context.h   |   2 +-
 third_party/fwkacllib/inc/runtime/dev.h       |   6 +-
 .../fwkacllib/inc/runtime/dvfsprofile.h       |   4 +-
 third_party/fwkacllib/inc/runtime/event.h     |  15 +
 third_party/fwkacllib/inc/runtime/kernel.h    |  46 +-
 third_party/fwkacllib/inc/runtime/mem.h       |  24 +
 third_party/fwkacllib/inc/runtime/rt_dfx.h    |  10 +
 .../fwkacllib/inc/runtime/rt_ffts_plus.h      |   3 +-
 .../fwkacllib/inc/runtime/rt_mem_queue.h      |  65 ++-
 third_party/fwkacllib/inc/runtime/rt_model.h  |  33 +-
 third_party/fwkacllib/inc/runtime/rt_stars.h  |  21 +-
 .../fwkacllib/inc/runtime/rt_stars_define.h   |  58 +++
 third_party/fwkacllib/inc/tdt/data_common.h   |  21 +-
 .../fwkacllib/inc/tdt/index_transform.h       |  20 +-
 third_party/fwkacllib/inc/tdt/status.h        |  22 +-
 .../fwkacllib/inc/tdt/tdt_host_interface.h    |  20 +-
 third_party/fwkacllib/inc/tdt/tsd_client.h    |  83 +++-
 .../fwkacllib/inc/toolchain/prof_acl_api.h    |  61 ++-
 .../fwkacllib/inc/toolchain/prof_callback.h   |  24 +-
 .../fwkacllib/inc/toolchain/prof_common.h     | 449 ++++++++++++++++++
 third_party/fwkacllib/inc/toolchain/slog.h    | 237 ++++-----
 101 files changed, 3653 insertions(+), 1818 deletions(-)
 create mode 100644 inc/framework/common/profiling_definitions.h
 create mode 100755 third_party/fwkacllib/inc/ops/OWNERS
 create mode 100644 third_party/fwkacllib/inc/ops/randomdsa_ops.h
 create mode 100644 third_party/fwkacllib/inc/toolchain/prof_common.h

diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h
index 1f81b15a..c938d4fb 100644
--- a/inc/external/acl/acl_base.h
+++ b/inc/external/acl/acl_base.h
@@ -134,6 +134,7 @@ static const int ACL_ERROR_DRV_FAILURE = 500004;
 static const int ACL_ERROR_PROFILING_FAILURE = 500005;
 
 #define ACL_TENSOR_SHAPE_RANGE_NUM 2
+#define ACL_TENSOR_VALUE_RANGE_NUM 2
 #define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE
 
 typedef enum {
@@ -336,6 +337,19 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
 ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
                                                     int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);
 
+/**
+ * @ingroup AscendCL
+ * @brief set value range for aclTensorDesc
+ *
+ * @param desc [OUT] pointer to the data of aclTensorDesc
+ * @param valueCount [IN] the number of value
+ * @param valueRange [IN] the range of value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorValueRange(aclTensorDesc *desc, size_t valueCount,
+                                                    int64_t valueRange[][ACL_TENSOR_VALUE_RANGE_NUM]);
 /**
  * @ingroup AscendCL
  * @brief get data type specified by the tensor description
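
A minimal usage sketch for the new value-range API (not part of the patch): it mirrors the existing shape-range call, and the {min, max} pair convention is an assumption based on ACL_TENSOR_VALUE_RANGE_NUM being 2.

    #include "acl/acl_base.h"

    // Describe a scalar int32 tensor whose value is known to lie in [0, 255].
    void DescribeBoundedScalar(void) {
      aclTensorDesc *desc = aclCreateTensorDesc(ACL_INT32, 0, nullptr, ACL_FORMAT_ND);
      int64_t valueRange[1][ACL_TENSOR_VALUE_RANGE_NUM] = {{0, 255}};  // assumed {min, max} pairs
      if (aclSetTensorValueRange(desc, 1U, valueRange) != ACL_SUCCESS) {
        // handle failure
      }
      aclDestroyTensorDesc(desc);
    }
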
diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h
index b64b2bad..8480b89c 100644
--- a/inc/external/acl/acl_op_compiler.h
+++ b/inc/external/acl/acl_op_compiler.h
@@ -41,6 +41,8 @@ typedef enum {
 
 typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag;
 
+typedef struct aclGraphDumpOption aclGraphDumpOption;
+
 /**
  * @ingroup AscendCL
  * @brief compile op
@@ -114,6 +116,55 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val
  */
 ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag);
 
+/**
+ * @ingroup AscendCL
+ * @brief generate graph and dump
+ *
+ * @param opType [IN] op type
+ * @param numInputs [IN] number of inputs
+ * @param inputDesc [IN] pointer to array of input tensor descriptions
+ * @param inputs [IN] pointer to array of input buffers
+ * @param numOutputs [IN] number of outputs
+ * @param outputDesc [IN] pointer to array of output tensor descriptions
+ * @param outputs [IN] pointer to array of outputs buffers
+ * @param attr [IN] pointer to instance of aclopAttr.
+ *                  may pass nullptr if the op has no attribute
+ * @param engineType [IN] engine type
+ * @param graphDumpPath [IN] dump path, if the suffix is ".txt", it means file path, else it means directory path
+ * @param graphDumpOpt [IN] dump option, nullptr is supported
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclGenGraphAndDumpForOp(
+    const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
+    int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
+    aclopEngineType engineType, const char *graphDumpPath, const aclGraphDumpOption *graphDumpOpt);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create the graph dump option
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see aclDestroyGraphDumpOpt
+ */
+ACL_FUNC_VISIBILITY aclGraphDumpOption *aclCreateGraphDumpOpt();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy graph dump option
+ *
+ * @param graphDumpOpt [IN] pointer to the graph dump option
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclCreateGraphDumpOpt
+ */
+ACL_FUNC_VISIBILITY aclError aclDestroyGraphDumpOpt(const aclGraphDumpOption *graphDumpOpt);
+
 #ifdef __cplusplus
 }
 #endif
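
A sketch of the dump-option lifecycle around aclGenGraphAndDumpForOp (not part of the patch). The descriptors and buffers are assumed to be prepared by the caller, and "Add" and the output path are illustrative:

    #include "acl/acl_op_compiler.h"

    aclError DumpAddGraph(const aclTensorDesc *inputDesc[2], const aclDataBuffer *inputs[2],
                          const aclTensorDesc *outputDesc[1], aclDataBuffer *outputs[1]) {
      aclGraphDumpOption *opt = aclCreateGraphDumpOpt();  // nullptr is also accepted by the dump call
      const aclError ret = aclGenGraphAndDumpForOp("Add", 2, inputDesc, inputs, 1, outputDesc, outputs,
                                                   nullptr /* no attr */, ACL_ENGINE_SYS,
                                                   "./add_graph.txt" /* ".txt" suffix => file path */, opt);
      (void) aclDestroyGraphDumpOpt(opt);
      return ret;
    }
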
diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h
index 2ad13063..b2e1e1f7 100644
--- a/inc/external/acl/acl_prof.h
+++ b/inc/external/acl/acl_prof.h
@@ -37,6 +37,7 @@ extern "C" {
 #define ACL_PROF_HCCL_TRACE 0x0020ULL
 #define ACL_PROF_TRAINING_TRACE 0x0040ULL
 #define ACL_PROF_MSPROFTX 0x0080ULL
+#define ACL_PROF_RUNTIME_API 0x0100ULL
 
 /**
  * @deprecated please use aclprofGetOpTypeLen and aclprofGetOpTNameLen instead
@@ -367,6 +368,79 @@ MSVP_PROF_API aclprofStepInfo *aclprofCreateStepInfo();
  */
 MSVP_PROF_API void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo);
 
+/**
+ * @ingroup AscendCL
+ * @brief create pointer to aclprofstamp
+ *
+ * @retval aclprofStamp pointer
+ */
+MSVP_PROF_API void *aclprofCreateStamp();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy stamp pointer
+ *
+ * @retval void
+ */
+MSVP_PROF_API void aclprofDestroyStamp(void *stamp);
+
+/**
+ * @ingroup AscendCL
+ * @brief Record push timestamp
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofPush(void *stamp);
+
+/**
+ * @ingroup AscendCL
+ * @brief Record pop timestamp
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofPop();
+
+/**
+ * @ingroup AscendCL
+ * @brief Record range start timestamp
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofRangeStart(void *stamp, uint32_t *rangeId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Record range end timestamp
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofRangeStop(uint32_t rangeId);
+
+/**
+ * @ingroup AscendCL
+ * @brief set message to stamp
+ *
+ * @retval void
+ */
+MSVP_PROF_API aclError aclprofSetStampTraceMessage(void *stamp, const char *msg, uint32_t msgLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief Record mark timestamp
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+MSVP_PROF_API aclError aclprofMark(void *stamp);
+
 #ifdef __cplusplus
 }
 #endif
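
A sketch of the new msproftx stamp flow (not part of the patch): annotate a region so it appears on the profiling timeline. It assumes profiling was already started elsewhere with the ACL_PROF_MSPROFTX flag enabled.

    #include "acl/acl_prof.h"

    void AnnotateRegion(void) {
      void *stamp = aclprofCreateStamp();
      if (stamp == nullptr) {
        return;
      }
      const char msg[] = "my_region";  // illustrative tag
      (void) aclprofSetStampTraceMessage(stamp, msg, sizeof(msg) - 1U);
      (void) aclprofPush(stamp);  // region enter
      // ... workload attributed to "my_region" ...
      (void) aclprofPop();        // region exit
      aclprofDestroyStamp(stamp);
    }
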
diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h
index 7ea27cba..c5e8272c 100644
--- a/inc/external/acl/acl_rt.h
+++ b/inc/external/acl/acl_rt.h
@@ -44,6 +44,11 @@ typedef enum aclrtEventStatus {
   ACL_EVENT_STATUS_RESERVED = 2,
 } aclrtEventStatus;
 
+typedef enum aclrtEventRecordedStatus {
+  ACL_EVENT_RECORDED_STATUS_NOT_READY = 0,
+  ACL_EVENT_RECORDED_STATUS_COMPLETE = 1,
+} aclrtEventRecordedStatus;
+
 typedef enum aclrtEventWaitStatus {
   ACL_EVENT_WAIT_STATUS_COMPLETE = 0,
   ACL_EVENT_WAIT_STATUS_NOT_READY = 1,
@@ -503,8 +508,21 @@ ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream strea
  * @retval ACL_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
+ACL_DEPRECATED_MESSAGE("aclrtQueryEvent is deprecated, use aclrtQueryEventStatus instead")
 ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status);
 
+/**
+ * @ingroup AscendCL
+ * @brief Queries an event's status
+ *
+ * @param event [IN] event to query
+ * @param status [OUT] event recorded status
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtQueryEventStatus(aclrtEvent event, aclrtEventRecordedStatus *status);
+
 /**
  * @ingroup AscendCL
  * @brief Queries an event's wait-status
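
A polling sketch for the replacement query (not part of the patch); it assumes a device context and the stream were set up earlier via the usual aclInit/aclrtSetDevice/aclrtCreateStream sequence.

    #include "acl/acl_rt.h"

    void WaitUntilRecorded(aclrtStream stream) {
      aclrtEvent event = nullptr;
      (void) aclrtCreateEvent(&event);
      (void) aclrtRecordEvent(event, stream);
      aclrtEventRecordedStatus status = ACL_EVENT_RECORDED_STATUS_NOT_READY;
      while ((aclrtQueryEventStatus(event, &status) == ACL_SUCCESS) &&
             (status != ACL_EVENT_RECORDED_STATUS_COMPLETE)) {
        // not recorded yet: spin, yield, or do other useful work
      }
      (void) aclrtDestroyEvent(event);
    }
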
diff --git a/inc/external/acl/error_codes/ge_error_codes.h b/inc/external/acl/error_codes/ge_error_codes.h
index cafc5a64..027c83ea 100644
--- a/inc/external/acl/error_codes/ge_error_codes.h
+++ b/inc/external/acl/error_codes/ge_error_codes.h
@@ -32,42 +32,43 @@
 #endif
 #include <stddef.h>
+#include <stdint.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
-static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000;
-static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001;
-static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002;
-static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003;
-static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006;
-static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007;
-static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008;
-static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009;
-static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011;
-static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012;
-static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013;
-static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014;
-static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015;
-static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016;
-static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017;
-static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018;
-static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019;
-static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020;
-static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021;
-static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022;
-static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000;
-static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001;
-static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000;
-static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001;
-static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002;
-static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003;
-static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004;
-static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005;
-static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006;
-static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007;
-static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008;
-static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009;
+static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000U;
+static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001U;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002U;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003U;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006U;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007U;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008U;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009U;
+static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011U;
+static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012U;
+static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013U;
+static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014U;
+static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015U;
+static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016U;
+static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017U;
+static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018U;
+static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019U;
+static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020U;
+static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021U;
+static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022U;
+static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000U;
+static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001U;
+static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000U;
+static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001U;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002U;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003U;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004U;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005U;
+static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006U;
+static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007U;
+static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008U;
+static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009U;
 #ifdef __cplusplus
 }  // namespace ge
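
Since every GE status above is now an explicitly unsigned literal (the new U suffix), call sites can compare a uint32_t return value without sign-conversion warnings. A hedged sketch, with LoadSomeModel standing in for any hypothetical call returning these codes:

    #include <stdint.h>
    #include "acl/error_codes/ge_error_codes.h"

    extern uint32_t LoadSomeModel(void);  // hypothetical call returning a GE status code

    void CheckLoad(void) {
      const uint32_t ret = LoadSomeModel();
      if (ret == ACL_ERROR_GE_EXEC_MODEL_ID_INVALID) {
        // the model id was never loaded or has already been unloaded
      }
    }
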
diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h
index abfa30db..d16af89f 100644
--- a/inc/external/acl/error_codes/rt_error_codes.h
+++ b/inc/external/acl/error_codes/rt_error_codes.h
@@ -44,6 +44,7 @@ static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015;  // callbac
 static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016;  // invalid memory type
 static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017;  // invalid handle
 static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018;  // invalid malloc type
+static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019;  // wait timeout
 
 static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000;  // feature not support
 static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001;  // memory allocation error
@@ -61,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012;  // over limit
 static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013;  // queue is empty
 static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014;  // queue is full
 static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015;  // repeated init
+static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016;  // aivec over flow
 
 static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;  // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;  // ts internel error
@@ -99,6 +101,11 @@ static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;  // devic
 static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034;  // vector core timeout
 static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035;  // vector core exception
 static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036;  // vector core trap exception
+static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037;  // cdq alloc batch abnormal
+static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038;  // can not change die mode
+static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039;  // single die mode can not set die
+static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040;  // invalid die id
+static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041;  // die mode not set
 
 static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899;  // drv internal error
 static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900;  // aicpu internal error
@@ -107,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901;  // hdc disconn
 #ifdef __cplusplus
 }
 #endif
-
 #endif  // __INC_EXTERNEL_RT_ERROR_CODES_H__
diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h
index b839ae48..6ab89c49 100644
--- a/inc/external/acl/ops/acl_dvpp.h
+++ b/inc/external/acl/ops/acl_dvpp.h
@@ -158,7 +158,13 @@ enum acldvppJpegFormat {
   ACL_JPEG_CSS_UNKNOWN = 1000
 };
 
-enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0, ACL_DVPP_MODE_UINT32, ACL_DVPP_CHANNEL_ID_UINT64 };
+enum acldvppChannelDescParamType {
+  ACL_DVPP_CSC_MATRIX_UINT32 = 0,
+  ACL_DVPP_MODE_UINT32,
+  ACL_DVPP_CHANNEL_ID_UINT64,
+  ACL_DVPP_CHANNEL_HEIGHT_UINT32,
+  ACL_DVPP_CHANNEL_WIDTH_UINT32
+};
 
 enum aclvdecChannelDescParamType {
   ACL_VDEC_CSC_MATRIX_UINT32 = 0,
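
The two new enumerators suggest channel geometry can now be set through the generic parameter interface. A sketch (not part of the patch), assuming the acldvppSetChannelDescParam setter that accompanies this enum in acl_dvpp.h:

    #include "acl/ops/acl_dvpp.h"

    void ConfigureChannelGeometry(void) {
      acldvppChannelDesc *channelDesc = acldvppCreateChannelDesc();
      const uint32_t width = 1920U;   // illustrative values
      const uint32_t height = 1080U;
      (void) acldvppSetChannelDescParam(channelDesc, ACL_DVPP_CHANNEL_WIDTH_UINT32, sizeof(width), &width);
      (void) acldvppSetChannelDescParam(channelDesc, ACL_DVPP_CHANNEL_HEIGHT_UINT32, sizeof(height), &height);
      (void) acldvppDestroyChannelDesc(channelDesc);
    }
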
diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h
index 5d2d6963..b5cfb115 100644
--- a/inc/external/ge/ge_api_error_codes.h
+++ b/inc/external/ge/ge_api_error_codes.h
@@ -20,15 +20,27 @@
 #include <map>
 #include <string>
 #include "ge_error_codes.h"
-#include "graph/types.h"
+#include "ge_api_types.h"
 
-namespace ge {
 #ifdef __GNUC__
 #define ATTRIBUTED_DEPRECATED(replacement) __attribute__((deprecated("Please use " #replacement " instead.")))
 #else
 #define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead."))
 #endif
 
+// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit
+#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc)                                      \
+  constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30U) |      \
+                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28U) |         \
+                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25U) |        \
+                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17U) |        \
+                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12U) |        \
+                              (static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value)));              \
+  const ErrorNoRegisterar g_errorno_##name((name), (desc));
+
+#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_errorno_##name((name), (desc));
+
+namespace ge {
 class GE_FUNC_VISIBILITY StatusFactory {
  public:
  static StatusFactory *Instance() {
@@ -56,7 +68,7 @@ class GE_FUNC_VISIBILITY StatusFactory {
   }
 
   std::string GetErrDesc(const uint32_t err) {
-    const auto iter_find = err_desc_.find(err);
+    const std::map<uint32_t, std::string>::const_iterator iter_find = err_desc_.find(err);
     if (iter_find == err_desc_.end()) {
       return "";
     }
@@ -82,59 +94,10 @@ class GE_FUNC_VISIBILITY ErrorNoRegisterar {
   ~ErrorNoRegisterar() {}
 };
 
-// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit
-#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc)                                      \
-  constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30U) |      \
-                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28U) |         \
-                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25U) |        \
-                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17U) |        \
-                              (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12U) |        \
-                              (static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value)));              \
-  const ErrorNoRegisterar g_##name##_errorno(name, desc);
-
-#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc);
-
-using Status = uint32_t;
-
 // General error code
 GE_ERRORNO(0, 0, 0, 0, 0, SUCCESS, 0, "success");
 GE_ERRORNO(0b11, 0b11, 0b111, 0xFFU, 0b11111, FAILED, 0xFFFU, "failed"); /*lint !e401*/
 
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PARAM_INVALID, "Parameter invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_NOT_INIT, "GE executor not initialized yet.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "Model id invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "Data size of model invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID, "Model addr invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Queue id of model invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "The model loaded repeatedly.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Dynamic input size invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "Dynamic batch size invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_BATCH_EMPTY, "AIPP batch parameter empty.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_NOT_EXIST, "AIPP parameter not exist.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "Plugin path is invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_FORMAT_INVALID, "Format is invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_SHAPE_INVALID, "Shape is invalid.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DATATYPE_INVALID, "Datatype is invalid.");
-
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate memory.");
-
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED, "Failed to load model partition.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, "Failed to load weight partition.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "Failed to load task partition.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, "Failed to load op kernel partition.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA, "Failed to release the model data.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_COMMAND_HANDLE, "Command handle error.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_GET_TENSOR_INFO, "Get tensor info error.");
-GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_UNLOAD_MODEL, "Load model error.");
-
 }  // namespace ge
 
 #endif  // INC_EXTERNAL_GE_GE_API_ERROR_CODES_H_
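
To make the relocated bit layout concrete, here is a worked composition (not part of the patch) showing that the FAILED fields fill all 32 bits, matching the shifts in GE_ERRORNO above: runtime occupies bits 31-30, type 29-28, level 27-25, sysid 24-17, modid 16-12, and value 11-0.

    #include <cstdint>

    constexpr uint32_t kFailed = (0x3U << 30U)    // runtime = 0b11
                               | (0x3U << 28U)    // type    = 0b11
                               | (0x7U << 25U)    // level   = 0b111
                               | (0xFFU << 17U)   // sysid   = 0xFF
                               | (0x1FU << 12U)   // modid   = 0b11111
                               | 0xFFFU;          // value   = 0xFFF
    static_assert(kFailed == 0xFFFFFFFFU, "FAILED occupies every bit of the 32-bit code");
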
diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h
index 52881020..93ebe284 100644
--- a/inc/external/ge/ge_api_types.h
+++ b/inc/external/ge/ge_api_types.h
@@ -28,96 +28,98 @@
 namespace ge {
 // Option key: graph run mode
-const char *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode";
-const char *const OPTION_DEVICE_TYPE = "ge.deviceType";
+const char_t *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode";
+const char_t *const OPTION_DEVICE_TYPE = "ge.deviceType";
 // Option key: ome init
-const char *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId";
-const char *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId";
-const char *const OPTION_EXEC_JOB_ID = "ge.exec.jobId";
-const char *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom";
-const char *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd";
-const char *const OPTION_EXEC_RANK_ID = "ge.exec.rankId";
-const char *const OPTION_EXEC_POD_NAME = "ge.exec.podName";
-const char *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode";
-const char *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile";
-const char *const GE_AICPU_FLAG = "ge.aicpuFlag";
-const char *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath";
+const char_t *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId";
+const char_t *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId";
+const char_t *const OPTION_EXEC_JOB_ID = "ge.exec.jobId";
+const char_t *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom";
+const char_t *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd";
+const char_t *const OPTION_EXEC_RANK_ID = "ge.exec.rankId";
+const char_t *const OPTION_EXEC_POD_NAME = "ge.exec.podName";
+const char_t *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode";
+const char_t *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile";
+const char_t *const GE_AICPU_FLAG = "ge.aicpuFlag";
+const char_t *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath";
 // Dump flag and para
-const char *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump";
-const char *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath";
-const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep";
-const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode";
-const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug";
-const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode";
-const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild";
-const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath";
-const char *const OPTION_EXEC_ENABLE_EXCEPTION_DUMP = "ge.exec.enable_exception_dump";
-const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses";
-const char *const OPTION_EXEC_PROFILING_FPPONIT_OPTIONS = "ge.exec.profilingFpPointOptions";
-const char *const OPTION_EXEC_PROFILING_BPPONIT_OPTIONS = "ge.exec.profilingBpPointOptions";
+const char_t *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump";
+const char_t *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath";
+const char_t *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep";
+const char_t *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode";
+const char_t *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug";
+const char_t *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode";
+const char_t *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild";
+const char_t *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath";
+const char_t *const OPTION_EXEC_ENABLE_EXCEPTION_DUMP = "ge.exec.enable_exception_dump";
+const char_t *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses";
+const char_t *const OPTION_EXEC_PROFILING_FPPONIT_OPTIONS = "ge.exec.profilingFpPointOptions";
"ge.exec.profilingFpPointOptions"; +const char_t *const OPTION_EXEC_PROFILING_BPPONIT_OPTIONS = "ge.exec.profilingBpPointOptions"; // profiling flag -const char *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; -const char *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; +const char_t *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; +const char_t *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; // Hccl flag, if ge.exec.hcclFlag =1, it means load plugin for opskernel, else:ge.exec.hcclFlag =0 -const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; -const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; -const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; -const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; +const char_t *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; +const char_t *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; +const char_t *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; +const char_t *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; // Dynamic input flag. ge.exec.dynamicInput=1, means enable dynaimc input, // ge.exec.dynamicGraphExecuteMode, dynamic_execute[default] -const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; -const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; -const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; -const char *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; +const char_t *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; +const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; +const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; +const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; // Option key: memory init -const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; -const char *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; +const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; +const char_t *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; +const char_t *const OPTION_EXEC_REUSE_ZERO_COPY_MEMORY = "ge.exec.reuseZeroCopyMemory"; + namespace configure_option { -const char *const STREAM_NUM = "ge.streamNum"; -const char *const HEAD_STREAM = "ge.headStream"; -const char *const PERF_LEVEL = "ge.perfLevel"; -const char *const ENCRYPT_MODE = "ge.encryptMode"; -const char *const EK_FILE = "ge.ekFile"; -const char *const CERT_FILE = "ge.certFile"; -const char *const HW_KEY_FILE = "ge.hwKeyFile"; -const char *const PRIVATE_KEY_FILE = "ge.privateKeyFile"; -const char *const FRAMEWORK_TYPE = "ge.frameworkType"; -const char *const CALIBRATION_CONF_FILE = "ge.calibrationConfFile"; -const char *const INSERT_OP_FILE = "ge.insertOpFile"; -const char *const OUTPUT_NODE_NAME = "ge.outputNodeName"; -const char *const COMPRESS_FLAG = "ge.compressFlag"; -const char *const PRECISION_MODE = "ge.exec.precision_mode"; -const char *const SINGLE_OP_FLAG = "ge.exec.single_op"; -const char *const TRAIN_FLAG = "ge.trainFlag"; -const char *const RUN_FLAG = "ge.runFlag"; -const char *const LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop"; -const char *const TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path"; -const char *const DDK_VERSION_FLAG = "ge.DDK_version"; -const char *const GE_FE_FLAG = 
"ge.feFlag"; -const char *const STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; -const char *const OUTPUT_DATATYPE = "ge.outputDatatype"; -const char *const OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; -const char *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; -const char *const HCOM_PARALLEL = "ge.hcomParallel"; -const char *const AUTO_TUNE_MODE = "ge.autoTuneMode"; -const char *const SOC_VERSION = "ge.socVersion"; -const char *const CORE_TYPE = "ge.engineType"; -const char *const AICORE_NUM = "ge.aicoreNum"; -const char *const L1_FUSION = "ge.l1Fusion"; -const char *const BUFFER_OPTIMIZE = "ge.bufferOptimize"; -const char *const ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; -const char *const ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; -const char *const FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; -const char *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; -const char *const ORIGINAL_MODEL_FILE = "ge.originalModelFile"; -const char *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; -const char *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; -const char *const PERFORMANCE_MODE = "ge.performance_mode"; -const char *const SHAPE_GENERALIZED_BUILD_MODE = "ge.shape_generalized_build_mode"; -const char *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; -const char *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; +const char_t *const STREAM_NUM = "ge.streamNum"; +const char_t *const HEAD_STREAM = "ge.headStream"; +const char_t *const PERF_LEVEL = "ge.perfLevel"; +const char_t *const ENCRYPT_MODE = "ge.encryptMode"; +const char_t *const EK_FILE = "ge.ekFile"; +const char_t *const CERT_FILE = "ge.certFile"; +const char_t *const HW_KEY_FILE = "ge.hwKeyFile"; +const char_t *const PRIVATE_KEY_FILE = "ge.privateKeyFile"; +const char_t *const FRAMEWORK_TYPE = "ge.frameworkType"; +const char_t *const CALIBRATION_CONF_FILE = "ge.calibrationConfFile"; +const char_t *const INSERT_OP_FILE = "ge.insertOpFile"; +const char_t *const OUTPUT_NODE_NAME = "ge.outputNodeName"; +const char_t *const COMPRESS_FLAG = "ge.compressFlag"; +const char_t *const PRECISION_MODE = "ge.exec.precision_mode"; +const char_t *const SINGLE_OP_FLAG = "ge.exec.single_op"; +const char_t *const TRAIN_FLAG = "ge.trainFlag"; +const char_t *const RUN_FLAG = "ge.runFlag"; +const char_t *const LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop"; +const char_t *const TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path"; +const char_t *const DDK_VERSION_FLAG = "ge.DDK_version"; +const char_t *const GE_FE_FLAG = "ge.feFlag"; +const char_t *const STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; +const char_t *const OUTPUT_DATATYPE = "ge.outputDatatype"; +const char_t *const OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; +const char_t *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; +const char_t *const HCOM_PARALLEL = "ge.hcomParallel"; +const char_t *const AUTO_TUNE_MODE = "ge.autoTuneMode"; +const char_t *const SOC_VERSION = "ge.socVersion"; +const char_t *const CORE_TYPE = "ge.engineType"; +const char_t *const AICORE_NUM = "ge.aicoreNum"; +const char_t *const L1_FUSION = "ge.l1Fusion"; +const char_t *const BUFFER_OPTIMIZE = "ge.bufferOptimize"; +const char_t *const ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; +const char_t *const ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; +const char_t *const FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; +const char_t *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; +const char_t *const ORIGINAL_MODEL_FILE = "ge.originalModelFile"; +const char_t *const 
+const char_t *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16";
+const char_t *const OP_DEBUG_LEVEL = "ge.opDebugLevel";
+const char_t *const PERFORMANCE_MODE = "ge.performance_mode";
+const char_t *const SHAPE_GENERALIZED_BUILD_MODE = "ge.shape_generalized_build_mode";
+const char_t *const MODIFY_MIXLIST = "ge.exec.modify_mixlist";
+const char_t *const OP_PRECISION_MODE = "ge.exec.op_precision_mode";
 }  // namespace configure_option
 // Configure stream num by Session constructor options param,
 // its value should be int32_t type, default value is "1"
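
The renamed char_t keys are still plain C strings, so they drop straight into the usual session options map. A sketch (not part of the patch) with illustrative values:

    #include <map>
    #include <string>
    #include "ge/ge_api.h"  // assumed entry header declaring ge::Session

    void CreateSession(void) {
      const std::map<std::string, std::string> options = {
          {ge::OPTION_GRAPH_RUN_MODE, "0"},  // PREDICTION
          {ge::OPTION_EXEC_DEVICE_ID, "0"},
          {ge::configure_option::STREAM_NUM, "1"}};
      ge::Session session(options);
    }
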
@@ -227,7 +229,7 @@ const std::string OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode";
 const std::string HCOM_PARALLEL = "ge.hcomParallel";
 
 // configure whether to use dynamic batch size
-const char *const kDynamicBatchSize = "ge.dynamicBatchSize";
+const char_t *const kDynamicBatchSize = "ge.dynamicBatchSize";
 
 // configure threshold of fusion data size for communication op
 const std::string FUSION_TENSOR_SIZE = "ge.fusionTensorSize";
@@ -236,10 +238,10 @@ const std::string INPUT_SHAPE = "ge.inputShape";
 const std::string DYNAMIC_NODE_TYPE = "ge.dynamicNodeType";
 
 // configure whether to use dynamic image size
-const char *const kDynamicImageSize = "ge.dynamicImageSize";
+const char_t *const kDynamicImageSize = "ge.dynamicImageSize";
 
 // Configure whether to use dynamic dims
-const char *const kDynamicDims = "ge.dynamicDims";
+const char_t *const kDynamicDims = "ge.dynamicDims";
 
 // Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y,
 // example: GA|RL, support configure multiple, split by |
@@ -275,29 +277,29 @@ const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel";
 
 // Save original model file name
 const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile";
 
-const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum";
-const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize";
-const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum";
+const char_t *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum";
+const char_t *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize";
+const char_t *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum";
 
 // Configure for print op pass
 // Its value should be "0" or "1", default value is "1"
-const char *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass";
+const char_t *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass";
 
 // Configure operator compilation path
 // Its value should be file path, default value is "./"
-const char *const DEBUG_DIR = "ge.debugDir";
+const char_t *const DEBUG_DIR = "ge.debugDir";
 
 // Configure operator compiler cache path
 // Its value should be file path, default value is "./"
-const char *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir";
+const char_t *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir";
 
 // Configure operator compiler cache mode
 // Its value should be "disable", "enable" or "force", default value is "disable"
-const char *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode";
+const char_t *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode";
 
 // Configure whether to use single stream.
 // Its value should be "true" or "false", default value is "false"
-const char *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream";
+const char_t *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream";
 
 // Configure input fp16 nodes
 const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16";
@@ -322,7 +324,7 @@ const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update";
 const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode";
 
 // atc and ir option
-const char *const INPUT_SHAPE_RANGE = "input_shape_range";
+const char_t *const INPUT_SHAPE_RANGE = "input_shape_range";
 
 // Configure express high compile performance or high execute performance
 // normal: no need to compile, used saved .o files directly
@@ -338,7 +340,11 @@ const std::string MODIFY_MIXLIST = "ge.exec.modify_mixlist";
 
 const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode";
 
-const char *const FILE_CONSTANT_PATH = "ge.exec.value_bins";
+const std::string OP_WAIT_TIMEOUT = "ge.exec.opWaitTimeout";
+
+const std::string OP_EXECUTE_TIMEOUT = "ge.exec.opExecuteTimeout";
+
+const char_t *const FILE_CONSTANT_PATH = "ge.exec.value_bins";
 
 // Graph run mode
 enum GraphRunMode { PREDICTION = 0, TRAIN };
@@ -378,49 +384,49 @@ using RunAsyncCallback = std::function<void(Status, std::vector<ge::Tensor> &)>;
 
 // for ir build
 namespace ir_option {
-static const char *const INPUT_FORMAT = "input_format";
-static const char *const INPUT_SHAPE = "input_shape";
-static const char *const INPUT_SHAPE_RANGE = ge::INPUT_SHAPE_RANGE;
-static const char *const OP_NAME_MAP = "op_name_map";
-static const char *const IS_DYNAMIC_INPUT = "is_dynamic_input";
-static const char *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout";
-static const char *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout";
-static const char *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes";
-static const char *const OUTPUT = "output";
-static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize;
-static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize;
-static const char *const DYNAMIC_DIMS = kDynamicDims;
-static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str();
-static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str();
-static const char *const TUNE_DEVICE_IDS = ge::TUNE_DEVICE_IDS.c_str();
-static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY;
-static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str();
-static const char *const CORE_TYPE = ge::CORE_TYPE.c_str();
-static const char *const SOC_VERSION = ge::SOC_VERSION.c_str();
-static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM;
-static const char *const AICORE_NUM = ge::AICORE_NUM.c_str();
-static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str();
-static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str();
-static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str();
-static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str();
-static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str();
-static const char *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str();
-static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf";
-static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str();
-static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str();
-static const char *const LOG_LEVEL = "log";
-static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str();
-static const char *const DEBUG_DIR = ge::DEBUG_DIR;
-static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR;
-static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE;
-static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str();
-static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str();
-static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str();
-static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str();
-static const char *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str();
-static const char *const SHAPE_GENERALIZED_BUILD_MODE = ge::SHAPE_GENERALIZED_BUILD_MODE.c_str();
-static const char *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str();
-static const char *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str();
+static const char_t *const INPUT_FORMAT = "input_format";
+static const char_t *const INPUT_SHAPE = "input_shape";
+static const char_t *const INPUT_SHAPE_RANGE = ge::INPUT_SHAPE_RANGE;
+static const char_t *const OP_NAME_MAP = "op_name_map";
+static const char_t *const IS_DYNAMIC_INPUT = "is_dynamic_input";
+static const char_t *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout";
+static const char_t *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout";
+static const char_t *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes";
+static const char_t *const OUTPUT = "output";
+static const char_t *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize;
+static const char_t *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize;
+static const char_t *const DYNAMIC_DIMS = kDynamicDims;
+static const char_t *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str();
+static const char_t *const PRECISION_MODE = ge::PRECISION_MODE.c_str();
+static const char_t *const TUNE_DEVICE_IDS = ge::TUNE_DEVICE_IDS.c_str();
+static const char_t *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY;
+static const char_t *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str();
+static const char_t *const CORE_TYPE = ge::CORE_TYPE.c_str();
+static const char_t *const SOC_VERSION = ge::SOC_VERSION.c_str();
+static const char_t *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM;
+static const char_t *const AICORE_NUM = ge::AICORE_NUM.c_str();
+static const char_t *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str();
+static const char_t *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str();
+static const char_t *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str();
+static const char_t *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str();
+static const char_t *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str();
+static const char_t *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str();
+static const char_t *const COMPRESS_WEIGHT_CONF = "compress_weight_conf";
+static const char_t *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str();
+static const char_t *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str();
+static const char_t *const LOG_LEVEL = "log";
+static const char_t *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str();
+static const char_t *const DEBUG_DIR = ge::DEBUG_DIR;
+static const char_t *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR;
+static const char_t *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE;
+static const char_t *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str();
+static const char_t *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str();
+static const char_t *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str();
+static const char_t *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str();
+static const char_t *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str();
+static const char_t *const SHAPE_GENERALIZED_BUILD_MODE = ge::SHAPE_GENERALIZED_BUILD_MODE.c_str();
+static const char_t *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str();
+static const char_t *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str();
 
 // for interface: aclgrphBuildModel
 #ifdef __GNUC__
diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h
index 84f31145..d1d6ade9 100644
--- a/inc/external/ge/ge_ir_build.h
+++ b/inc/external/ge/ge_ir_build.h
@@ -98,10 +98,10 @@ GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph,
  * @retval GRAPH_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
-ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &))
+ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char_t *, const ModelBufferData &))
 GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const std::string &output_file, const ModelBufferData &model);
 
-GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model);
+GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char_t *output_file, const ModelBufferData &model);
 
 /**
  * @ingroup AscendCL
@@ -126,7 +126,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int32_t *major_version, int32
  * @retval GRAPH_SUCCESS The function is successfully executed.
  * @retval OtherValues Failure
  */
-GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len);
+GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char_t *file, const size_t len);
 
 /**
  * @ingroup AscendCL
@@ -150,7 +150,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type,
  * @param cfg_path [IN] the config file path
  * @return graphStatus
  */
-GE_FUNC_VISIBILITY graphStatus aclgrphSetOpAttr(Graph &graph, aclgrphAttrType attr_type, const char *cfg_path);
+GE_FUNC_VISIBILITY graphStatus aclgrphSetOpAttr(Graph &graph, aclgrphAttrType attr_type, const char_t *cfg_path);
 
 };  // namespace ge
 
 #endif  // INC_EXTERNAL_GE_IR_BUILD_H_
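
The dump entry point now spells the path as char_t (GE's alias of char), so a plain string literal still works. A sketch (not part of the patch):

    #include "ge/ge_ir_build.h"

    void DumpDemoGraph(void) {
      ge::Graph graph("demo");
      const ge::char_t path[] = "./demo_graph";
      (void) ge::aclgrphDumpGraph(graph, path, sizeof(path) - 1U);
    }
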
diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h
index 8966f738..d16af89f 100644
--- a/inc/external/runtime/rt_error_codes.h
+++ b/inc/external/runtime/rt_error_codes.h
@@ -62,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012;  // over limit
 static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013;  // queue is empty
 static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014;  // queue is full
 static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015;  // repeated init
+static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016;  // aivec over flow
 
 static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;  // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;  // ts internel error
@@ -113,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901;  // hdc disconn
 #ifdef __cplusplus
 }
 #endif
-
 #endif  // __INC_EXTERNEL_RT_ERROR_CODES_H__
diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h
index b1bf929f..2cb56222 100644
--- a/inc/framework/common/debug/ge_log.h
+++ b/inc/framework/common/debug/ge_log.h
@@ -40,7 +40,7 @@ enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP };
 class GE_FUNC_VISIBILITY GeLog {
  public:
-  static const uint64_t GetTid() {
+  static uint64_t GetTid() {
 #ifdef __GNUC__
     const uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid));
 #else
@@ -56,11 +56,11 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {
   return (enable == 1);
 }
 
-#define GELOGE(ERROR_CODE, fmt, ...) \
-  do { \
-    dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \
-               ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
-               ##__VA_ARGS__); \
+#define GELOGE(ERROR_CODE, fmt, ...) \
+  do { \
+    dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \
+               ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
+               ##__VA_ARGS__); \
   } while (false)
 
 #define GELOGW(fmt, ...) \
@@ -91,7 +91,7 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {
 #define GELOGT(VALUE, fmt, ...) \
   do { \
-    TraceStatus stat = VALUE; \
+    TraceStatus stat = (VALUE); \
     const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
     const int32_t idx = static_cast<int32_t>(stat); \
     char_t *k = const_cast<char_t *>("status"); \
@@ -102,7 +102,7 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {
 #define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
   do { \
-    dlog_error(MOD_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \
+    dlog_error((MOD_NAME), "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \
                ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
                ##__VA_ARGS__); \
   } while (false)
diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h
index 32e915eb..07affc3f 100644
--- a/inc/framework/common/debug/log.h
+++ b/inc/framework/common/debug/log.h
@@ -69,7 +69,7 @@
   do { \
     const ge::Status _chk_status = (expr); \
     if (_chk_status != ge::SUCCESS) { \
-      GELOGE((ge::FAILED), __VA_ARGS__); \
+      GELOGE(_chk_status, __VA_ARGS__); \
     } \
   } while (false)
@@ -213,9 +213,9 @@
 // If expr is not RT_ERROR_NONE, print the log
 #define GE_CHK_RT(expr) \
   do { \
-    const rtError_t _rt_ret = (expr); \
-    if (_rt_ret != RT_ERROR_NONE) { \
-      GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \
+    const rtError_t _rt_err = (expr); \
+    if (_rt_err != RT_ERROR_NONE) { \
+      GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_err); \
     } \
   } while (false)
@@ -278,7 +278,7 @@
       return (_status); \
     } \
   } while (false)
-
+namespace ge {
 template <typename T>
 GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) {
   std::string fmt;
   std::ostringstream st;
   st << "[" << t << "]";
   fmt = st.str();
   return fmt;
 }
-
+}  // namespace ge
 #endif  // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_
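
FmtToStr now lives in namespace ge; it streams any value and brackets it, which is how GE log text is composed. A sketch (not part of the patch), with the include path assumed:

    #include <cstdint>
    #include <string>
    #include "framework/common/debug/log.h"  // assumed include path for this header

    void BuildMessage(void) {
      const int64_t dim = -1;
      const std::string msg = "invalid dim" + ge::FmtToStr(dim);  // yields "invalid dim[-1]"
    }
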
diff --git a/inc/framework/common/file_constant_util.h b/inc/framework/common/file_constant_util.h
index 1ea81960..e0763057 100644
--- a/inc/framework/common/file_constant_util.h
+++ b/inc/framework/common/file_constant_util.h
@@ -27,11 +27,6 @@
 #include "graph/ge_tensor.h"
 
 namespace ge {
-extern const int64_t kBlockSize;
-extern const std::string kBinFileValues;
-extern const std::string kBinIdValue;
-extern const std::string kBinFilePathValue;
-
 struct FileConstantInfo {
   std::string value_bin_file_id;
   std::string value_bin_file_path;
@@ -47,14 +42,11 @@ void from_json(const nlohmann::json &j, OptionInfo &option_info);
 
 Status GetFilePathFromOption(std::map<std::string, std::string> &file_id_and_path_map);
 
-Status CopyOneWeightFromFile(const void *curr_dev_ptr, const std::string &value, const size_t file_constant_size,
+Status CopyOneWeightFromFile(const void *const curr_dev_ptr, const std::string &value, const size_t file_constant_size,
                              size_t &left_size);
 
 Status GetFilePath(const OpDescPtr &op_desc, const std::map<std::string, std::string> &file_id_and_path_map,
                    std::string &file_path);
-
-Status GetFileConstantElementTotalSize(const GeShape &shape, const DataType data_type, int64_t &mem_size,
-                                       const Format format = FORMAT_ND);
 }  // namespace ge
 #endif  // INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H
diff --git a/inc/framework/common/fmk_error_codes.h b/inc/framework/common/fmk_error_codes.h
index 9f1719ac..1ecd96cb 100644
--- a/inc/framework/common/fmk_error_codes.h
+++ b/inc/framework/common/fmk_error_codes.h
@@ -44,7 +44,7 @@
 // Each module uses the following four macros to define error codes:
 #define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, (name), (value))
 #define DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, (name), (value))
-#define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, name, value)
+#define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, (name), (value))
 
 #define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno((name), (desc));
 
@@ -74,7 +74,7 @@ class GE_FUNC_VISIBILITY StatusFactory {
 
 class GE_FUNC_VISIBILITY ErrorNoRegisterar {
  public:
-  ErrorNoRegisterar(uint32_t err, const std::string &desc) {
+  ErrorNoRegisterar(const uint32_t err, const std::string &desc) {
     StatusFactory::Instance()->RegisterErrorNo(err, desc);
   }
   ~ErrorNoRegisterar() {}
diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h
index 529f6cb4..fd80d2bd 100644
--- a/inc/framework/common/ge_inner_error_codes.h
+++ b/inc/framework/common/ge_inner_error_codes.h
@@ -22,17 +22,57 @@
 #include <string>
 #include "ge/ge_api_error_codes.h"
 
+// Each module defines error codes using the following macros, name can not be modified to (name)
+#define GE_ERRORNO_COMMON(name, value, desc) \
+  GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
+             ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::COMMON_MODULE, name, (value), (desc))
+#define GE_ERRORNO_CLIENT(name, value, desc) \
+  GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
+             ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::CLIENT_MODULE, name, (value), (desc))
+#define GE_ERRORNO_INIT(name, value, desc) \
+  GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
+             ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::INIT_MODULE, name, (value), (desc))
+#define GE_ERRORNO_SESSION(name, value, desc) \
+  GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
+             ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::SESSION_MODULE, name, (value), (desc))
+#define GE_ERRORNO_GRAPH(name, value, desc) \
+  GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
+             ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::GRAPH_MODULE, name, (value), (desc))
ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::ENGINE_MODULE, name, (value), (desc)) +#define GE_ERRORNO_OPS(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::OPS_MODULE, name, (value), (desc)) +#define GE_ERRORNO_PLUGIN(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::PLUGIN_MODULE, name, (value), (desc)) +#define GE_ERRORNO_RUNTIME(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::RUNTIME_MODULE, name, (value), (desc)) +#define GE_ERRORNO_EXECUTOR(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_DEVICE, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::EXECUTOR_MODULE, name, (value), (desc)) +#define GE_ERRORNO_GENERATOR(name, value, desc) \ + GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ + ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::GENERATOR_MODULE, name, (value), (desc)) + +// Get error code description +#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) + +#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast(RT_ERROR) + namespace ge { // System ID -enum SystemIdType { SYSID_GE = 8 }; +enum class InnSystemIdType { SYSID_GE = 8 }; // Runtime location -enum LogRuntime { +enum class InnLogRuntime { RT_HOST = 0b01, RT_DEVICE = 0b10, }; // Sub model -enum SubModuleId { +enum class InnSubModuleId { COMMON_MODULE = 0, CLIENT_MODULE = 1, INIT_MODULE = 2, @@ -47,13 +87,13 @@ enum SubModuleId { }; // Error code type -enum ErrorCodeType { +enum class InnErrorCodeType { ERROR_CODE = 0b01, EXCEPTION_CODE = 0b10, }; // Error level -enum ErrorLevel { +enum class InnErrorLevel { COMMON_LEVEL = 0b000, SUGGESTION_LEVEL = 0b001, MINOR_LEVEL = 0b010, @@ -61,33 +101,6 @@ enum ErrorLevel { CRITICAL_LEVEL = 0b100, }; -// Each module defines error codes using the following macros, name can not be modified to (name) -#define GE_ERRORNO_COMMON(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, (value), (desc)) -#define GE_ERRORNO_CLIENT(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, (value), (desc)) -#define GE_ERRORNO_INIT(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, (value), (desc)) -#define GE_ERRORNO_SESSION(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, (value), (desc)) -#define GE_ERRORNO_GRAPH(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, (value), (desc)) -#define GE_ERRORNO_ENGINE(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, (value), (desc)) -#define GE_ERRORNO_OPS(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, (value), (desc)) -#define GE_ERRORNO_PLUGIN(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, (value), (desc)) -#define GE_ERRORNO_RUNTIME(name, value, desc) \ - 
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, (value), (desc)) -#define GE_ERRORNO_EXECUTOR(name, value, desc) \ - GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, (value), (desc)) -#define GE_ERRORNO_GENERATOR(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, (value), (desc)) - -// Get error code description -#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) - // Common module error code definition GE_ERRORNO_COMMON(MEMALLOC_FAILED, 0, "Failed to allocate memory!"); // 1343225856 GE_ERRORNO_COMMON(PARAM_INVALID, 1, "Parameter's invalid!"); // 1343225857 @@ -313,10 +326,6 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph ma GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); -static inline Status TransRtErrorCode(const int32_t error_code) { - return static_cast(error_code); -} -#define RT_ERROR_TO_GE_STATUS(RT_ERROR) TransRtErrorCode(RT_ERROR) } // namespace ge #endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 868debe7..f1f6e54a 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -40,13 +40,13 @@ enum FrameworkType { CAFFE = 0, MINDSPORE = 1, TENSORFLOW = 3, - ANDROID_NN, - ONNX, + ANDROID_NN = 4, + ONNX = 5, }; enum class GraphStage : int64_t { GRAPH_STAGE_FUZZ = 0, GRAPH_STAGE_RESERVED }; -const char *const kGraphDumpStage = "DumpStage"; +const char_t *const kGraphDumpStage = "DumpStage"; const std::map kFwkTypeToStr = { {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; @@ -70,21 +70,42 @@ const std::string kTaskTypeAicore = "AI_CORE"; const std::string kTaskTypeAicpu = "AI_CPU"; const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; const std::string kTaskTypeFftsPlus = "FFTS_PLUS"; +const std::string kEngineNameVectorCore = "VectorEngine"; + +const std::string kEngineNameHccl = "ops_kernel_info_hccl"; +const std::string kEngineNameRts = "DNN_VM_RTS_OP_STORE"; +const std::string kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; +const std::string kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE"; +const std::string kEngineNameAiCpu = "aicpu_ascend_kernel"; +const std::string kEngineNameAiCpuTf = "aicpu_tf_kernel"; +const std::string kEngineNameAiCore = "AIcoreEngine"; +const std::string kAtomicOpType = "DynamicAtomicAddrClean"; + +const std::string kShapeTypeStatic = "static"; +const std::string kShapeTypeDynamic = "dynamic"; + +constexpr uint64_t kInferSessionId = 0U; +constexpr uint64_t kReleaseFlag = 1U; +constexpr uint32_t kInvalidModelId = 0xFFFFFFFFU; +constexpr size_t kNumTaskWithAtomicAddrCleanTask = 2U; // dynamic execute mode const char_t *const kLazyRecompile = "lazy_recompile"; +constexpr size_t kMaxHostMemInputLen = 64U; + // Data cache, including data address and length struct DataBuffer { - public: void *data; // Data address uint64_t length; // Data length bool isDataSupportMemShare = false; uint32_t placement = 0U; - DataBuffer(void *data_in, uint64_t data_len, bool is_support_mem_share, uint32_t placement = 0U) - : data(data_in), length(data_len), isDataSupportMemShare(is_support_mem_share), placement(placement) {} - DataBuffer() : data(nullptr), length(0UL), 
isDataSupportMemShare(false) {} + DataBuffer(void *const data_in, const uint64_t data_len, const bool is_support_mem_share = false, + const uint32_t data_placement = 0U) + : data(data_in), length(data_len), isDataSupportMemShare(is_support_mem_share), placement(data_placement) {} + + DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false), placement(0U) {} }; /// @@ -232,6 +253,9 @@ struct ModelInfo { class GE_FUNC_VISIBILITY ModelListener { public: virtual ~ModelListener() {} + ModelListener() = default; + ModelListener(const ModelListener &) = delete; + ModelListener &operator=(const ModelListener &) = delete; /// /// @brief Asynchronous callback interface /// @param [in] model_id Model ID of the callback @@ -241,7 +265,9 @@ class GE_FUNC_VISIBILITY ModelListener { virtual Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result_code, std::vector &outputs) = 0; - virtual void SetCallback(const RunAsyncCallback &callback){}; + virtual void SetCallback(const RunAsyncCallback &callback) { + (void)callback; + } virtual uint32_t GetResultCode() { return 0U; diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index 8da856e1..4a098b1f 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -34,12 +34,13 @@ class GE_FUNC_VISIBILITY ModelHelper { ~ModelHelper(); Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, - ge::ModelBufferData &model); + ge::ModelBufferData &model) const; Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape); Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file); Status LoadModel(const ge::ModelData &model_data); Status LoadRootModel(const ge::ModelData &model_data); + static void SetModelToGeModel(GeModelPtr &ge_model, Model &model); GeModelPtr GetGeModel(); GeRootModelPtr GetGeRootModel(); @@ -67,7 +68,6 @@ class GE_FUNC_VISIBILITY ModelHelper { Status GenerateGeModel(OmFileLoadHelper &om_load_helper); Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper); Status LoadModelData(OmFileLoadHelper &om_load_helper); - void SetModelToGeModel(GeModelPtr &ge_model, Model &model) const; Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; Status LoadWeights(OmFileLoadHelper &om_load_helper); Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h index 45d61395..2d715486 100644 --- a/inc/framework/common/helper/om_file_helper.h +++ b/inc/framework/common/helper/om_file_helper.h @@ -21,25 +21,20 @@ #include #include "external/ge/ge_ir_build.h" -#include "framework/common/fmk_types.h" #include "framework/common/types.h" #include "framework/common/ge_types.h" -using ProcParam = struct PROC_PARAM; -using std::string; -using std::vector; - namespace ge { struct ModelPartition { ModelPartitionType type; - uint8_t *data = 0; - uint32_t size = 0; + const uint8_t *data = nullptr; + uint32_t size = 0U; }; struct OmFileContext { std::vector partition_datas_; - std::vector partition_table_; - uint32_t model_data_len_ = 0; + std::vector partition_table_; + uint32_t model_data_len_ = 0U; }; struct SaveParam { @@ -55,13 +50,13 @@ 
class GE_FUNC_VISIBILITY OmFileLoadHelper { public: Status Init(const ge::ModelData &model); - Status Init(uint8_t *model_data, const uint32_t model_data_size); + Status Init(uint8_t *const model_data, const uint32_t model_data_size); - Status Init(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); + Status Init(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num); - Status GetModelPartition(ModelPartitionType type, ModelPartition &partition); + Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition); - Status GetModelPartition(ModelPartitionType type, ModelPartition &partition, size_t model_index); + Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition, const size_t model_index); OmFileContext context_; @@ -70,9 +65,9 @@ class GE_FUNC_VISIBILITY OmFileLoadHelper { private: Status CheckModelValid(const ge::ModelData &model) const; - Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size); + Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size); - Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); + Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num); bool is_inited_{false}; }; @@ -89,25 +84,24 @@ class GE_FUNC_VISIBILITY OmFileSaveHelper { ModelPartitionTable *GetPartitionTable(); - Status AddPartition(ModelPartition &partition); - - Status AddPartition(ModelPartition &partition, size_t cur_index); + Status AddPartition(const ModelPartition &partition); - const std::vector &GetModelPartitions() const; + Status AddPartition(const ModelPartition &partition, const size_t cur_index); - Status SaveModel(const SaveParam &save_param, const char *target_file, ge::ModelBufferData &model, - bool is_offline = true); + Status SaveModel(const SaveParam &save_param, const char_t *const output_file, ge::ModelBufferData &model, + const bool is_offline = true); - Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true); + Status SaveModelToFile(const char_t *const output_file, ge::ModelBufferData &model, const bool is_offline = true); std::vector model_contexts_; ModelFileHeader model_header_; OmFileContext context_; - ModelPartitionTable *GetPartitionTable(size_t cur_ctx_index); + ModelPartitionTable *GetPartitionTable(const size_t cur_ctx_index); - Status SaveRootModel(const SaveParam &save_param, const char *output_file, ModelBufferData &model, bool is_offline); + Status SaveRootModel(const SaveParam &save_param, const char_t *const output_file, ModelBufferData &model, + const bool is_offline); }; } // namespace ge #endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ diff --git a/inc/framework/common/l2_cache_optimize.h b/inc/framework/common/l2_cache_optimize.h index 39bc1d13..478c1750 100644 --- a/inc/framework/common/l2_cache_optimize.h +++ b/inc/framework/common/l2_cache_optimize.h @@ -28,97 +28,13 @@ #include "framework/common/util.h" #include "graph/compute_graph.h" -using std::vector; - namespace ge { // Size of RC memory alignment, 2M -constexpr size_t ALIGN_SIZE = 2097152; - -constexpr uint32_t RC_VALUE_DEFAULT = 1; -constexpr uint32_t RC_VALUE_MAX = 32; - -// RC data type classification -enum RCType { - RC_DEFAULT, // Such as temporary workspace memory of operator, variable (including global and local variable) - RC_HCOM, // Output of gradient aggregation, RC 
value should be set to 0 - RC_L2LOSS, // Parameter of L2 loss operator, RC value should be set to 0 - RC_INPUTOUTPUT, // Input and output tensor of operator, RC value is returned by FE calculation - RC_WEIGHTS, // The weight, fp16, RC value used by FP/BP operator should be set to 1 or the actual access numbers - RC_DW, // The gradient data DW and RC value output by BP operator - // should be set to 1 or the actual access numbers - RC_ARGS // Args of FlowTable, actual access numbers -}; - -enum MemType { INPUT_TENSOR, OUTPUT_TENSOR, WEIGHT, WORKSPACE }; - -// Memory usage information < node, type, number > -struct NodeInfo { - std::string nodeName; - MemType memType; - size_t index; -}; - -// Memory block RC value -struct RCMemoryBlock { - RCType type; // RC type - size_t blockSize; // memory block size - size_t headOffset; // Start offset from base address - size_t tailOffset; // End offset from base address - uint32_t rcCount; // RC value - NodeInfo nodeInfo; // Input and output indexes of node objects to which RC belongs -}; - -// L2Cache optimizer -class GE_FUNC_VISIBILITY L2CacheOptimize { - public: - explicit L2CacheOptimize(ge::ComputeGraphPtr &graph); - ~L2CacheOptimize(); - - // Collect the information L2Cache Memory optimization - Status Gath(); - - private: - ge::ComputeGraphPtr graph_; - - // Save RC block information list - std::vector weightRCs; - std::vector opRCs; - - // Extract RC information generated by FE from compiled graph - void RetirveRCinfo(); - - // Take the maximum common divisor of RC values for the duplicate address - void Merge(std::vector &blocks); - - // The RC information is aligned with the 2m address - void Align(std::vector &blocks); - - // Weight of l2loss operator, output of gradient aggregation output, RC value set to 0 - void HandleOutputZeroRC(RCType type, ge::NodePtr node, std::vector &outputList, - std::vector &blocks); - - // Processing operator input Tensor's RC - void HandOPInput(ge::NodePtr node, std::vector &inputList, std::vector &blocks); - - // Processing operator output Tensor's RC - void HandOPoutput(ge::NodePtr node, std::vector &outputList, std::vector &blocks); +constexpr size_t ALIGN_SIZE = 2097152U; - // maximum common divisor - uint32_t Measure(uint32_t x, uint32_t y) { - if ((x == 0) || (y == 0)) return RC_VALUE_DEFAULT; - uint32_t z = y; - while (x % y != 0) { - z = x % y; - x = y; - y = z; - } - return z; - } +constexpr uint32_t RC_VALUE_DEFAULT = 1U; +constexpr uint32_t RC_VALUE_MAX = 32U; - bool Contain(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); - bool Cross(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); - bool Connect(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); -}; } // namespace ge #endif // INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_ \ No newline at end of file diff --git a/inc/framework/common/op/attr_value_util.h b/inc/framework/common/op/attr_value_util.h index 27415df8..d749ae4f 100644 --- a/inc/framework/common/op/attr_value_util.h +++ b/inc/framework/common/op/attr_value_util.h @@ -34,143 +34,11 @@ #include #include #include - +#include "external/graph/types.h" #include "graph/debug/ge_attr_define.h" #include "proto/om.pb.h" -using domi::AttrDef; -using domi::AttrDef_ListValue; -using domi::ModelDef; -using domi::NamedAttrs; -using domi::OpDef; - namespace ge { -using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>; -using AttrDefPair = ::google::protobuf::MapPair; - -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, AttrDef &attr, OpDef 
*opdef); -// DEFINE_ADD_ATTR_VALUE -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs); - -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs); - -// DEFINE_ADD_ATTR_VALUE -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, OpDef *opdef); - -GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef); - -GE_FUNC_VISIBILITY void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef); - -// DEFINE_ADD_ATTR_VALUE_LIST -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef); -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef); - -GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef); - -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int32_t 
*value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef); - -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef); -GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef); - -GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef); -GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def); - -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const char *key, const char *value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const float value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const double value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def); - -GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def); -GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def); - -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const 
std::string &key, int64_t *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def); -GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def); - -GE_FUNC_VISIBILITY bool HasOpAttr(const OpDef *opdef, const std::string &attr_name); - -GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const char *value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const uint32_t value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const int32_t value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const float value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const double value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrDef(const bool value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrList(const std::string &value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrList(const bool value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrList(const float value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrList(const double value, AttrDef *out); -GE_FUNC_VISIBILITY void SetAttrList(const uint32_t value, AttrDef *out); - -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr); -GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr); - -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, int32_t *value, - const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, uint32_t *value, - const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, float *value, const AttrDefMap &attr); -GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, double *value, const AttrDefMap &attr); -} // namespace ge - -#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ +GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, domi::AttrDef *const out); +} +#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ \ No newline at end of file diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index 8b28258a..53e72074 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ 
b/inc/framework/common/op/ge_op_utils.h @@ -31,18 +31,16 @@ #include "proto/insert_op.pb.h" namespace ge { -using domi::Status; // Add Sub Mul GE_FUNC_VISIBILITY extern const uint32_t ADD_INPUT_NUM; -GE_FUNC_VISIBILITY extern const uint32_t SUB_INPUT_NUM; GE_FUNC_VISIBILITY extern const uint32_t MUL_INPUT_NUM; // Permute GE_FUNC_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM; // Ssd PriroBox -GE_FUNC_VISIBILITY extern const double SSD_PRIORBOX_ASPECT_RATIO_VALUE; +GE_FUNC_VISIBILITY extern const float64_t SSD_PRIORBOX_ASPECT_RATIO_VALUE; GE_FUNC_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM; @@ -55,8 +53,8 @@ GE_FUNC_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT; GE_FUNC_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT; // Merge -GE_FUNC_VISIBILITY extern const uint32_t MERGE_DATA_OUTPUT; -GE_FUNC_VISIBILITY extern const uint32_t MERGE_INDEX_OUTPUT; +GE_FUNC_VISIBILITY extern const int32_t MERGE_DATA_OUTPUT; +GE_FUNC_VISIBILITY extern const int32_t MERGE_INDEX_OUTPUT; // FunctionOp GE_FUNC_VISIBILITY extern const uint32_t IF_COND_INPUT; @@ -66,86 +64,35 @@ GE_FUNC_VISIBILITY extern const uint32_t FOR_DELTA_INPUT; GE_FUNC_VISIBILITY extern const uint32_t FOR_DATA_INPUT; GE_FUNC_VISIBILITY extern const int32_t NORMAL_TENSOR_SIZE; - +/*lint -e148*/ class GE_FUNC_VISIBILITY OpUtils { public: - /// - /// @ingroup domi_ome - /// @brief Check whether check_value is in [min_enum_value, max_enum_value] - /// @return true Within - /// @return false out of range - // - static inline bool CheckEnumValid(int32_t check_value, int32_t min_enum_value, int32_t max_enum_value) { - return check_value < min_enum_value ? false : (check_value >= max_enum_value ? false : true); - } - - /// - /// @ingroup domi_omg - /// @brief Determine whether to manually calculate the tensor size based on the values of format and dim - /// @param [in] format, Format information of the tensor - /// @param [in] real_dim_cnt, Tensor dim - /// @return true Manually calculate the size based on dim and datatype - /// @return false skip - /// - static bool IsComputDimsSize(const int32_t format, const uint32_t real_dim_cnt); - /// /// @brief Extract AIPP parameters from AttrDefMap and splice them /// @param [in] aipp_attr attr of operator /// @param [out] aipp_params aipp parameters /// @return enum of tagCCAippInputFormat /// - static Status ConvertAippParams(const NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); - static Status TransferDim(const std::vector &dim, std::vector &dim_vector); + + static Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams &aipp_params); template - static void SliceData(const std::vector &input, int64_t chunk_size, std::vector &output, - int64_t begin, int64_t out_dim, int64_t stride); + static void SliceData(const std::vector &input, const int64_t chunk_size, std::vector &output, + const int64_t begin, const int64_t out_dim, const int64_t stride); template - static Status SetDataByDataType(size_t out_size, const std::vector &chunk_input, - const std::vector &chunk_output, GeTensor *output); + static Status SetDataByDataType(const size_t out_size, const std::vector &chunk_input, + const std::vector &chunk_output, GeTensor *const output); template - static Status SetOutputSliceDataByDataType(void *data, int64_t data_size, const std::vector &input_dims, - const std::vector &begin, const std::vector &output_dims, - ge::GeTensor *output, const std::vector &stride); - static Status SetOutputSliceData(void *data, int64_t data_size, int32_t data_type, 
+ static Status SetOutputSliceDataByDataType(void *const data, const int64_t data_size, + const std::vector &input_dims, const std::vector &begin, + const std::vector &output_dims, ge::GeTensor *const output, + const std::vector &stride); + static Status SetOutputSliceData(void *const data, const int64_t data_size, const int32_t data_type, const std::vector &input_dims, const std::vector &begin, - const std::vector &output_dims, ge::GeTensor *const output, + const std::vector &output_dims, GeTensor *const output, const std::vector &stride); - - /// - /// @ingroup domi_omg - /// @brief Convert the convolutional weight data from [h, w, c, k] to [k, c, h, w] - /// @param [in] input Weight data in HWCK format - /// @param [in] H value of H dimension - /// @param [in] W value of W dimension - /// @param [in] C value of C dimension - /// @param [in] K value of K dimension - /// @param [out] output Data pointer after conversion. The format is KCHW. - /// - static void TransDataHWCK2KCHW(const void *input, int64_t h, int64_t w, int64_t c, int64_t k, void **output); - /// - /// @ingroup domi_omg - /// @brief Converts the convolutional weight data from [k, c, h, w] to [h, w, c, k]. - /// @param [in] input Weight data in HWCK format - /// @param [in] K value of K dimension - /// @param [in] C value of C dimension - /// @param [in] H value of H dimension - /// @param [in] W value of W dimension - /// @param [out] output Data pointer after conversion. The format is HWCK - /// - static void TransDataKCHW2HWCK(const void *input, int64_t k, int64_t c, int64_t h, int64_t w, void *output); - - static std::vector GetWeights(const ge::Node &node); - static std::vector GetWeights(ge::ConstNodePtr node); - static std::vector MutableWeights(const ge::Node &node); - static std::vector MutableWeights(const ge::NodePtr node); - static Status SetWeights(ge::Node &node, const std::vector &weights); - static Status SetWeights(const ge::NodePtr node, const std::vector &weights); static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, const DataType type, std::vector &dims); - - private: - static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc); }; +/*lint +e148*/ } // namespace ge #endif // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ diff --git a/inc/framework/common/op_types.h b/inc/framework/common/op_types.h index 4e4d4dda..2b7009e8 100644 --- a/inc/framework/common/op_types.h +++ b/inc/framework/common/op_types.h @@ -20,6 +20,8 @@ #include #include +#include "graph/types.h" + namespace ge { class GE_FUNC_VISIBILITY OpTypeContainer { public: @@ -30,12 +32,11 @@ class GE_FUNC_VISIBILITY OpTypeContainer { ~OpTypeContainer() = default; void Register(const std::string &op_type) { - op_type_list_.insert(op_type); + static_cast(op_type_list_.insert(op_type)); } bool IsExisting(const std::string &op_type) { - auto iter_find = op_type_list_.find(op_type); - return iter_find != op_type_list_.end(); + return op_type_list_.find(op_type) != op_type_list_.end(); } protected: @@ -47,20 +48,19 @@ class GE_FUNC_VISIBILITY OpTypeContainer { class GE_FUNC_VISIBILITY OpTypeRegistrar { public: - explicit OpTypeRegistrar(const std::string &op_type) { + explicit OpTypeRegistrar(const std::string &op_type) noexcept { OpTypeContainer::Instance()->Register(op_type); } ~OpTypeRegistrar() {} }; +} // namespace ge #define REGISTER_OPTYPE_DECLARE(var_name, str_name) \ - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *var_name; + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char_t 
*var_name; #define REGISTER_OPTYPE_DEFINE(var_name, str_name) \ - const char *var_name = str_name; \ - const OpTypeRegistrar g_##var_name##_reg(str_name); - -#define IS_OPTYPE_EXISTING(str_name) (OpTypeContainer::Instance()->IsExisting(str_name)) -} // namespace ge + const char_t *var_name = str_name; \ + const ge::OpTypeRegistrar g_##var_name##_reg(str_name); +#define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name)) #endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_ diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h index f0ecc5d1..16588830 100644 --- a/inc/framework/common/profiling/ge_profiling.h +++ b/inc/framework/common/profiling/ge_profiling.h @@ -24,10 +24,8 @@ /// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading /// @return Status result /// -GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream); +GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(const uint64_t index_id, const uint16_t tag_id, rtStream_t const stream); -GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id); - -GE_FUNC_VISIBILITY void ProfSetGraphIdToDeviceMap(uint32_t graph_id, uint32_t &device_id); +GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(const uint32_t graph_id, uint32_t &device_id); #endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ diff --git a/inc/framework/common/profiling_definitions.h b/inc/framework/common/profiling_definitions.h new file mode 100644 index 00000000..90f3823a --- /dev/null +++ b/inc/framework/common/profiling_definitions.h @@ -0,0 +1,173 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef AIR_CXX_PROFILING_DEFINITIONS_H
+#define AIR_CXX_PROFILING_DEFINITIONS_H
+#include <string>
+#include <iostream>
+#include <mutex>
+#include <unordered_map>
+#include "graph/profiler.h"
+#include "external/ge/ge_api_types.h"
+#include "toolchain/prof_callback.h"
+namespace ge {
+namespace profiling {
+enum {
+  kAclCompileAndExecute,
+  kAclMatchOpModel,
+  kAclMatchStaticOpModel,
+  kAclMatchDynamicOpModel,
+  kAclExecuteAsync,
+  kAclLoadSingleOp,
+  kAclBuildOpModel,
+  kInferShape,
+  kTiling,
+  kUpdateShape,
+  kConstPrepare,
+  kInitHybridExecuteArgs,
+  kInitInferShapeContext,
+  kDestroyInferShapeContext,
+  kResetSubgraphExecutor,
+  kCommitInferShapeTask,
+  kDeviceToHost,
+  kPrepareTask,
+  kLaunchTask,
+  kCommitTilingTask,
+  kAtomic,
+  kKernelLaunchPrepare,
+  kRtKernelLaunch,
+  kOpExecute,
+  kAllocMem,
+  kCopyH2D,
+
+  // Add new definitions here
+  kProfilingIndexEnd
+};
+constexpr uint64_t kInvalidHashId = 0UL;
+
+class ProfilingContext {
+ public:
+  static bool IsDumpToStdEnabled();
+  static ProfilingContext &GetInstance();
+  ProfilingContext();
+  ~ProfilingContext();
+
+  /*
+   * An alternative design is for `IsEnabled` to check only whether profiler_ is a null pointer, without keeping a
+   * separate enabled flag, which would save one flag. But that would require the profiler_ instance to be null
+   * whenever profiling is disabled. For performance, the profiling mechanism calls `RegisterString` at compile and
+   * load time to register strings with profiler_, so that later execution only uses the registered indices. This
+   * leads to a scenario where profiling is not enabled at compile time (compilation takes a long time, and enabling
+   * profiling then would not reflect the real execution-time costs), so the compile-time string registration does
+   * not take effect. If profiling is then switched on dynamically at execution time, the registered strings are not
+   * available during execution.
+   */
+  bool IsEnabled() const noexcept {
+    return enabled_ && profiler_ != nullptr;
+  }
+  void SetEnable() noexcept {
+    enabled_ = true;
+  }
+  void SetDisable() noexcept {
+    enabled_ = false;
+  }
+
+  void RecordCurrentThread(const int64_t element, const int64_t event, const EventType et,
+                           const std::chrono::time_point<std::chrono::system_clock> time_point) {
+    if (IsEnabled()) {
+      profiler_->RecordCurrentThread(element, event, et, time_point);
+    }
+  }
+
+  void RecordCurrentThread(const int64_t element, const int64_t event, const EventType et) {
+    RecordCurrentThread(element, event, et, std::chrono::system_clock::now());
+  }
+
+  const Profiler *GetProfiler() const {
+    return profiler_.get();
+  }
+
+  void Dump(std::ostream &out_stream) const {
+    if (IsEnabled()) {
+      profiler_->Dump(out_stream);
+    } else {
+      out_stream << "Profiling not enable, skip to dump" << std::endl;
+    }
+  }
+
+  void DumpToStdOut() const {
+    Dump(std::cout);
+  }
+
+  void Reset() {
+    if (IsEnabled()) {
+      profiler_->Reset();
+    }
+  }
+
+  int64_t RegisterString(const std::string &str);
+  int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str);
+  void UpdateElementHashId(const MsprofReporterCallback reporter_callback);
+  static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str,
+                            uint64_t &hash_id);
+  size_t GetRegisterStringNum() const {
+    return strings_to_index_.size();
+  }
+
+  void Init();
+
+ private:
+  void UpdateHashByStr(const std::string &str, const uint64_t hash);
+
+ private:
+  bool inited_;
+  bool enabled_;
+  int64_t str_index_;
+  std::unordered_map<std::string, int64_t> strings_to_index_;
+  std::mutex strings_to_index_mutex_;
+  std::unique_ptr<Profiler> profiler_;
+};
+
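The class above only declares the registration entry points, so the intended call pattern is easy to miss. A rough usage sketch (not part of the patch; it assumes this header is compiled and linked inside GE, and the install path in the include plus the function and string names are invented for illustration). The ScopeProfiler helper and PROFILING_* macros defined just below wrap the same start/end pairing:

#include "framework/common/profiling_definitions.h"  // assumed install path of this header

void RunTilingWithProfiling() {
  auto &ctx = ge::profiling::ProfilingContext::GetInstance();
  ctx.SetEnable();
  // Register the string once (normally at compile/load time); the hot path records by index only.
  const int64_t element = ctx.RegisterString("MyKernelNode");
  ctx.RecordCurrentThread(element, ge::profiling::kTiling, ge::profiling::EventType::kEventStart);
  // ... run tiling here ...
  ctx.RecordCurrentThread(element, ge::profiling::kTiling, ge::profiling::EventType::kEventEnd);
  ctx.DumpToStdOut();  // prints the collected records, or a skip notice when disabled
}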
+class ScopeProfiler {
+ public:
+  ScopeProfiler(const int64_t element, const int64_t event) : element_(element), event_(event) {
+    if (ProfilingContext::GetInstance().IsEnabled()) {
+      start_trace_ = std::chrono::system_clock::now();
+    }
+  }
+  ~ScopeProfiler() {
+    if (ProfilingContext::GetInstance().IsEnabled()) {
+      ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventStart, start_trace_);
+      ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventEnd);
+    }
+  }
+  void SetElement(const int64_t element) {
+    element_ = element;
+  }
+
+ private:
+  std::chrono::time_point<std::chrono::system_clock> start_trace_;
+  int64_t element_;
+  int64_t event_;
+};
+} // namespace profiling
+} // namespace ge
+#define PROFILING_START(element, event) \
+  ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event), \
+                                                                     ge::profiling::EventType::kEventStart)
+#define PROFILING_END(element, event) \
+  ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event), \
+                                                                     ge::profiling::EventType::kEventEnd)
+#define PROFILING_SCOPE(element, event) ge::profiling::ScopeProfiler profiler((element), (event))
+#define PROFILING_SCOPE_ELEMENT(element) profiler.SetElement((element))
+#endif // AIR_CXX_PROFILING_DEFINITIONS_H
diff --git a/inc/framework/common/scope_guard.h b/inc/framework/common/scope_guard.h
index 4ec4cfa7..ce9e4c51 100644
--- a/inc/framework/common/scope_guard.h
+++ b/inc/framework/common/scope_guard.h
@@ -25,9 +25,9 @@
 /// MAKE_GUARD([&] { Release Resource 1 })
 /// Acquire Resource 2
 // MAKE_GUARD([&] { Release Resource 2 })
-#define GE_MAKE_GUARD(var, callback) const ScopeGuard const_guard_##var(callback)
+#define GE_MAKE_GUARD(var, callback) const ::ge::ScopeGuard const_guard_##var(callback)
 
-#define GE_DISMISSABLE_GUARD(var, callback) ScopeGuard make_guard_##var(callback)
+#define GE_DISMISSABLE_GUARD(var, callback) ::ge::ScopeGuard make_guard_##var(callback)
 #define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss()
 
 namespace ge {
@@ -44,7 +44,7 @@ class GE_FUNC_VISIBILITY ScopeGuard {
     if (on_exit_scope_ != nullptr) {
       try {
         on_exit_scope_();
-      } catch (std::bad_function_call &e) {
+      } catch (std::bad_function_call &) {
       } catch (...) {
       }
     }
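The scope_guard.h hunk above qualifies the guard as ::ge::ScopeGuard precisely so the macros expand correctly when used outside namespace ge. A minimal sketch of the intended pattern (illustrative only; AcquireBuffer, ReleaseBuffer and Process are hypothetical stand-ins, not framework APIs):

#include "framework/common/scope_guard.h"  // assumed install path of this header

// Hypothetical resource helpers, declared only for illustration.
void *AcquireBuffer();
void ReleaseBuffer(void *buf);
bool Process(void *buf);

bool DoWork() {
  void *buf = AcquireBuffer();
  if (buf == nullptr) {
    return false;
  }
  // The fully qualified ::ge::ScopeGuard lets this expand in any namespace.
  GE_DISMISSABLE_GUARD(buf_guard, [&buf]() { ReleaseBuffer(buf); });
  if (!Process(buf)) {
    return false;  // buf_guard fires here and releases buf on the early exit
  }
  GE_DISMISS_GUARD(buf_guard);  // success path: ownership passes on, guard no longer fires
  return true;
}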
diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h
index 21f09ffd..c369d04f 100644
--- a/inc/framework/common/string_util.h
+++ b/inc/framework/common/string_util.h
@@ -39,13 +39,14 @@
 #include <sstream>
 #include <string>
 #include <vector>
+#include "graph/types.h"
 
 namespace ge {
 class GE_FUNC_VISIBILITY StringUtils {
  public:
  static std::string &Ltrim(std::string &s) {
 #if __cplusplus >= 201103L
-    (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int32_t c) { return std::isspace(c) == 0; }));
+    (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](const int32_t c) { return std::isspace(c) == 0; }));
 #else
     (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace))));
 #endif
@@ -54,7 +55,8 @@ class GE_FUNC_VISIBILITY StringUtils {
   // lint -esym(551,*)
   static std::string &Rtrim(std::string &s) { /*lint !e618*/
 #if __cplusplus >= 201103L
-    (void)s.erase(std::find_if(s.rbegin(), s.rend(), [](int32_t c) { return std::isspace(c) == 0; }).base(), s.end());
+    (void)s.erase(std::find_if(s.rbegin(), s.rend(), [](const int32_t c) { return std::isspace(c) == 0; }).base(),
+                  s.end());
 #else
     (void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end());
@@ -79,7 +81,7 @@ class GE_FUNC_VISIBILITY StringUtils {
   /// @param [in] delim separator
   /// @return string array after segmentation
   ///
-  static std::vector<std::string> Split(const std::string &str, char delim) {
+  static std::vector<std::string> Split(const std::string &str, const char_t delim) {
     std::vector<std::string> elems;
 
     if (str.empty()) {
@@ -94,8 +96,8 @@ class GE_FUNC_VISIBILITY StringUtils {
       elems.push_back(item);
     }
 
-    auto str_size = str.size();
-    if ((str_size > 0) && (str[str_size - 1] == delim)) {
+    const auto str_size = str.size();
+    if ((str_size > 0U) && (str[str_size - 1U] == delim)) {
       elems.emplace_back("");
     }
 
@@ -107,13 +109,13 @@ class GE_FUNC_VISIBILITY StringUtils {
   /// @param [in] s path name
   /// @return file name
   ///
-  static std::string GetFileName(std::string &s) {
+  static std::string GetFileName(const std::string &s) {
     if (s.empty()) {
       return "";
    }
-    std::vector<std::string> files = StringUtils::Split(s, '/');
+    const std::vector<std::string> files = StringUtils::Split(s, '/');
 
-    return files.empty() ? "" : files[files.size() - 1];
+    return files.empty() ? "" : files[files.size() - 1U];
   }
   ///
   /// @ingroup domi_common
@@ -125,12 +127,13 @@ class GE_FUNC_VISIBILITY StringUtils {
   /// @return string after replacement
   ///
   static std::string ReplaceAll(std::string str, const std::string &old_value, const std::string &new_value) {
-    std::string::size_type cur_pos = 0;
-    std::string::size_type old_length = old_value.length();
-    std::string::size_type new_length = new_value.length();
+    std::string::size_type cur_pos = 0U;
+    const std::string::size_type old_length = old_value.length();
+    const std::string::size_type new_length = new_value.length();
     // cycle replace
     for (; cur_pos != std::string::npos; cur_pos += new_length) {
-      if ((cur_pos = str.find(old_value, cur_pos)) != std::string::npos) {
+      cur_pos = str.find(old_value, cur_pos);
+      if (cur_pos != std::string::npos) {
        (void)str.replace(cur_pos, old_length, new_value);
       } else {
         break;
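A few concrete behaviours of the helpers above, as a sketch (not part of the patch; it assumes the header is on the include path, and StringUtilsExamples is an invented name):

#include <cassert>
#include "framework/common/string_util.h"  // assumed install path of this header

void StringUtilsExamples() {
  // Split keeps a trailing empty element when the input ends with the delimiter.
  const std::vector<std::string> parts = ge::StringUtils::Split("a/b/", '/');
  assert((parts.size() == 3U) && parts[2].empty());

  // GetFileName returns the component after the last '/'.
  assert(ge::StringUtils::GetFileName("/tmp/model.om") == "model.om");

  // ReplaceAll scans left to right, stepping past each inserted value.
  assert(ge::StringUtils::ReplaceAll("aaaa", "aa", "a") == "aa");
}

@@ -148,7 +151,7 @@
   /// @return if the value is a prefix, true is returned.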
Otherwise, false is returned /// static bool StartWith(const std::string &str, const std::string str_x) { - return ((str.size() >= str_x.size()) && (str.compare(0, str_x.size(), str_x) == 0)); + return ((str.size() >= str_x.size()) && (str.compare(0U, str_x.size(), str_x) == 0)); } /// @@ -159,14 +162,14 @@ class GE_FUNC_VISIBILITY StringUtils { /// @param [in] ... format Filling Content /// @return formatted string /// - static std::string FormatString(const char *format, ...) { - const uint32_t MAX_BUFFER_LEN = 1024; // the stack memory plint check result must be less than 1024 + static std::string FormatString(const char_t *const format, ...) { + const uint32_t MAX_BUFFER_LEN = 1024U; // the stack memory plint check result must be less than 1024 va_list args; va_start(args, format); - char buffer[MAX_BUFFER_LEN] = {0}; - int32_t ret = vsnprintf_s(buffer, MAX_BUFFER_LEN, MAX_BUFFER_LEN - 1, format, args); + char_t buffer[MAX_BUFFER_LEN] = {}; + const int32_t ret = vsnprintf_s(&buffer[0], MAX_BUFFER_LEN, MAX_BUFFER_LEN - 1U, format, args); va_end(args); - return ret > 0 ? buffer : ""; + return (ret > 0) ? buffer : ""; } }; } // namespace ge diff --git a/inc/framework/common/taskdown_common.h b/inc/framework/common/taskdown_common.h index 331bf486..159072b8 100644 --- a/inc/framework/common/taskdown_common.h +++ b/inc/framework/common/taskdown_common.h @@ -23,7 +23,7 @@ namespace ge { const int32_t CC_FUSION_OP_MAX = 32; -typedef enum tagCcStatus { +enum class ccStatus_t { CC_STATUS_SUCCESS = 0, /**< succ */ CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ @@ -33,10 +33,10 @@ typedef enum tagCcStatus { CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */ CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ - CC_STATUS_RESERVED /**< just for check */ -} ccStatus_t; + CC_STATUS_RESERVED = 8, /**< just for check */ +}; -typedef enum tagccKernelType { +enum class ccKernelType { CCE_AI_CORE = 0, /* cce aicore */ CCE_AI_CPU = 1, /* cce aicpu */ TE = 2, /* te operator*/ @@ -47,9 +47,9 @@ typedef enum tagccKernelType { CUST_AI_CPU = 7, /* custom aicpu*/ HOST_CPU = 8, /* host cpu */ INVALID = 10000 /* unknown kernel type */ -} ccKernelType; +}; -typedef struct tagOpContext { +using ccOpContext = struct tagOpContext { ccKernelType kernelType; uint32_t opId; uint32_t kernelFuncId; @@ -66,7 +66,28 @@ typedef struct tagOpContext { uint64_t genVariableBaseAddr; uint64_t genVariableBaseSize; uint64_t l2ctrlSize; -} ccOpContext; -} // namespace ge +}; + +enum class tagOpTensorFormat { OP_TENSOR_FORMAT_NC1HWC0 = 0, OP_TENSOR_FORMAT_ND, OP_TENSOR_FORMAT_RESERVED }; +enum class tagOpDataType { + OP_DATA_FLOAT = 0, /**< float type */ + OP_DATA_HALF, /**< fp16 type */ + OP_DATA_INT8, /**< int8 type */ + OP_DATA_INT32, /**< int32 type */ + OP_DATA_UINT8, /**< uint8 type */ + OP_DATA_HALF_UINT16_PROPOSAL, /**< mixed type for proposal */ + OP_DATA_RESERVED +}; + +// AICPU Tensor +using ccAICPUTensor = struct tagOpTensor { + // real dim info + tagOpTensorFormat format; + tagOpDataType data_type; + int32_t dim_cnt; + int32_t mm; + int32_t dim[8]; +}; +} // namespace ge #endif // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index e5681ba6..475def7f 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -44,32 +43,17 @@ 
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEB FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ALL; // Profile-related constants -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OME_PROFILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string CCE_PROFILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string RTS_PROFILE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF; - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB. -template -static std::pair flip_pair(const std::pair &p) { - return std::pair(p.second, p.first); -} - -template -static std::map flip_map(std::map src) { - std::map dst; - std::transform(src.begin(), src.end(), std::inserter(dst, dst.begin()), flip_pair); - return dst; -} - REGISTER_OPTYPE_DECLARE(DATA, "Data"); REGISTER_OPTYPE_DECLARE(AIPPDATA, "AippData"); +REGISTER_OPTYPE_DECLARE(QUEUE_DATA, "QueueData"); REGISTER_OPTYPE_DECLARE(CONVOLUTION, "Convolution"); REGISTER_OPTYPE_DECLARE(CORRELATION, "Correlation"); REGISTER_OPTYPE_DECLARE(CORRELATIONV2, "Correlation_V2"); @@ -140,6 +124,8 @@ REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze"); REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze"); REGISTER_OPTYPE_DECLARE(SQUEEZEV2, "SqueezeV2"); REGISTER_OPTYPE_DECLARE(UNSQUEEZEV2, "UnsqueezeV2"); +REGISTER_OPTYPE_DECLARE(SQUEEZEV3, "SqueezeV3"); +REGISTER_OPTYPE_DECLARE(UNSQUEEZEV3, "UnsqueezeV3"); REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice"); REGISTER_OPTYPE_DECLARE(RANGE, "Range"); REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals"); @@ -438,6 +424,7 @@ REGISTER_OPTYPE_DECLARE(MODELEXIT, "ModelExit"); REGISTER_OPTYPE_DECLARE(SEND, "Send"); REGISTER_OPTYPE_DECLARE(RECV, "Recv"); REGISTER_OPTYPE_DECLARE(ENDOFSEQUENCE, "EndOfSequence"); +REGISTER_OPTYPE_DECLARE(STARTOFSEQUENCE, "StartOfSequence"); REGISTER_OPTYPE_DECLARE(LABELSET, "LabelSet"); REGISTER_OPTYPE_DECLARE(LABELGOTO, "LabelGoto"); @@ -461,8 +448,6 @@ REGISTER_OPTYPE_DECLARE(ELU_GRAD, "EluGrad"); REGISTER_OPTYPE_DECLARE(ADD_V2, "AddV2"); REGISTER_OPTYPE_DECLARE(DATAFORMATDIMMAP, "DataFormatDimMap"); REGISTER_OPTYPE_DECLARE(DATAFORMATVECPERMUTE, "DataFormatVecPermute"); -REGISTER_OPTYPE_DECLARE(BESSELI0e, "BesselI0e"); -REGISTER_OPTYPE_DECLARE(BESSELI1e, "BesselI1e"); REGISTER_OPTYPE_DECLARE(DEQUANTIZE, "Dequantize"); REGISTER_OPTYPE_DECLARE(APPLYADADELTA, "ApplyAdadelta"); REGISTER_OPTYPE_DECLARE(APPLYADAGRAD, "ApplyAdagrad"); @@ -516,29 +501,11 @@ REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims"); // profiling training trace node REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); -enum InputMode { INPUT = 0, CONST_INPUT }; - -// Definition of the processing status enum of the process module -enum ModelProcessState { - INIT_STATE = 0, // init status - WAIT_EVENT_STATE, // Wait for the event status - IND_RSLT_STATE, // The model execution result is being output to the high level - STOPPED_STATE, // Model execution completed. 
The model enters this state after Model Manager::Stop - RESERVED_STATE, // reserved -}; - -// Indicates the enun definition of the execution mode of the access module -enum SysMode { - INFERENCE = 0, // Normal, that is, Inference mode - DEBUG, // Debug mode - TIME, // Model execution time mode, including the execution time of each OP - STOP, // STOP mode - RESET, // RESET mode - PERFORMANCE, // Impact of enabling the performance model: 1. The input data of the model is considered ready and does - // not need to be converted - ANDROID_DEBUG, // Exports Android platform computing data - RESERVED, // reserved -}; +// Stack series +REGISTER_OPTYPE_DECLARE(STACK, "Stack"); +REGISTER_OPTYPE_DECLARE(STACKPUSH, "StackPush"); +REGISTER_OPTYPE_DECLARE(STACKPOP, "StackPop"); +REGISTER_OPTYPE_DECLARE(STACKCLOSE, "StackClose"); // @brief encryption type of the model file enum ModelEncryptType { @@ -577,22 +544,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FIL /// /// @brief model name length /// -static constexpr uint32_t MODEL_NAME_LENGTH = 32; +constexpr uint32_t MODEL_NAME_LENGTH = 32U; /// /// @brief length of user-defined information /// -static constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32; +constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32U; /// /// @brief length of the model file signature /// -static constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64; +constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64U; /// /// @brief length of the reserved field in the model file header /// -static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75; +constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75U; // DATA node type FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DATA_TYPE; @@ -617,7 +584,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYP FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_CONF_DELIMITER; // dim default size value -static const int32_t DIM_DEFAULT_SIZE = 4; +constexpr int32_t DIM_DEFAULT_SIZE = 4; // dim extension default value FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t DIM_DEFAULT_VALUE; @@ -650,34 +617,35 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STREAM_SW FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_GLOBAL_STEP; -static const uint32_t PLATFORM_VERSION_LEN = 20; +constexpr uint32_t PLATFORM_VERSION_LEN = 20U; // Definition of the file header of the model file struct ModelFileHeader { - uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI - uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256 - uint32_t version = MODEL_VERSION; // version 1.0 - uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0}; // signature - uint32_t length = 0; // Ciphertext length. In the non-encryption model, the length is the plaintext length. - uint8_t is_encrypt = ModelEncryptType::UNENCRYPTED; // whether encrypted 0:not encrypt, 1:encrypt - uint8_t is_checksum = ModelCheckType::CHECK; // whether to check the checksum - uint8_t modeltype = 0; // 0:IR model 1:standard model 2: OM Tiny model - uint8_t genmode = 0; // 0:offline generate 1:online generate - uint8_t name[MODEL_NAME_LENGTH] = {0}; // Model name, which contains 32 characters - uint32_t ops = 0; // Computing power (Kops) - uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0}; // User-defined information. 
The value contains 32 characters - uint32_t om_ir_version = 0; - uint32_t model_num = 0; - uint8_t platform_version[PLATFORM_VERSION_LEN] = {0}; - uint8_t platform_type = {0}; - uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0}; // Reserved field 75 + uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI + uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256 + uint32_t version = MODEL_VERSION; // version 1.0 + uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0U}; // signature + uint32_t length = 0U; // Ciphertext length. In the non-encryption model, the length is the plaintext length. + uint8_t is_encrypt = + static_cast(ModelEncryptType::UNENCRYPTED); // whether encrypted 0:not encrypt, 1:encrypt + uint8_t is_checksum = static_cast(ModelCheckType::CHECK); // whether to check the checksum + uint8_t modeltype = 0U; // 0:IR model 1:standard model 2: OM Tiny model + uint8_t genmode = 0U; // 0:offline generate 1:online generate + uint8_t name[MODEL_NAME_LENGTH] = {0U}; // Model name, which contains 32 characters + uint32_t ops = 0U; // Computing power (Kops) + uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0U}; // User-defined information. The value contains 32 characters + uint32_t om_ir_version = 0U; + uint32_t model_num = 0U; + uint8_t platform_version[PLATFORM_VERSION_LEN] = {0U}; + uint8_t platform_type = {0U}; + uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0U}; // Reserved field 75 }; -static constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0; -static constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1; +constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0U; +constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1U; // number of partitions in the current model -static constexpr uint32_t PARTITION_SIZE = 5; +constexpr uint32_t PARTITION_SIZE = 5U; enum ModelPartitionType { MODEL_DEF = 0, WEIGHTS_DATA, TASK_INFO, TBE_KERNELS, CUST_AICPU_KERNELS }; @@ -692,22 +660,9 @@ struct ModelPartitionTable { ModelPartitionMemInfo partition[0]; }; -#define SIZE_OF_MODEL_PARTITION_TABLE(table) (sizeof(ModelPartitionTable) + sizeof(ModelPartitionMemInfo) * (table).num) - -// Filter format -typedef enum tagDomiFilterFormat { - DOMI_FILTER_KCHW, // KCHW - DOMI_FILTER_HWCK, // HWCK - DOMI_FILTER_RESERVED -} domiFilterFormat_t; - -// Const data trans type -typedef enum tagDomiConstDataTransType { - DOMI_CONST_DATA_NOT_CHANGE = 0, // No action is required - DOMI_CONST_DATA_TRANS_MATMUL, // The const input to MatMul and needs to be transposed - DOMI_CONST_DATA_RESERVED -} domiConstDataTransType_t; - +inline uint64_t SizeOfModelPartitionTable(const ModelPartitionTable &table) { + return sizeof(ModelPartitionTable) + (sizeof(ModelPartitionMemInfo) * static_cast(table.num)); +} // mode of activation typedef enum tagDomiActivationMode { DOMI_ACTIVATION_SIGMOID = 0, // sigmoid @@ -727,190 +682,6 @@ typedef enum tagDomiActivationMode { DOMI_ACTIVATION_RESERVED } domiActivationMode_t; -// mode of batchnorm -typedef enum tagDomiBatchNormMode { - DOMI_BATCHNORM_PER_ACTIVATION = 0, // bnScale, bnBias tensor dims are 1xCxHxW - DOMI_BATCHNORM_SPATIAL, // bnScale, bnBias tensor dims are 1xCx1x1 - DOMI_BATCHNORM_RESERVED -} domiBatchNormMode_t; - -// eltwise mode -typedef enum tagDomiEltwiseMode { - DOMI_ELTWISE_PROD = 0, // prod - DOMI_ELTWISE_SUM, // sum - DOMI_ELTWISE_MAX, // max - DOMI_ELTWISE_RESERVED -} domiEltwiseMode_t; - -// mode of padding -typedef enum tagDomiPaddingMode { - DOMI_PADDING_CEIL = 0, // Default padding mode - DOMI_PADDING_DIRECTASSIGN, // Default padding mode: NOTSET - 
DOMI_PADDING_VALID, // VALID padding mode - DOMI_PADDING_SAME, // Padding values of 0 are always used - DOMI_PADDING_CEIL_NEW, // Padding values of 0 are always used - DOMI_PADDING_VALID_NEW, // Padding values of 0 are always used - DOMI_PADDING_SAME_NEW, // Padding values of 0 are always used - DOMI_PADDING_RESERVED -} domiPaddingMode_t; - -// algorithm of convolution forward -typedef enum tagDomiConvolutionFwdAlgo { - DOMI_CONVOLUTION_FWD_ALGO_GEMM = 0, // matrix gemm algo - DOMI_CONVOLUTION_FWD_ALGO_WINOGRAD, // Winograd Transform algo - DOMI_CONVOLUTION_FWD_ALGO_GEMM_ACCU_FLOAT32, // accumulate in L0c with FP32 - DOMI_CONVOLUTION_FWD_ALGO_RESERVED -} domiConvolutionFwdAlgo_t; - -typedef enum tagDomiFullConnectFwdAlgo { - DOMI_FULLCONNECT_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 - DOMI_FULLCONNECT_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 -} domiFullConnectFwdAlgo_t; - -typedef enum tagDomiPooingFwdAlgo { - DOMI_POOLING_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 - DOMI_POOLING_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 -} domiPooingFwdAlgo_t; - -// mode of convolution -typedef enum tagDomiConvolutionMode { - DOMI_CONV_CONVOLUTION = 0, // math convolution - DOMI_CONV_CROSS_CORRELATION, // cross-correlation convolution - DOMI_CONV_DECONVOLUTION, // deconvolution, also named transposed convolution - DOMI_CONV_MODE_DEPTHWISE, // depthwise convolution - DOMI_CONV_MODE_RESERVED -} domiConvolutionMode_t; - -// softmax mode -typedef enum tagDomiSoftmaxMode { - DOMI_SOFTMAX_MODE_INSTANCE = 0, // compute the softmax over all C, H, W for each N - DOMI_SOFTMAX_MODE_CHANNEL, // compute the softmax over all C for each H, W, N - DOMI_SOFTMAX_MODE_HEIGHT, // compute the softmax over all H for each N, C, W - DOMI_SOFTMAX_MODE_WIDTH, // compute the softmax over all W for each N, C, H - DOMI_SOFTMAX_MODE_RESERVED -} domiSoftmaxMode_t; - -// softmax algorithm -typedef enum tagDomiSoftmaxAlgo { - DOMI_SOFTMAX_FAST = 0, // straightforward implementation - DOMI_SOFTMAX_ACCURATE, // subtract max from every point to avoid overflow - DOMI_SOFTMAX_LOG, // perform the Log softmax operation to avoid overflow - DOMI_SOFTMAX_ACCURATE_FP32, - DOMI_SOFTMAX_RESERVED -} domiSoftmaxAlgo_t; - -// algorithm of convolution backward -typedef enum tagDomiConvolutionBwdAlgo { - DOMI_CONVOLUTION_BWD_ALGO_GEMM = 0, // matrix gemm algo - DOMI_CONVOLUTION_BWD_ALGO_WINOGRAD, // Winograd Transform algo - DOMI_CONVOLUTION_BWD_ALGO_RESERVED -} domiConvolutionBwdAlgo_t; - -// mode of pooling -typedef enum tagDomiPoolingMode { - DOMI_POOLING_MAX = 0, // max pooling - DOMI_POOLING_AVG, // average pooling - DOMI_POOLING_L2, // L2 pooling - DOMI_POOLING_RESERVED -} domiPoolingMode_t; - -// propagate Nan -typedef enum tagDomiNanPropagation { - DOMI_NAN_NOT_PROPAGATE = 0, // Nan numbers are not propagated - DOMI_NAN_PROPAGATE, // Nan numbers are propagated - DOMI_NAN_PROPAGATE_RESERVED -} domiNanPropagation_t; - -// mode of cropandresize -typedef enum tagDomiCropAndResizeMode { - DOMI_RESIZE_METHOD_BILINEAR = 0, // resize bilinear - DOMI_RESIZE_METHOD_NEAREST, // resize nearest - DOMI_RESIZE_RESERVED -} domiCropAndResizeMode_t; - -// yolo version -typedef enum tagDomiYoloVersion { DOMI_YOLO_V2 = 1, DOMI_YOLO_V3, DOMI_YOLO_TRSERVED } domiYoloVersion_t; - -typedef enum tagDomiRNNScopePassType { - DOMI_STATIC_BIDIRECTIONAL_RNN_GENERAL_PASS = 0, - DOMI_DYNAMIC_BIDIRECTIONAL_RNN_GENERAL_PASS, - DOMI_DYNAMIC_BIDIRECTIONAL_RNN_BIDAF_PASS -} domiRNNScopePassType; - -// RNNDataLayout -typedef enum tagDomiRNNDataLayout { - 
DOMI_RNN_ND_TBX = 0, // data[max_time,batch_size,Xt] - DOMI_RNN_ND_BTX, // data[batch_size,max_time,Xt] - DOMI_RNN_5D_TX1BX, // data[max_time,Xt,1,batch_size,Xt] - DOMI_RNN_5D_BX1TX, // dataa[batch_size,Xt,1,max_time,Xt] - DOMI_RNN_4DTBX1, - DOMI_ENN_DL_RESERVED -} domiRNNDataLayout_t; - -// RNNInputMode -typedef enum tagDomiRNNInputMode { DOMI_RNN_LINEAR_INPUT = 0, DOMI_RNN_SKIP_INPUT } domiRNNInputMode_t; - -// RNNDirectionMode -typedef enum tagDomiRNNDirectionMode { DOMI_RNN_UNIDIRECTIONAL = 0, DOMI_RNN_BIDIRECTIONAL } domiDirectionMode_t; - -typedef enum tagDomiPoolingCeilMode { DOMI_POOLING_FLOOR = 0, DOMI_POOLING_CEIL } domiPoolingCeilMode_t; - -// RNNMode -typedef enum tagDomiRNNActivationMode { - DOMI_RNN_ACTIVATION_SIGMOID = 0, // sigmoid - DOMI_RNN_ACTIVATION_TANH, // tanh - DOMI_RNN_ACTIVATION_RELU, // ReLU - DOMI_RNN_ACTIVATION_RELU1, // ReLU1 - DOMI_RNN_ACTIVATION_RELU6, // ReLU6 - DOMI_RNN_ACTIVATION_RESERVED -} domiRNNActivationMode_t; - -typedef enum tagDomiRNNLSTMOutMode { - DOMI_RNN_LSTM_OUT_SEPARATE = 0, - DOMI_RNN_LSTM_OUT_CONCAT, - DOMI_RNN_LSTM_OUT_RESERVED -} domiRNNLSTMOutPutMode_t; -typedef enum tagDomiRNNLSTMStateOutMode { - DOMI_RNN_LSTM_STATE_OUT_SEPARATE = 0, - DOMI_RNN_LSTM_STATE_OUT_CONCAT_ALL, - DOMI_RNN_LSTM_STATE_OUT_RESERVED -} domiRNNLSTMStateOutMode_t; - -typedef enum tagDomiRNNMode { - DOMI_RNN_RELU = 0, - DOMI_RNN_TANH, - DOMI_LSTM, - DOMI_GRU, - DOMI_RNN_MODE_RESERVED -} domiRNNMode_t; - -typedef enum tagDomiResizeBilinearMode { - DOMI_RESIZE_OUTPUT_DIM_BY_ZOOM_FACTOR = 0, // Output dimension specified by zoom factor - DOMI_RESIZE_OUTPUT_DIM_BY_SHRINK_FACTOR, // specified by shrink factor - DOMI_RESIZE_OUTPUT_DIM_EXPLICIT, // specified explicitly - DOMI_RESIZE_OUTPUT_DIM_RESERVED -} domiResizeOutputDimMode_t; - -#pragma pack(1) // single-byte alignment -// DUMP file struct -struct FileHeader { - int32_t Version; // version - int32_t Output_Offset; // output offset address - char Reserved[24] = {0}; // 24 bytes reserved -}; - -struct BasicInfo { - struct FileHeader header; // file header - int32_t stream_id; // stread id - uint64_t start_time; // start time - uint64_t end_time; // end time - uint32_t input_size; // input memory size - uint32_t output_size; // output memory size - uint32_t weight_size; // weight Memory Size - uint32_t workspace_size; // workspace - uint32_t total_size; // total memory size -}; -#pragma pack() // Cancels single-byte alignment enum class MemorySizeCalcType { NORMAL = 0, ALWAYS_EMPTY }; } // namespace ge diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index fac7e4ca..ca662242 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef INC_FRAMEWORK_COMMON_UTIL_H_ -#define INC_FRAMEWORK_COMMON_UTIL_H_ +#ifndef AIR_INC_FRAMEWORK_COMMON_UTIL_H_ +#define AIR_INC_FRAMEWORK_COMMON_UTIL_H_ #include #include @@ -24,13 +24,16 @@ #include #include +#include "external/graph/types.h" +#include "external/register/register.h" #include "framework/common/debug/log.h" #include "framework/common/scope_guard.h" #include "framework/common/ge_inner_error_codes.h" +#include "graph/detail/attributes_holder.h" #define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ do { \ - if (size <= 0) { \ + if ((size) <= 0) { \ GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \ return PARAM_INVALID; \ } \ @@ -46,15 +49,19 @@ // new ge marco // Encapsulate common resource releases -#define GE_MAKE_GUARD_RTMEM(var) \ - GE_MAKE_GUARD(var, [&] { \ - if (var) GE_CHK_RT(rtFreeHost(var)); \ - }); +#define GE_MAKE_GUARD_RTMEM(var) \ + GE_MAKE_GUARD(var, [&] { \ + if ((var) != nullptr) { \ + GE_CHK_RT(rtFreeHost(var)); \ + } \ + }) -#define GE_MAKE_GUARD_RTSTREAM(var) \ - GE_MAKE_GUARD(var, [&] { \ - if (var) GE_CHK_RT(rtStreamDestroy(var)); \ - }); +#define GE_MAKE_GUARD_RTSTREAM(var) \ + GE_MAKE_GUARD(var, [&] { \ + if ((var) != nullptr) { \ + GE_CHK_RT(rtStreamDestroy(var)); \ + } \ + }) // For propagating errors when calling a function. #define GE_RETURN_IF_ERROR(expr) \ @@ -115,7 +122,7 @@ // Check if the parameter is null. If yes, return PARAM_INVALID and record the error #define GE_CHECK_NOTNULL(val) \ do { \ - if (val == nullptr) { \ + if ((val) == nullptr) { \ REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \ GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \ return ge::PARAM_INVALID; \ @@ -125,7 +132,7 @@ // Check if the parameter is null. If yes, just return and record the error #define GE_CHECK_NOTNULL_JUST_RETURN(val) \ do { \ - if (val == nullptr) { \ + if ((val) == nullptr) { \ GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ return; \ } \ @@ -134,7 +141,7 @@ // Check whether the parameter is null. If so, execute the exec_expr expression and record the error log #define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ do { \ - if (val == nullptr) { \ + if ((val) == nullptr) { \ GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ exec_expr; \ } \ @@ -143,7 +150,7 @@ // Check whether the parameter is null. If yes, return directly and record the error log #define GE_RT_VOID_CHECK_NOTNULL(val) \ do { \ - if (val == nullptr) { \ + if ((val) == nullptr) { \ GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ return; \ } \ @@ -152,7 +159,7 @@ // Check if the parameter is null. 
If yes, return false and record the error log #define GE_RT_FALSE_CHECK_NOTNULL(val) \ do { \ - if (val == nullptr) { \ + if ((val) == nullptr) { \ GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ return false; \ } \ @@ -161,7 +168,7 @@ // Check if the parameter is out of bounds #define GE_CHECK_SIZE(size) \ do { \ - if (size == 0) { \ + if ((size) == 0U) { \ GELOGE(ge::FAILED, "param[%s] is out of range", #size); \ return ge::PARAM_INVALID; \ } \ @@ -170,7 +177,7 @@ // Check if the value on the left is greater than or equal to the value on the right #define GE_CHECK_GE(lhs, rhs) \ do { \ - if (lhs < rhs) { \ + if ((lhs) < (rhs)) { \ GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \ return ge::PARAM_INVALID; \ } \ @@ -179,7 +186,7 @@ // Check if the value on the left is less than or equal to the value on the right #define GE_CHECK_LE(lhs, rhs) \ do { \ - if (lhs > rhs) { \ + if ((lhs) > (rhs)) { \ GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \ return ge::PARAM_INVALID; \ } \ @@ -187,102 +194,37 @@ #define GE_DELETE_NEW_SINGLE(var) \ do { \ - if (var != nullptr) { \ - delete var; \ - var = nullptr; \ + if ((var) != nullptr) { \ + delete (var); \ + (var) = nullptr; \ } \ } while (false) #define GE_DELETE_NEW_ARRAY(var) \ do { \ - if (var != nullptr) { \ - delete[] var; \ - var = nullptr; \ + if ((var) != nullptr) { \ + delete[](var); \ + (var) = nullptr; \ } \ } while (false) #define GE_FREE_RT_LOG(addr) \ do { \ - if (addr != nullptr) { \ + if ((addr) != nullptr) { \ const rtError_t error = rtFree(addr); \ if (error != RT_ERROR_NONE) { \ GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \ } \ - addr = nullptr; \ + (addr) = nullptr; \ } \ } while (false) +namespace ge { /** * @ingroup domi_common * @brief version of om.proto file */ -static constexpr int32_t OM_PROTO_VERSION = 2; - -/** - * Finding an Integer Ceiling Value Without Precision Loss - */ -#define CEIL(N, n) (((N) + (n)-1) / (n)) - -namespace ge { -using google::protobuf::Message; - -/// -/// @ingroup domi_common -/// @brief Reads the proto structure from an array. -/// @param [in] data proto data to be read -/// @param [in] size proto data size -/// @param [out] proto Memory for storing the proto file -/// @return true success -/// @return false fail -/// -GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int32_t size, Message *proto); - -/// -/// @ingroup domi_proto -/// @brief Reads the proto file in the text format. -/// @param [in] file path of proto file -/// @param [out] message Memory for storing the proto file -/// @return true success -/// @return false fail -/// -GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message); - -/// -/// @ingroup: domi_common -/// @brief: get length of file -/// @param [in] input_file: path of file -/// @return long: File length. If the file length fails to be obtained, the value -1 is returned. -/// -GE_FUNC_VISIBILITY extern long GetFileLength(const std::string &input_file); - -/// -/// @ingroup domi_common -/// @brief Reads all data from a binary file. -/// @param [in] file_name path of file -/// @param [out] buffer Output memory address, which needs to be released by the caller. 
-/// @param [out] length Output memory size -/// @return false fail -/// @return true success -/// -GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *const file_name, char **buffer, int32_t &length); - -GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vector &buffer); - -/// -/// @ingroup domi_common -/// @brief Recursively Creating a Directory -/// @param [in] directory_path Path, which can be a multi-level directory. -/// @return 0 success -/// @return -1 fail -/// -GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path); - -/// -/// @ingroup domi_common -/// @brief Obtains the current time string. -/// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555 -/// -GE_FUNC_VISIBILITY std::string CurrentTimeInStr(); +constexpr int32_t OM_PROTO_VERSION = 2; /// /// @ingroup domi_common @@ -294,7 +236,7 @@ template GE_FUNC_VISIBILITY std::string ToString(std::vector &v) { std::stringstream ss; ss << "["; - for (T x : v) { + for (const T x : v) { ss << x; ss << ", "; } @@ -314,7 +256,7 @@ template GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField &rpd_field) { std::stringstream ss; ss << "["; - for (T x : rpd_field) { + for (const T x : rpd_field) { ss << x; ss << ", "; } @@ -343,6 +285,63 @@ GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedPtrField return str_ret; } +/// +/// @ingroup domi_common +/// @brief Reads the proto structure from an array. +/// @param [in] data proto data to be read +/// @param [in] size proto data size +/// @param [out] proto Memory for storing the proto file +/// @return true success +/// @return false fail +/// +GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *const data, const int32_t size, + google::protobuf::Message *const proto); + +/// +/// @ingroup domi_proto +/// @brief Reads the proto file in the text format. +/// @param [in] file path of proto file +/// @param [out] message Memory for storing the proto file +/// @return true success +/// @return false fail +/// +GE_FUNC_VISIBILITY bool ReadProtoFromText(const char_t *const file, google::protobuf::Message *const message); + +/// +/// @ingroup: domi_common +/// @brief: get length of file +/// @param [in] input_file: path of file +/// @return int64_t: File length. If the file length fails to be obtained, the value -1 is returned. +/// +GE_FUNC_VISIBILITY extern int64_t GetFileLength(const std::string &input_file); + +/// +/// @ingroup domi_common +/// @brief Reads all data from a binary file. +/// @param [in] file_name path of file +/// @param [out] buffer Output memory address, which needs to be released by the caller. +/// @param [out] length Output memory size +/// @return false fail +/// @return true success +/// +GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char_t *const file_name, char_t **const buffer, int32_t &length); + +/// +/// @ingroup domi_common +/// @brief Recursively Creating a Directory +/// @param [in] directory_path Path, which can be a multi-level directory. +/// @return 0 success +/// @return -1 fail +/// +GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path); + +/// +/// @ingroup domi_common +/// @brief Obtains the current time string. +/// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555 +/// +GE_FUNC_VISIBILITY std::string CurrentTimeInStr(); + /// /// @ingroup domi_common /// @brief Obtains the absolute time (timestamp) of the current system. 
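A short usage sketch for the caller-release contract documented above (editor's illustration, not part of the patch): the file path, function name, and consumption step are placeholders, and it assumes the out-buffer is allocated with new[], which is what makes GE_DELETE_NEW_ARRAY from this header the matching release.

void ReadModelBytesExample() {
  ge::char_t *buffer = nullptr;
  int32_t length = 0;
  // On success, ReadBytesFromBinaryFile allocates the buffer and ownership passes to the caller.
  if (ge::ReadBytesFromBinaryFile("model.om", &buffer, length)) {
    // ... consume buffer[0 .. length) here ...
  }
  GE_DELETE_NEW_ARRAY(buffer);  // releases on success; the macro's nullptr check makes it safe on failure too
}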
@@ -366,7 +365,7 @@ GE_FUNC_VISIBILITY uint32_t GetCurrentSecondTimestap(); /// @param [in] b /// @return false: true: The result is within the normal int64 range. /// -GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); +GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(const int64_t a, const int64_t b); /// /// @ingroup domi_common @@ -374,7 +373,7 @@ GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); /// @param [in] path of input file /// @param [out] Absolute path of a file. If the absolute path cannot be obtained, an empty string is returned /// -GE_FUNC_VISIBILITY std::string RealPath(const char *path); +GE_FUNC_VISIBILITY std::string RealPath(const char_t *path); /// /// @ingroup domi_common @@ -401,17 +400,9 @@ GE_FUNC_VISIBILITY bool CheckOutputPathValid(const std::string &file_path, const /// @param [in] str file path /// @param [out] result /// -GE_FUNC_VISIBILITY bool ValidateStr(const std::string &str, const std::string &mode); +GE_FUNC_VISIBILITY bool ValidateStr(const std::string &file_path, const std::string &mode); -/// -/// @ingroup domi_common -/// @brief Check path invalid -/// @param [in] path, path to be checked -/// @param [in] length, length of path -/// @return 0 success -/// @return -1 fail -/// -GE_FUNC_VISIBILITY Status CheckPath(const char *path, size_t length); +GE_FUNC_VISIBILITY Status ConvertToInt32(const std::string &str, int32_t &val); } // namespace ge -#endif // INC_FRAMEWORK_COMMON_UTIL_H_ +#endif // AIR_INC_FRAMEWORK_COMMON_UTIL_H_ diff --git a/inc/framework/engine/dnnengine.h b/inc/framework/engine/dnnengine.h index 84d5705a..63c2d9de 100644 --- a/inc/framework/engine/dnnengine.h +++ b/inc/framework/engine/dnnengine.h @@ -26,11 +26,11 @@ #include "graph/types.h" namespace ge { -enum PriorityEnum { +enum class PriorityEnum { COST_0 = 0, - COST_1, - COST_2, - COST_3, + COST_1 = 1, + COST_2 = 2, + COST_3 = 3, COST_9 = 9, COST_10 = 10, }; @@ -38,7 +38,7 @@ enum PriorityEnum { struct DNNEngineAttribute { std::string engine_name; std::vector mem_type; - uint32_t compute_cost; + PriorityEnum compute_cost; enum RuntimeType runtime_type; // HOST, DEVICE // If engine input format must be specific, set this attribute, else set FORMAT_RESERVED Format engine_input_format; @@ -53,10 +53,11 @@ class GE_FUNC_VISIBILITY DNNEngine { engine_attribute_ = attrs; } virtual ~DNNEngine() = default; - Status Initialize(const std::map &options) { + Status Initialize(const std::map &options) const { + (void)options; return SUCCESS; } - Status Finalize() { + Status Finalize() const { return SUCCESS; } void GetAttributes(DNNEngineAttribute &attr) const { diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index c5df77df..2929d26b 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -32,18 +32,19 @@ namespace ge { class SingleOp; class DynamicSingleOp; +class GeRootModel; struct RunModelData { uint32_t index; // Data index uint32_t modelId; - std::vector blobs; // All input/output data buffer - uint32_t timestamp; // Data creation time - uint32_t timeout; // Processing timeout - uint64_t request_id = 0; // Request ID - uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 - uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 - uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 - std::vector dynamic_dims; // 
Dynamic dims scene, set dynamic dims, not supported by default:empty + std::vector blobs; // All input/output data buffer + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint64_t request_id = 0UL; // Request ID + uint64_t dynamic_batch_size = 0UL; // Dynamic batch size scene, set dynamic size, not supported by default:0 + uint64_t dynamic_image_height = 0UL; // Dynamic image size scene, set image height, not supported by default:0 + uint64_t dynamic_image_width = 0UL; // Dynamic image size scene, set image width, not supported by default:0 + std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty }; class GE_FUNC_VISIBILITY GeExecutor { @@ -69,11 +70,11 @@ class GE_FUNC_VISIBILITY GeExecutor { /// static Status FinalizeEx(); - Status UnloadModel(uint32_t modelId); + Status UnloadModel(const uint32_t model_id); // Get input and output descriptor - Status GetModelDescInfo(uint32_t model_id, std::vector &input_desc, std::vector &output_desc, - bool new_model_desc = false); + Status GetModelDescInfo(const uint32_t model_id, std::vector &input_desc, + std::vector &output_desc, const bool new_model_desc = false); /// /// @ingroup ge @@ -84,7 +85,8 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario /// @return execute result /// - Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size); + Status SetDynamicBatchSize(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, + const uint64_t batch_size); /// /// @ingroup ge @@ -96,8 +98,8 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario /// @return execute result /// - Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, - uint64_t image_width); + Status SetDynamicImageSize(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, + const uint64_t image_height, const uint64_t image_width); /// /// @ingroup ge @@ -109,7 +111,7 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [in] dynamic_dims: array of dynamic dimensions /// @return execute result /// - Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, + Status SetDynamicDims(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, const std::vector &dynamic_dims); /// @@ -120,7 +122,7 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] cur_dynamic_dims: current dynamic dims /// @return execute result /// - Status GetCurDynamicDims(uint32_t model_id, const std::vector &dynamic_dims, + Status GetCurDynamicDims(const uint32_t model_id, const std::vector &dynamic_dims, std::vector &cur_dynamic_dims); /// @@ -131,7 +133,8 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] dynamic_type /// @return execute result /// - Status GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info, int32_t &dynamic_type); + Status GetDynamicBatchInfo(const uint32_t model_id, std::vector> &batch_info, + int32_t &dynamic_type); /// /// @ingroup ge @@ -140,7 +143,7 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] batch_info /// @return execute result /// - Status GetCombinedDynamicDims(uint32_t model_id, std::vector> &batch_info); + Status GetCombinedDynamicDims(const uint32_t model_id, std::vector> &batch_info); /// /// @ingroup ge @@ -149,7 
+152,7 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] user_designate_shape_order /// @return execute result /// - Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector &user_designate_shape_order); + Status GetUserDesignateShapeOrder(const uint32_t model_id, std::vector &user_designate_shape_order); Status GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type); @@ -163,18 +166,18 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [in] aippParms: kAippDynamicPara by user in dynamic aipp /// @return execute result /// - Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length, + Status SetDynamicAippData(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, const std::vector &aipp_batch_para, - const kAippDynamicPara &aippParms); + const kAippDynamicPara &aipp_parms); - Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + Status GetAIPPInfo(const uint32_t model_id, const uint32_t index, AippConfigInfo &aipp_info); - Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, + Status GetOpAttr(const uint32_t model_id, const std::string &op_name, const std::string &attr_name, std::string &attr_value); - Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); + Status GetModelAttr(const uint32_t model_id, std::vector &dynamic_output_shape_info); - Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + Status GetAippType(const uint32_t model_id, const uint32_t index, InputAippType &type, size_t &aipp_index); Status CommandHandle(const Command &command); @@ -188,7 +191,7 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @return SUCCESS /// @return FAILED /// - Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size); + Status GetMaxUsedMemory(const uint32_t model_id, uint32_t &max_size); /// /// @ingroup ge @@ -210,8 +213,8 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] uint32_t &model_id: Corresponding identification after model loading /// @return SUCCESS handle successfully / others handle failed /// - Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, - void *weight_ptr, size_t weight_size); + Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *const dev_ptr, const size_t mem_size, + void *const weight_ptr, const size_t weight_size); /// /// @ingroup ge @@ -225,6 +228,18 @@ class GE_FUNC_VISIBILITY GeExecutor { Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector &input_queue_ids, const std::vector &output_queue_ids); + /// + /// @ingroup ge + /// @brief Load task list from root_model with queue. + /// @param [out] model_id: model id allocate from manager. + /// @param [in] root_model: Instance of GeRootModel. + /// @param [in] input_queue_ids: input queue ids create from user. + /// @param [in] output_queue_ids: output queue ids create from user.
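+  /// Illustrative call (editor's sketch, not from the original patch; "executor" is a GeExecutor
+  /// instance, "root_model" a previously built GeRootModel, and uint32_t queue ids are assumed,
+  /// matching the ModelData overload above):
+  ///   uint32_t model_id = 0U;
+  ///   const std::vector<uint32_t> input_queue_ids{0U};
+  ///   const std::vector<uint32_t> output_queue_ids{1U};
+  ///   const Status ret = executor.LoadModelWithQ(model_id, root_model, input_queue_ids, output_queue_ids);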
+ /// @return: 0 for success / others for fail + /// + Status LoadModelWithQ(uint32_t &model_id, const std::shared_ptr &root_model, + const std::vector &input_queue_ids, const std::vector &output_queue_ids); + /// /// @ingroup ge /// @brief Synchronous execution of offline model(Do not create thread) @@ -235,8 +250,17 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] domi::OutputData *output_data: Model output data /// @return SUCCESS handle successfully / others handle failed /// - Status ExecModel(uint32_t model_id, void *stream, const RunModelData &input_data, RunModelData &output_data, - bool async_mode = false); + Status ExecModel(const uint32_t model_id, void *const stream, const RunModelData &input_data, + RunModelData &output_data, const bool async_mode = false); + + /// + /// @ingroup ge + /// @brief Load task list from root_model without input queue or output queue. + /// @param [out] model_id: model id allocate from manager. + /// @param [in] root_model: Instance of GeRootModel. + /// @return: 0 for success / others for fail + /// + Status LoadModelWithoutQ(uint32_t &model_id, const std::shared_ptr &root_model) const; /// /// @ingroup ge @@ -250,9 +274,9 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] std::vector &output_desc: description of model output data /// @return SUCCESS handle successfully / others handle failed /// - Status ExecModel(uint32_t model_id, void *stream, const RunModelData &run_input_data, + Status ExecModel(const uint32_t model_id, void *const stream, const RunModelData &run_input_data, const std::vector &input_desc, RunModelData &run_output_data, - std::vector &output_desc, bool async_mode = false); + std::vector &output_desc, const bool async_mode = false); /// /// @ingroup ge @@ -273,36 +297,38 @@ class GE_FUNC_VISIBILITY GeExecutor { /// @param [out] size_t &weight_size Weight memory space size /// @return SUCCESS handle successfully / others handle failed /// - Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size); + Status GetMemAndWeightSize(const void *const model_data, const size_t model_size, size_t &mem_size, + size_t &weight_size); - static Status LoadSingleOp(const std::string &modelName, const ModelData &modelData, void *stream, - SingleOp **single_op); + static Status LoadSingleOp(const std::string &model_name, const ModelData &model_data, void *const stream, + SingleOp **const single_op); - static Status LoadSingleOpV2(const std::string &modelName, const ModelData &modelData, void *stream, - SingleOp **single_op, const uint64_t model_id); + static Status LoadSingleOpV2(const std::string &model_name, const ModelData &model_data, void *const stream, + SingleOp **const single_op, const uint64_t model_id); - static Status ExecuteAsync(SingleOp *executor, const std::vector &inputs, + static Status ExecuteAsync(SingleOp *const executor, const std::vector &inputs, std::vector &outputs); - static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &modelData, void *stream, - DynamicSingleOp **single_op); + static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &model_data, void *const stream, + DynamicSingleOp **const single_op); - static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &modelData, void *stream, - DynamicSingleOp **single_op, const uint64_t model_id); + static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &model_data, void *const stream, + DynamicSingleOp 
**const single_op, const uint64_t model_id); - static Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, + static Status ExecuteAsync(DynamicSingleOp *const executor, const std::vector &input_desc, const std::vector &inputs, std::vector &output_desc, std::vector &outputs); - static Status ReleaseSingleOpResource(void *stream); + static Status ReleaseSingleOpResource(void *const stream); - static Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); + static Status GetDeviceIdByModelId(const uint32_t model_id, uint32_t &device_id); - Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); - Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); - Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, - std::vector &output_dims); - Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); + Status GetBatchInfoSize(const uint32_t model_id, size_t &shape_count); + Status GetOrigInputInfo(const uint32_t model_id, const uint32_t index, OriginInputInfo &orig_input_info); + Status GetAllAippInputOutputDims(const uint32_t model_id, const uint32_t index, + std::vector &input_dims, std::vector &output_dims); + Status GetOpDescInfo(const uint32_t device_id, const uint32_t stream_id, const uint32_t task_id, + OpDescInfo &op_desc_info); private: static std::atomic_bool is_inited_; diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index b49fa53b..8213c115 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -31,6 +31,8 @@ #include "framework/omg/omg_inner_types.h" namespace ge { +const std::string kAttrSupportDynamicShape = "support_dynamicshape"; + class GeRootModel; class GE_FUNC_VISIBILITY GeGenerator { public: @@ -103,8 +105,8 @@ class GE_FUNC_VISIBILITY GeGenerator { /// @param [in] graph_name: graph name. /// @param [out] graph: graph of single op. 
/// @return SUCCESS or FAILED - Status BuildSingleOpGraph(OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, - Graph &graph, std::vector> &inputs_name_type); + Status BuildSingleOpGraph(const OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, + Graph &graph, std::vector> &inputs_name_type) const; Status BuildOriginalGraphInfo(OpDescPtr &op_desc, const std::vector &inputs, const std::vector &outputs, const std::string &model_file_name, bool is_offline, int32_t compile_flag, GraphStage graph_stage, Graph &graph, @@ -116,20 +118,20 @@ class GE_FUNC_VISIBILITY GeGenerator { ge::ModelBufferData &model, bool is_offline = true); Status BuildSingleOp(OpDescPtr &op_desc, const std::vector &inputs, const std::vector &outputs, const std::string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, - ComputeGraphPtr &compute_graph, bool is_offline = true, int32_t compile_flag = 0, + ComputeGraphPtr &comp_graph, bool is_offline = true, int32_t compile_flag = 0, GraphStage graph_stage = GraphStage::GRAPH_STAGE_RESERVED); bool CheckNoAicore(const ComputeGraphPtr &graph); - void RemoveConst(const std::vector &inputs, std::vector &outputs); - Status CheckForSingleOp(OpDescPtr &op_desc, const std::vector &inputs, - const std::vector &outputs); - Status InferFormatForSingleOp(OpDescPtr &op_desc, Graph &graph); + void RemoveConst(const std::vector &inputs, std::vector &outputs) const; + Status CheckForSingleOp(const OpDescPtr &op_desc, const std::vector &inputs, + const std::vector &outputs) const; + Status InferFormatForSingleOp(const OpDescPtr &op_desc, const Graph &graph) const; using GeRootModelPtr = std::shared_ptr; Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); Status CreateGeneralizedBuildAttrs(const GeRootModelPtr &ge_root_model, const std::vector &inputs, const std::vector &outputs, const std::vector> &inputs_name_type, - std::vector &generalized_build_attrs); + std::vector &generalized_build_attrs) const; class Impl; diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h index a04a1ebe..30ade3b7 100644 --- a/inc/framework/memory/memory_api.h +++ b/inc/framework/memory/memory_api.h @@ -17,11 +17,7 @@ #ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_ #define INC_FRAMEWORK_MEMORY_MEMORY_API_H_ -#include -#include - #include "external/ge/ge_api_error_codes.h" -#include "graph/types.h" #include "runtime/mem.h" namespace ge { diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h index 1ccbd785..be615426 100644 --- a/inc/framework/memory/memory_assigner.h +++ b/inc/framework/memory/memory_assigner.h @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner { MemoryAssigner &operator=(const MemoryAssigner &) = delete; - Status AssignMemory(bool is_loop_graph, std::map &mem_offset, size_t &zero_copy_mem_size); + Status AssignMemory(std::map &mem_offset, size_t &zero_copy_mem_size); private: ge::ComputeGraphPtr compute_graph_; diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h index 029b7a24..cab51e0d 100644 --- a/inc/framework/omg/omg.h +++ b/inc/framework/omg/omg.h @@ -64,7 +64,7 @@ GE_FUNC_VISIBILITY Status InitDomiOmgContext(const std::string &input_shape, con GE_FUNC_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map &atc_params, const char *model_file, const char *weights_file, domi::FrameworkType type, const char *op_conf = nullptr, const char *target = nullptr, - RunMode run_mode = GEN_OM_MODEL, bool is_dynamic_input = 
false); + RunMode run_mode = RunMode::GEN_OM_MODEL, bool is_dynamic_input = false); /** * @ingroup domi_omg @@ -89,15 +89,15 @@ GE_FUNC_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const char GE_FUNC_VISIBILITY Status ConvertFwkModelToJson(domi::FrameworkType framework, const char *model_file, const char *json_file); -GE_FUNC_VISIBILITY void GetGroupName(ge::proto::ModelDef &model); +GE_FUNC_VISIBILITY void GetGroupName(ge::proto::ModelDef &model_def); -GE_FUNC_VISIBILITY void FindParserSo(const std::string &path, std::vector &fileList, +GE_FUNC_VISIBILITY void FindParserSo(const std::string &path, std::vector &file_list, std::string &caffe_parser_path); GE_FUNC_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); GE_FUNC_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, - const std::string &output_format); + const std::string &output); GE_FUNC_VISIBILITY Status GetOutputLeaf(ge::NodePtr node, std::vector> &output_nodes_info); diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 3f7b5db1..1addd326 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -31,12 +31,7 @@ using domi::DOMI_TENSOR_ND; using domi::DOMI_TENSOR_RESERVED; using domi::domiTensorFormat_t; -using domi::FRAMEWORK_RESERVED; -using domi::FrameworkType; -using std::map; -using std::string; using std::unordered_map; -using std::vector; namespace ge { /** @@ -51,36 +46,13 @@ enum RunMode { DISPLAY_OM_INFO = 6 // display model info }; -/// -/// @ingroup domi_omg -/// @brief high-precision mode -/// -enum HighPrecisionMode { - // the FP16 high-precision function is disabled in common mode - HIGH_PRECISION_DEFAULT = 0, - - // high-precision mode, enabling FP16 high-precision mode (Convolution/FullConnect/AvgPooling are involved) - HIGH_PRECISION_FP16 = 1 -}; - -/// -/// @ingroup domi_omg -/// @brief description buffer data -/// -struct OMGBufferData { - void *data; - uint32_t length; -}; - struct OmgContext { - OmgContext() { - format = DOMI_TENSOR_ND; - } - domiTensorFormat_t format; + OmgContext() : format(domi::DOMI_TENSOR_ND) {} + domi::domiTensorFormat_t format; // format of the input specified by the command line - std::unordered_map input_nodes_format_map; - std::vector output_formats; + std::unordered_map input_nodes_format_map; + std::vector output_formats; // user-designate input dims std::vector>> user_input_dims; @@ -107,9 +79,9 @@ struct OmgContext { // net data nodes tensor names(caffe or onnx) std::vector data_tensor_names; // preferential format used by the entire network - domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; + domi::domiTensorFormat_t net_format = domi::DOMI_TENSOR_RESERVED; domi::FrameworkType type = domi::FRAMEWORK_RESERVED; - RunMode run_mode = ONLY_PRE_CHECK; + RunMode run_mode = RunMode::ONLY_PRE_CHECK; bool train_flag = false; std::string output_type; diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index a0415d73..96a5a018 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -108,6 +108,8 @@ class GE_FUNC_VISIBILITY ModelParser { * @return Others failed */ virtual domi::Status ToJson(const char *model_file, const char *json_file) { + (void)model_file; + (void)json_file; return domi::SUCCESS; } @@ -130,6 +132,8 @@ class GE_FUNC_VISIBILITY ModelParser { * @return Others failed */ virtual domi::Status ParseProto(const std::string 
&serialized_proto, ge::ComputeGraphPtr &graph) { + (void)serialized_proto; + (void)graph; return UNSUPPORTED; } @@ -144,6 +148,9 @@ class GE_FUNC_VISIBILITY ModelParser { */ virtual domi::Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback, ge::ComputeGraphPtr &graph) { + (void)serialized_proto; + (void)callback; + (void)graph; return UNSUPPORTED; } }; diff --git a/inc/framework/omg/parser/op_parser.h b/inc/framework/omg/parser/op_parser.h index 04731ff3..b511a8b0 100644 --- a/inc/framework/omg/parser/op_parser.h +++ b/inc/framework/omg/parser/op_parser.h @@ -50,7 +50,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual domi::Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0; + virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::OpDescPtr &op_desc) = 0; /** * @ingroup domi_omg @@ -60,7 +60,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual domi::Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0; + virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::Operator &op_dest) = 0; /** * @ingroup domi_omg @@ -70,7 +70,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual domi::Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0; + virtual domi::Status ParseWeights(const google::protobuf::Message *op_src, ge::NodePtr &node) = 0; /** * @ingroup domi_omg @@ -80,7 +80,7 @@ class GE_FUNC_VISIBILITY OpParser { * @return SUCCESS * @return FAILED */ - virtual domi::Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) { + virtual domi::Status GetFormat(const google::protobuf::Message *op_src, domi::domiTensorFormat_t &format) { (void)op_src; // Indicates that the op does not provide a value for format format = domi::DOMI_TENSOR_RESERVED; diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h index 7ae286df..cd5faa73 100644 --- a/inc/framework/omg/parser/parser_factory.h +++ b/inc/framework/omg/parser/parser_factory.h @@ -24,13 +24,11 @@ #include "framework/omg/omg_inner_types.h" #include "framework/omg/parser/parser_types.h" -using Status = domi::Status; - namespace domi { class WeightsParser; class ModelParser; -typedef std::shared_ptr (*MODEL_PARSER_CREATOR_FUN)(void); +using MODEL_PARSER_CREATOR_FUN = std::shared_ptr (*)(void); // Create modelparser for different frameworks class GE_FUNC_VISIBILITY ModelParserFactory { @@ -82,7 +80,7 @@ class GE_FUNC_VISIBILITY ModelParserRegisterar { } \ ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser) -typedef std::shared_ptr (*WEIGHTS_PARSER_CREATOR_FUN)(void); +using WEIGHTS_PARSER_CREATOR_FUN = std::shared_ptr (*)(void); // Create weightsparser for different frameworks class GE_FUNC_VISIBILITY WeightsParserFactory { diff --git a/inc/framework/omg/parser/parser_inner_ctx.h b/inc/framework/omg/parser/parser_inner_ctx.h index 969a94f8..5cf0d00b 100644 --- a/inc/framework/omg/parser/parser_inner_ctx.h +++ b/inc/framework/omg/parser/parser_inner_ctx.h @@ -29,8 +29,8 @@ namespace ge { struct ParserContext { // format of the input specified by the command line - std::unordered_map input_nodes_format_map; - std::vector output_formats; + std::unordered_map input_nodes_format_map; + std::vector output_formats; // user-designate input dims std::vector>> user_input_dims; std::map> input_dims; @@ -58,7 +58,7 @@ struct 
ParserContext { bool train_flag = false; domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; domi::FrameworkType type = domi::FRAMEWORK_RESERVED; - RunMode run_mode = GEN_OM_MODEL; + RunMode run_mode = RunMode::GEN_OM_MODEL; // save caffe custom proto path, used by caffe parse std::string custom_proto_path; // save caffe proto path, used by caffe parse diff --git a/inc/framework/omg/version.h b/inc/framework/omg/version.h index 156fbc5c..53f155fa 100644 --- a/inc/framework/omg/version.h +++ b/inc/framework/omg/version.h @@ -19,8 +19,6 @@ #include #include -#include -#include #include "framework/common/debug/log.h" #include "framework/common/string_util.h" @@ -34,7 +32,7 @@ class GE_FUNC_VISIBILITY PlatformVersionManager { static Status GetPlatformVersion(std::string &ver) { ver = "1.11.z"; const std::vector version_splits = StringUtils::Split(ver, '.'); - GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;); + GE_IF_BOOL_EXEC(version_splits.size() < 3U, GELOGW("Read platform version error!"); return FAILED;); GELOGI("Read current platform version: %s.", ver.c_str()); return SUCCESS; diff --git a/metadef b/metadef index 1d99928b..0a233571 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 1d99928bfcb02e45acc7db73e3ee57304ff1131a +Subproject commit 0a2335712484f85cd44a0f2402eac6932b22b40a diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h index 2a10859c..37a2e412 100644 --- a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -1,57 +1,57 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef AICPU_OP_TYPE_LIST_H_ -#define AICPU_OP_TYPE_LIST_H_ - -extern "C" { -enum OpKernelType { - TF_KERNEL, - CPU_KERNEL -}; - -enum ReturnCode { - OP_TYPE_NOT_SUPPORT, - FORMAT_NOT_SUPPORT, - DTYPE_NOT_SUPPORT -}; - -#pragma pack(push, 1) -//One byte alignment -struct SysOpInfo { - uint64_t opLen; - uint64_t opType; - OpKernelType kernelsType; -}; - -struct SysOpCheckInfo { - uint64_t opListNum; - uint64_t offSetLen; - uint64_t sysOpInfoList; - uint64_t opParamInfoList; -}; - -struct SysOpCheckResp { - uint64_t opListNum; - bool isWithoutJson; - uint64_t returnCodeList; - uint64_t sysOpInfoList; - uint64_t opParamInfoList; -}; -#pragma pack(pop) -} - -#endif // AICPU_OP_TYPE_LIST_H_ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AICPU_OP_TYPE_LIST_H_ +#define AICPU_OP_TYPE_LIST_H_ + +extern "C" { +enum OpKernelType { + TF_KERNEL, + CPU_KERNEL +}; + +enum ReturnCode { + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT +}; + +#pragma pack(push, 1) +// One byte alignment +struct SysOpInfo { + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; +}; + +struct SysOpCheckInfo { + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; + +struct SysOpCheckResp { + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; +#pragma pack(pop) +} + +#endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h index 72e21f6f..ceae0f0f 100644 --- a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h +++ b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h @@ -29,6 +29,53 @@ struct AicpuParamHead uint32_t extInfoLength; // extInfo struct Length uint64_t extInfoAddr; // extInfo address }; + +enum class AicpuConfigMsgType { + AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */ + AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */ + AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */ +}; + +enum class AicpuErrMsgType { + ERR_MSG_TYPE_NULL = 0, + ERR_MSG_TYPE_AICORE = 1, + ERR_MSG_TYPE_AICPU = 2, +}; + +typedef struct tagAicpuConfigMsg { + uint8_t msgType; + uint8_t reserved1; + uint16_t bufLen; + uint32_t offset; + uint64_t bufAddr; + uint32_t tsId; + uint32_t reserved2; +} AicpuConfigMsg; + +typedef struct tagAicoreErrMsgInfo { + uint8_t errType; + uint8_t version; + uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ + uint32_t errorCode; + uint32_t modelId; + uint32_t taskId; + uint32_t streamId; + uint64_t transactionId; + uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */ +} AicoreErrMsgInfo; + +typedef struct tagAicpuErrMsgInfo { + uint8_t errType; + uint8_t version; + uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ + uint32_t errorCode; + uint32_t modelId; + uint32_t streamId; + uint64_t transactionId; + char opName[64]; /* op name str */ + char errDesc[128]; /* err msg desc info */ + uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */ +} AicpuErrMsgInfo; #pragma pack(pop) } // namespace aicpu diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h index bc2e415f..042d952b 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine.h @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #ifndef AICPU_ENGINE_H__ #define AICPU_ENGINE_H__ diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 5733d68f..ec92a036 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -21,7 +21,7 @@ namespace aicpu { namespace FWKAdapter { - +using char_t = char; // API RETURN CODE enum FWKAdptAPIRetCode { FWK_ADPT_SUCCESS = 0, // success @@ -63,6 +63,8 @@ enum FWKTaskExtInfoType { FWK_ADPT_EXT_BITMAP, FWK_ADPT_EXT_TOPIC_TYPE, FWK_ADPT_EXT_ASYNCWAIT, + FWK_ADPT_EXT_UNKNOWN_SHAPE_INPUT_INDEX, + FWK_ADPT_EXT_UNKNOWN_SHAPE_OUTPUT_INDEX, FWK_ADPT_EXT_INVALID }; @@ -113,7 +115,7 @@ struct StrFWKKernel { typedef StrFWKKernel FWKOperateParam; // Extent info ShapeAndType -const uint32_t kMaxShapeDims = 8; +const uint32_t kMaxShapeDims = 8U; #pragma pack(push, 1) struct ShapeAndType { int32_t type; @@ -122,13 +124,13 @@ struct ShapeAndType { #pragma pack(pop) // Extend info structure for extInfoAddr -const uint32_t kExtInfoHeadSize = 8; +const uint32_t kExtInfoHeadSize = 8U; #pragma pack(push, 1) struct ExtInfo { int32_t infoType; // extend type uint32_t infoLen; // length for infoMsg - char infoMsg[0]; // extend value + char_t infoMsg[0]; // extend value }; #pragma pack(pop) @@ -143,9 +145,9 @@ struct ResultSummary { #pragma pack(push, 1) struct AsyncWait { - uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait - uint32_t waitId; // wait id, GE refresh - uint32_t timeOut; // reserved + uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait + uint32_t waitId; // wait id, GE refresh + uint32_t timeOut; // reserved uint64_t reserved; }; #pragma pack(pop) diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index aa43c82a..4dd9e023 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -94,13 +94,13 @@ enum HcclEventType { HCCL_EVENT_RESERVED /**< reserved */ }; -const u32 TAG_MAX_LEN = 127; // maximum tag length +const u32 TAG_MAX_LEN = 127; // maximum tag length using TagAttr = struct TagAttrDef { - char name[TAG_MAX_LEN + 1]; // tag identifier - // whether the caller actively invokes the receive interface for data arriving on this tag, 0 = no, 1 = yes (reserved, not supported yet). - // for activeRecv = 0, the caller is notified proactively when the receive side gets data or a send request. + char name[TAG_MAX_LEN + 1]; // tag identifier + // whether the caller actively invokes the receive interface for data arriving on this tag, 0 = no, 1 = yes (reserved, not supported yet). + // for activeRecv = 0, the caller is notified proactively when the receive side gets data or a send request. uint32_t activeRecv; - uint32_t sendCredit; // number of in-flight sends allowed on this tag + uint32_t sendCredit; // number of in-flight sends allowed on this tag uint32_t eventId; }; @@ -188,6 +188,15 @@ struct HcomGatherAllToAllVParams { const char *group; // not used now }; +typedef enum workMode { +HCCL_MODE_NORMAL = 0, // only exact-match probe is supported; probe with ANY is not +HCCL_MODE_ANY = 1 // supports probe with ANY_SOURCE + ANY_TAG +} WorkMode; + +typedef struct tagCommAttr { + WorkMode mode; // probe mode within the communication domain + uint32_t deviceId = 0; +} CommAttr; #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index bf1f395b..cdc8d840 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -126,72 +126,6 @@ extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, co * @return HcclResult */ extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); - -/** - * @brief Initialize hcom executor. - * - * @param void - * @return HcclResult - */ -HcclResult HcomExecInitialize(); - -/** - * @brief Finalize hcom executor.
- * - * @param void - * @return HcclResult - */ -HcclResult HcomExecFinalize(); - -/** - * @brief Put collective communication operation into hcom executor. - * - * @param opInfo information about collective communication operation. - * @param callback callback after collective communication operation. - * @return HcclResult - */ -HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); - -/** - * @brief Put remote access operation into hcom executor. - * - * @param remoteAccessType operation type (read or write). - * @param addrInfos address information about collective communication operation. - * @param callback callback after collective communication operation. - * @return HcclResult - */ -HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, - const std::vector& addrInfos, - std::function callback); - -/** - * @brief Put alltoallv communication operation into hcom executor. - * - * @param params information about alltoallv communication operation. - * @param callback callback after collective communication operation. - * @return HcclResult - */ -HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function callback); - -/** - * @brief Put agther alltoallv communication operation into hcom executor. - * - * @param params information about agther alltoallv communication operation. - * @param callback callback after collective communication operation. - * @return HcclResult - */ -HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params, - std::function callback); - -/** - * @brief Register memories and init resources for remote access. - * - * @param addrList memory addresses for remote access. - * @param count number of remote memory addresses. - * @return HcclResult - */ -extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); - #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h index c48aaa63..f7eb7435 100644 --- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h +++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h @@ -1,18 +1,12 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* +* @file mmpa_api.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ #ifndef _MMPA_API_H_ #define _MMPA_API_H_ diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index 46fb6e21..fa72aed2 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -1,18 +1,12 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* +* @file mmpa_linux.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ #ifndef MMPA_LINUX_MMPA_LINUX_H #define MMPA_LINUX_MMPA_LINUX_H diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h index 9df5b9ce..9c6f6499 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h @@ -79,6 +79,9 @@ typedef long LONG; #define MMPA_THREAD_SCHED_OTHER SCHED_OTHER #define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN +#define MMPA_PATH_SEPARATOR_STR "/" +#define MMPA_PATH_SEPARATOR_CHAR '/' + #define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER #define MMPA_MAX_NI 19 diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h index 1627d7a9..9f8a72cd 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h @@ -1,83 +1,86 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef MMPA_TYPEDEF_WIN_H -#define MMPA_TYPEDEF_WIN_H - -#ifdef __cplusplus -#if __cplusplus -extern "C" { -#endif // __cpluscplus -#endif // __cpluscplus - -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -#define EN_OK 0 -#define EN_ERR 1 -#define EN_ERROR (-1) -#define EN_INVALID_PARAM (-2) -#define EN_TIMEOUT (-3) - -#define HANDLE_INVALID_VALUE (-1) -#define INVALID_SOCKET_HANDLE INVALID_SOCKET -#define MMPA_MEM_MAX_LEN (0x7fffffff) -#define MMPA_PROCESS_ERROR (0x7fffffff) - -#define MMPA_ONE_THOUSAND 1000 -#define MMPA_COMPUTER_BEGIN_YEAR 1900 -#define SUMMER_TIME_OR_NOT (-1) -#define MMPA_ZERO 0 -#define MMPA_VALUE_ONE 1 -#define MMPA_SOCKET_MAIN_EDITION 2 -#define MMPA_SOCKET_SECOND_EDITION 0 -#define MMPA_PIPE_BUF_SIZE 1024 -#define MMPA_MAX_SCANDIR_COUNT 1024 -#define MAX_IOVEC_SIZE 32 -#define MMPA_PIPE_COUNT 2 -#define MMPA_THREADNAME_SIZE 16 -#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) -#define MMPA_MIN_OS_VERSION_SIZE 64 - -#define MMPA_MAX_NI 19 -#define MMPA_MIDDLE_NI 5 -#define MMPA_LOW_NI (-5) -#define MMPA_MIN_NI (-20) -#define MMPA_MAX_FILE 128 - -#define MMPA_MAX_THREAD_PIO 99 -#define MMPA_MIDDLE_THREAD_PIO 66 -#define MMPA_LOW_THREAD_PIO 33 -#define MMPA_MIN_THREAD_PIO 1 - -#define MMPA_THREAD_SCHED_RR 0 -#define MMPA_THREAD_SCHED_FIFO 0 -#define MMPA_THREAD_SCHED_OTHER 0 -#define MMPA_THREAD_MIN_STACK_SIZE 0 - -#define MM_MUTEX_INITIALIZER NULL - -#ifdef __cplusplus -#if __cplusplus -} -#endif // __cpluscplus -#endif // __cpluscplus -#endif // _MMPA_TYPEDEF_WIN_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MMPA_TYPEDEF_WIN_H +#define MMPA_TYPEDEF_WIN_H + +#ifdef __cplusplus +#if __cplusplus +extern "C" { +#endif // __cpluscplus +#endif // __cpluscplus + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#define EN_OK 0 +#define EN_ERR 1 +#define EN_ERROR (-1) +#define EN_INVALID_PARAM (-2) +#define EN_TIMEOUT (-3) + +#define HANDLE_INVALID_VALUE (-1) +#define INVALID_SOCKET_HANDLE INVALID_SOCKET +#define MMPA_MEM_MAX_LEN (0x7fffffff) +#define MMPA_PROCESS_ERROR (0x7fffffff) + +#define MMPA_ONE_THOUSAND 1000 +#define MMPA_COMPUTER_BEGIN_YEAR 1900 +#define SUMMER_TIME_OR_NOT (-1) +#define MMPA_ZERO 0 +#define MMPA_VALUE_ONE 1 +#define MMPA_SOCKET_MAIN_EDITION 2 +#define MMPA_SOCKET_SECOND_EDITION 0 +#define MMPA_PIPE_BUF_SIZE 1024 +#define MMPA_MAX_SCANDIR_COUNT 1024 +#define MAX_IOVEC_SIZE 32 +#define MMPA_PIPE_COUNT 2 +#define MMPA_THREADNAME_SIZE 16 +#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) +#define MMPA_MIN_OS_VERSION_SIZE 64 + +#define MMPA_MAX_NI 19 +#define MMPA_MIDDLE_NI 5 +#define MMPA_LOW_NI (-5) +#define MMPA_MIN_NI (-20) +#define MMPA_MAX_FILE 128 + +#define MMPA_PATH_SEPARATOR_STR "\\" +#define MMPA_PATH_SEPARATOR_CHAR '\\' + +#define MMPA_MAX_THREAD_PIO 99 +#define MMPA_MIDDLE_THREAD_PIO 66 +#define MMPA_LOW_THREAD_PIO 33 +#define MMPA_MIN_THREAD_PIO 1 + +#define MMPA_THREAD_SCHED_RR 0 +#define MMPA_THREAD_SCHED_FIFO 0 +#define MMPA_THREAD_SCHED_OTHER 0 +#define MMPA_THREAD_MIN_STACK_SIZE 0 + +#define MM_MUTEX_INITIALIZER NULL + +#ifdef __cplusplus +#if __cplusplus +} +#endif // __cpluscplus +#endif // __cpluscplus +#endif // _MMPA_TYPEDEF_WIN_H_ diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index 699fe815..e03131f2 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -1,18 +1,12 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* +* @file mmpa_win.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/
 
 #ifndef MMPA_WIN_MMPA_WIN_H
 #define MMPA_WIN_MMPA_WIN_H
diff --git a/third_party/fwkacllib/inc/ops/OWNERS b/third_party/fwkacllib/inc/ops/OWNERS
new file mode 100755
index 00000000..f95df23c
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/OWNERS
@@ -0,0 +1,65 @@
+approvers:
+- gegenhua
+- qiaohairong
+reviewers:
+- chuqingxi
+- wang-jintang
+- luanma_bl
+- chen-kang30
+- li-xulong
+- Allan_Yu
+- minshen
+- pan-jixing
+- yl_wang
+- lijie176
+- mabing726
+- miao-fangzheng
+- huang-qiang002
+- su-yueming
+- chenpeng-hw
+- wang_jianle
+- luanma_bl
+- LDLD0524
+- wywismygod2020
+- lipeiyang3699
+- koala-zhang
+- zhu-jingjing
+- zhaozhihui5
+- simbaliuxx
+- lyxyz
+- zhou-qilong
+- block0219
+- hanfuwei
+- xchu42
+- sheng-nan
+- yangjing88
+- alexlak
+- xig514
+- jellylj
+- brightlyking
+- liuzhenyuhw
+- djh602
+- wangjiangben_hw
+- li1jie
+- clinglai
+- liujun2014
+- soupkey
+- wu-shengji
+- cimeng
+- ccl_ligang
+- xiaozhedeng
+- granpad7
+- tc1qaz
+- Ronnie_zheng
+- xiexianhu
+- zhouyujoe
+- zhaoping12
+- tanshengshun
+- fanqirui
+- xu-binglin
+- yangyang016
+- zhangzhongzt
+- gegenhua
+- qiaohairong
+options:
+  no_parent_owners: true
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h
index d56ac5bb..7928db15 100644
--- a/third_party/fwkacllib/inc/ops/array_ops.h
+++ b/third_party/fwkacllib/inc/ops/array_ops.h
@@ -745,6 +745,28 @@ REG_OP(UnsqueezeV2)
     .ATTR(axis, ListInt, {})
     .OP_END_FACTORY_REG(UnsqueezeV2)
+
+/**
+*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape
+is changed; the data is not changed. \n
+
+*@par Inputs:
+*x: A tensor.
+*axes: A list of int64, which indicates the dimensions to be inserted. \n
+
+*@par Outputs:
+*y: Reshaped tensor with the same data as the input. \n
+
+*@par Third-party framework compatibility
+*Compatible with the ONNX operator Unsqueeze in V13. \n
+*/
+
+REG_OP(UnsqueezeV3)
+    .INPUT(x, TensorType::ALL())
+    .INPUT(axes, ListInt)
+    .OUTPUT(y, TensorType::ALL())
+    .OP_END_FACTORY_REG(UnsqueezeV3)
+
 /**
 *@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. \n
@@ -821,6 +843,28 @@ REG_OP(SqueezeV2)
     .ATTR(axis, ListInt, {})
     .OP_END_FACTORY_REG(SqueezeV2)
+
+/**
+*@brief Removes dimensions of size 1 from the shape of a tensor according to axes. \n
+
+*@par Inputs:
+*x: A tensor.
+*axes: An optional list of int64. If not specified, squeezes all dimensions of
+size 1. If specified, only squeezes the dimensions listed. It is an error to
+squeeze a dimension that is not 1. \n
+
+*@par Outputs:
+*y: Reshaped tensor with the same data as the input. \n
+
+*@par Third-party framework compatibility
+*Compatible with the ONNX operator Squeeze in V13. \n
+*/
+
+REG_OP(SqueezeV3)
+    .INPUT(x, TensorType::ALL())
+    .OPTIONAL_INPUT(axes, ListInt)
+    .OUTPUT(y, TensorType::ALL())
+    .OP_END_FACTORY_REG(SqueezeV3)
+
 /**
 *@brief Returns an integer representing the rank of input tensor. The rank of a tensor
is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n
@@ -1273,7 +1317,7 @@ REG_OP(SortV2)
 * @par Inputs:
 * One inputs, including:
 * @li x: A Tensor. Must be one of the following types:
-* float16, float32, int32, int8 ,uint8. \n
+* float16, float32, int32, int64, int8, uint8, bool. \n
 * @li shape: A Tensor to specify the shape that the input tensor expanded to.
 \n
 * @par Outputs:
@@ -1284,9 +1328,9 @@ REG_OP(SortV2)
 */
 REG_OP(Expand)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_INT8, DT_UINT8, DT_BOOL}))
     .INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_INT8, DT_UINT8, DT_BOOL}))
     .OP_END_FACTORY_REG(Expand)
 /**
@@ -1342,13 +1386,37 @@ REG_OP(NonZeroWithValue)
     .ATTR(dtype, Type, DT_INT32)
     .OP_END_FACTORY_REG(NonZeroWithValue)
+
+
+/**
+*@brief Returns a tensor with updated shape from NonZeroWithValue. \n
+
+*@par Inputs:
+*value: A Tensor. The output of NonZeroWithValue. \n
+*index: A Tensor. The output of NonZeroWithValue. \n
+*count: A Tensor of type INT32. The count of non-zero elements in the input. \n
+
+*@par Outputs:
+* out_value: A Tensor. Has the same type as "value" . \n
+* out_index: A Tensor. Has the same type as "index". \n
+*/
+REG_OP(NonZeroWithValueShape)
+    .INPUT(value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16,
+        DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+    .INPUT(index, TensorType({DT_INT32}))
+    .INPUT(count, TensorType({DT_INT32}))
+    .OUTPUT(out_value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16,
+        DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+    .OUTPUT(out_index, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(NonZeroWithValueShape)
+
+
 /**
 * @brief Expand the input tensor to a compatible shape. \n
 * @par Inputs:
 * One inputs, including:
 * x: A Tensor. Must be one of the following types:
-* float16, float32, int32, int8 ,uint8. \n
+* float16, float32, int32, int8, uint8, bool. \n
 * @par Attributes:
 * shape: A required listInt to specify the shape that the input tensor expanded to. \n
@@ -1362,8 +1430,8 @@ REG_OP(NonZeroWithValue)
 */
 REG_OP(ExpandD)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL}))
     .REQUIRED_ATTR(shape, ListInt)
     .OP_END_FACTORY_REG(ExpandD)
@@ -1404,6 +1472,43 @@ REG_OP(UpdateTensorDesc)
        DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE}))
     .REQUIRED_ATTR(shape, ListInt)
     .OP_END_FACTORY_REG(UpdateTensorDesc)
+
+/**
+*@brief Queue data for other operators. \n
+*@par Attributes:
+*index: Index of the input tensor. The data type must be int32 or int64.
+If the net has three data nodes, one should be set to 0, another to 1,
+and the last to 2. \n
+*queue_name: queue name
+*output_types: types of output data
+*output_shapes: shapes of output data
+*@par Outputs:
+*y: A DT_UINT8 tensor. \n
+*/
+REG_OP(QueueData)
+    .OUTPUT(y, TensorType({DT_UINT8}))
+    .ATTR(index, Int, 0)
+    .ATTR(queue_name, String, "")
+    .ATTR(output_types, ListType, {})
+    .ATTR(output_shapes, ListListInt, {{}, {}})
+    .OP_END_FACTORY_REG(QueueData)
+
+/**
+* @brief Ensures that the tensor's shape matches the expected shape. \n
+* @par Inputs:
+* input: A Tensor. \n
+* @par Attributes:
+* shape: The shape that the input is checked against. \n
+* @par Outputs:
+* output: A tensor.
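+*
+* @par Example:
+* A minimal construction sketch, not taken from this header: the node names are
+* hypothetical, and it assumes the set_input_xxx and set_attr_xxx helpers that
+* operator_reg.h generates for every registered op:
+* @code
+*   ge::op::EnsureShape check("ensure_shape");
+*   check.set_input_input(prev_op);          // prev_op: any upstream ge::Operator
+*   check.set_attr_shape({1, 3, 224, 224});  // shape the input must match
+* @endcode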
\n +*/ +REG_OP(EnsureShape) + .INPUT(input, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \ + DT_FLOAT,DT_DOUBLE})) + .OUTPUT(output, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \ + DT_FLOAT,DT_DOUBLE})) + .REQUIRED_ATTR(shape, ListInt) + .OP_END_FACTORY_REG(EnsureShape) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h index 6e908091..bf50b24d 100644 --- a/third_party/fwkacllib/inc/ops/ctc_ops.h +++ b/third_party/fwkacllib/inc/ops/ctc_ops.h @@ -146,7 +146,7 @@ REG_OP(CTCBeamSearchDecoder) *@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size, and C = number of classes (including blank). It represent the logarithmized probabilities of the outputs. -*@li targets: Tensor of size (N, S), where S= max target length. +*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length. It represent the target sequences. *@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs. *@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets. @@ -159,11 +159,12 @@ REG_OP(CTCBeamSearchDecoder) *@li blank : Blank label. Default 0. *@li reduction: Specifies the reduction to apply to the output. Default: 'mean'. *@li zero_infinity : Whether to zero infinite losses and the associated gradients. +*@li label_max : The max length of targets. *@par Third-party framework compatibility * Compatible with Pytorch CTCLoss operator. -*@par Restrictions: +*@attention Constraints: *The limit of Label’s length is 1K. */ REG_OP(CTCLossV2) @@ -176,6 +177,7 @@ REG_OP(CTCLossV2) .ATTR(blank, Int, 0) .ATTR(reduction, String, "mean") .ATTR(zero_infinity, Bool, false) + .ATTR(label_max, Int, 0) .OP_END_FACTORY_REG(CTCLossV2) /** @@ -186,7 +188,7 @@ REG_OP(CTCLossV2) *@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size, and C = number of classes (including blank). It represent the logarithmized probabilities of the outputs. -*@li targets: Tensor of size (N, S), where S= max target length. +*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length. It represent the target sequences. *@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs. *@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets. @@ -200,11 +202,12 @@ REG_OP(CTCLossV2) *@li blank : Blank label. Default 0. *@li reduction: Specifies the reduction to apply to the output. Default: 'mean'. *@li zero_infinity : Whether to zero infinite losses and the associated gradients. +*@li label_max : The max length of targets. *@par Third-party framework compatibility * Compatible with Pytorch CTCLoss operator. -*@par Restrictions: +*@attention Constraints: *The limit of Label’s length is 1K. 
 */
 REG_OP(CTCLossV2Grad)
@@ -219,6 +222,7 @@ REG_OP(CTCLossV2Grad)
     .ATTR(blank, Int, 0)
     .ATTR(reduction, String, "mean")
     .ATTR(zero_infinity, Bool, false)
+    .ATTR(label_max, Int, 0)
     .OP_END_FACTORY_REG(CTCLossV2Grad)
 } // namespace ge
diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h
index 3034730d..91a59327 100644
--- a/third_party/fwkacllib/inc/ops/data_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h
@@ -2398,6 +2398,32 @@ REG_OP(DynamicGetNext)
     .ATTR(_getnext_inputs_shape_range, String, "")
     .OP_END_FACTORY_REG(DynamicGetNext)
+
+/**
+* @brief DynamicGetNextV2, dynamically gets the next batch of data.
+* @par Outputs:
+* y: the data in the iterator; all types are available
+* @par Attributes:
+* output_types: types of all outputs
+* output_shapes: shapes of all outputs
+* channel_name: name of the data channel to read from
+*_dynamic_graph_execute_mode: dynamic graph execution mode,
+value is one of lazy_recompile and dynamic_execute
+*_getnext_inputs_shape_range: shape ranges of outputs,
+it works when _dynamic_graph_execute_mode is dynamic_execute
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(DynamicGetNextV2)
+    .DYNAMIC_OUTPUT(y, TensorType::ALL())
+    .ATTR(output_types, ListType, {})
+    .ATTR(channel_name, String, "")
+    .ATTR(output_shapes, ListListInt, {{}, {}})
+    .ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile")
+    .ATTR(_getnext_inputs_shape_range, String, "")
+    .OP_END_FACTORY_REG(DynamicGetNextV2)
+
 /**
 *@brief AdpGetNext
 *@par Outputs:
@@ -2433,5 +2459,24 @@ REG_OP(GetNextV2)
     .ATTR(output_shapes, ListListInt, {{}, {}})
     .ATTR(channel_name, String, "")
     .OP_END_FACTORY_REG(GetNextV2)
+
+/**
+*@brief GetNextFromQueue
+*@par Inputs:
+*x: the data; only uint8 is supported
+*@par Outputs:
+*y: the data in the iterator; all types are available
+*@par Attributes:
+*output_types: types of all outputs
+*output_shapes: shapes of all outputs
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(GetNextFromQueue)
+    .INPUT(x, TensorType({DT_UINT8}))
+    .DYNAMIC_OUTPUT(y, TensorType::ALL())
+    .ATTR(output_types, ListType, {})
+    .ATTR(output_shapes, ListListInt, {{}, {}})
+    .OP_END_FACTORY_REG(GetNextFromQueue)
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/deep_md.h b/third_party/fwkacllib/inc/ops/deep_md.h
index fadfe128..ef335f33 100644
--- a/third_party/fwkacllib/inc/ops/deep_md.h
+++ b/third_party/fwkacllib/inc/ops/deep_md.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -24,6 +24,87 @@
 #include "graph/operator_reg.h"
 namespace ge {
+
+/**
+* @brief Calculate TabulateFusion. \n
+*
+* @par Inputs:
+* Four inputs, including:
+* @li table: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li table_info: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li em_x: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li em: A Tensor. Must be one of the following types: float16, float32, float64. \n
+*
+* @par Outputs:
+* descriptor: A Tensor. Must be one of the following types: float16, float32, float64. \n
+*
+* @par Attributes:
+* Three attributes, including:
+* @li last_layer_size: int value.
+* @li split_count: int value.
+* @li split_index: int value. \n
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(TabulateFusion)
+    .INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(table_info, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(em_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(em, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(descriptor, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(last_layer_size, Int)
+    .ATTR(split_count, Int, 1)
+    .ATTR(split_index, Int, 0)
+    .OP_END_FACTORY_REG(TabulateFusion)
+
+/**
+* @brief Calculate ProdEnvMatA. \n
+*
+* @par Inputs:
+* @li coord: A Tensor. Must be one of the following types: float32, float64.
+* @li type: A Tensor. Must be one of the following types: int32.
+* @li natoms: A Tensor. Must be one of the following types: int32.
+* @li box: A Tensor. Must be one of the following types: float32, float64.
+* @li mesh: A Tensor. Must be one of the following types: int32.
+* @li davg: A Tensor. Must be one of the following types: float32, float64.
+* @li dstd: A Tensor. Must be one of the following types: float32, float64.
+*
+* @par Outputs:
+* descrpt: A Tensor. Must be one of the following types: float32, float64.
+* descrpt_deriv: A Tensor. Must be one of the following types: float32, float64.
+* rij: A Tensor. Must be one of the following types: float32, float64.
+* nlist: A Tensor. Must be one of the following types: int32. \n
+*
+* @par Attributes:
+* @li rcut_a: A Float.
+* @li rcut_r: A Float.
+* @li rcut_r_smth: A Float.
+* @li sel_a: A ListInt.
+* @li sel_r: A ListInt.
+* @li split_count: An Int.
+* @li split_index: An Int.\n
+*
+*/
+REG_OP(ProdEnvMatA)
+    .INPUT(coord, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(type, TensorType({DT_INT32}))
+    .INPUT(natoms, TensorType({DT_INT32}))
+    .INPUT(box, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(mesh, TensorType({DT_INT32}))
+    .INPUT(davg, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(dstd, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(descrpt, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(descrpt_deriv, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(rij, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(nlist, TensorType({DT_INT32}))
+    .ATTR(rcut_a, Float, 1.0)
+    .ATTR(rcut_r, Float, 1.0)
+    .ATTR(rcut_r_smth, Float, 1.0)
+    .ATTR(sel_a, ListInt, {})
+    .ATTR(sel_r, ListInt, {})
+    .ATTR(split_count, Int, 1)
+    .ATTR(split_index, Int, 0)
+    .OP_END_FACTORY_REG(ProdEnvMatA)
 /**
 * @brief Calculate ProdForceSeA. \n
 *
@@ -53,7 +134,80 @@ REG_OP(ProdForceSeA)
     .OUTPUT(atom_force, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
     .REQUIRED_ATTR(n_a_sel, Int)
     .REQUIRED_ATTR(n_r_sel, Int)
+    .ATTR(split_count, Int, 1)
+    .ATTR(split_index, Int, 0)
     .OP_END_FACTORY_REG(ProdForceSeA)
+
+/**
+* @brief Calculate ProdVirialSeA. \n
+*
+* @par Inputs:
+* Five inputs, including:
+* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li rij: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li nlist: A Tensor. dtype is int32.
+* @li natoms: A Tensor. dtype is int32. \n
+*
+* @par Outputs:
+* Two outputs, including:
+* @li virial: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n
+*
+* @par Attributes:
+* Four attributes, including:
+* @li n_a_sel: Int value.
+* @li n_r_sel: Int value.
+* @li split_count: Int value.
+* @li split_index: Int value. \n
+*/
+REG_OP(ProdVirialSeA)
+    .INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(nlist, TensorType({DT_INT32}))
+    .INPUT(natoms, TensorType({DT_INT32}))
+    .OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(n_a_sel, Int)
+    .REQUIRED_ATTR(n_r_sel, Int)
+    .ATTR(split_count, Int, 1)
+    .ATTR(split_index, Int, 0)
+    .OP_END_FACTORY_REG(ProdVirialSeA)
+
+/**
+* @brief Calculate TabulateFusionGrad. \n
+*
+* @par Inputs:
+* Six inputs, including:
+* @li table: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li table_info: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li em_x: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li em: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li dy: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li descriptor: A Tensor. Must be one of the following types: float16, float32, float64. \n
+*
+* @par Outputs:
+* @li dy_dem_x: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li dy_dem: A Tensor. Must be one of the following types: float16, float32, float64. \n
+*
+* @par Attributes:
+* Two attributes, including:
+* @li split_count: Int value.
+* @li split_index: Int value. \n
+*/
+REG_OP(TabulateFusionGrad)
+    .INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(table_info, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(em_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(em, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(descriptor, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(dy_dem_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(dy_dem, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(split_count, Int, 1)
+    .ATTR(split_index, Int, 0)
+    .OP_END_FACTORY_REG(TabulateFusionGrad)
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index be201579..1cd89c69 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -331,7 +331,7 @@ REG_OP(Sub)
 *@par Inputs:
 *One input, including: \n
-*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
+*x: A Tensor. Must be one of the following types: float16, float32, double, int8, int16, int32, int64. \n
 *@par Outputs:
 *y: A Tensor. Has the same type as "x". \n
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator Abs.
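 *
 * @par Example:
 * A minimal construction sketch, not taken from this header: the node names are
 * hypothetical, and it assumes the set_input_xxx helpers that operator_reg.h
 * generates for every registered op:
 * @code
 *   ge::op::Abs abs_op("abs");
 *   abs_op.set_input_x(prev_op);  // prev_op: any upstream ge::Operator
 * @endcode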
 */
 REG_OP(Abs)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16,
+                          DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16,
+                           DT_INT32, DT_INT64}))
     .OP_END_FACTORY_REG(Abs)
 /**
@@ -3821,6 +3823,10 @@ REG_OP(CosineSimilarity)
 * @li max_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
 * @li global_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
 * @li weight_decay: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+* @li step_size: An optional Tensor. Datatype is same as exp_avg. Shape (1, ).\n
+
+* @par Attributes:
+* @li adam_mode: An optional string. Defaults to "adam". \n
 
 *@par Outputs:
 *three inputs, including:
@@ -3840,9 +3846,11 @@ REG_OP(ApplyAdamV2)
     .INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 }))
     .INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 }))
     .INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OPTIONAL_INPUT(step_size, TensorType({ DT_FLOAT, DT_FLOAT16 }))
     .OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 }))
     .OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 }))
     .OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .ATTR(adam_mode, String, "adam")
     .OP_END_FACTORY_REG(ApplyAdamV2)
 } // namespace ge
diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h
index dc3a96b6..bf0846c2 100644
--- a/third_party/fwkacllib/inc/ops/image_ops.h
+++ b/third_party/fwkacllib/inc/ops/image_ops.h
@@ -132,7 +132,7 @@
 nearest neighbor sampling to a common output size specified by crop_size . \n
 *@li x:A Tensor. Must be one of the following types:uint8, uint16, int8,
 int16, int32, int64, float16, float, double. A 4-D tensor of shape
 [batch, image_height, image_width, depth]. The format must be NHWC.
-*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4].
+*@li boxes: A Tensor. Must be one of the following types: float16, float. A 2-D tensor of shape [num_boxes, 4].
 *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes]
 with int32 values in [0, batch).
 *@li crop_size: A Tensor of type int32. A 1-D tensor of 2 elements, crop_size
@@ -146,7 +146,7 @@
 extrapolation, when applicable.
 NearestNeighbor . \n
 *@par Outputs:
-*y:A Tensor of type float. The format must be NHWC. \n
+*y: A Tensor. Must be one of the following types: float16, float. The format must be NHWC. \n
 *@attention Constraints:
 *Input images must be a 4-D tensor . \n
@@ -158,10 +158,10 @@
 NearestNeighbor . \n
 REG_OP(CropAndResize)
     .INPUT(x, TensorType({DT_UINT8, DT_UINT16, DT_INT8, \
         DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
-    .INPUT(boxes, TensorType({DT_FLOAT}))
+    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(box_index, TensorType({DT_INT32}))
     .INPUT(crop_size, TensorType({DT_INT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .ATTR(extrapolation_value, Float, 0)
     .ATTR(method, String, "bilinear")
     .OP_END_FACTORY_REG(CropAndResize)
@@ -175,7 +175,7 @@ REG_OP(CropAndResize)
 *Input images must be a 5HD tensor. Inputs include:
 *@li x:A Tensor. Must be one of the following types:float16, float. A 5HD tensor of shape
 * [batch, C1, image_height, image_width, C0].
-*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4].
+*@li boxes: A Tensor. Must be one of the following types: float16, float.
A 2-D tensor of shape [num_boxes, 4]. *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n *@par Attributes: @@ -184,7 +184,7 @@ REG_OP(CropAndResize) *@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n *@par Outputs: -*y:A Tensor of type float . \n +*y: A Tensor. Must be one of the following types: float16, float. \n *@attention Constraints: *Input images must be a 5HD tensor . \n @@ -197,9 +197,9 @@ REG_OP(CropAndResize) */ REG_OP(CropAndResizeD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(box_index, TensorType({DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(crop_size, ListInt) .ATTR(extrapolation_value, Float, 0) .ATTR(method, String, "bilinear") @@ -888,10 +888,10 @@ Defaults to false . \n *@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: -*y: 4-D with shape [batch, new_height, new_width, channels] . \n +*y: A Tensor with the same type and format as input "images" . \n *@par Third-party framework compatibility -*Compatible with tensorflow ResizeNearestNeighborV2 operator. +*Compatible with tensorflow ResizeNearestNeighbor operator. */ REG_OP(ResizeNearestNeighborV2) diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index d3e8c0bf..f6e2df88 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -378,7 +378,7 @@ to each component of an element of this dataset. REG_OP(GetNext) .DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL})) - .ATTR(output_types, ListInt, {}) + .ATTR(output_types, ListType, {}) .ATTR(output_shapes, ListListInt, {}) .ATTR(output_num, Int, 1) .ATTR(channel_name, String, "") diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 0d9a8424..e8931120 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -213,9 +213,9 @@ REG_OP(GEMM) */ REG_OP(BatchMatMul) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .ATTR(adj_x1, Bool, false) .ATTR(adj_x2, Bool, false) .OP_END_FACTORY_REG(BatchMatMul) @@ -246,11 +246,11 @@ REG_OP(BatchMatMul) */ REG_OP(BatchMatMulV2) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .ATTR(adj_x1, Bool, 
false) .ATTR(adj_x2, Bool, false) .ATTR(offset_x, Int, 0) @@ -505,17 +505,17 @@ REG_OP(ScatterElements) * Three inputs, including: *@li var: An ND Tensor . -*Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor of type int32 or int64 - +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor . \n -*@li updates: An Tensor. format:NCHW, NHWC . +*Must be one of the following types: int32 or int64 +*@li updates: An ND Tensor . -*Must be one of the following types: float16, float32, int32, int8, uint8 +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: -* use_locking: An optional bool. Defaults to "False". If "True", the operation -* will be protected by a lock . \n +*use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -792,13 +792,13 @@ REG_OP(DiagPart) * Four inputs, including: *@li x: A Tensor of type float16, int8. *@li w: A weight matrix of type float16, int8. -*@li b: A Tensor of type float16, int32, float32. -*@li offset_w: A Tensor of type int8 . \n +*@li b: An optional Tensor of type float16, int32, float32. +*@li offset_w: An optional Tensor of type int8. Reserved. Only None Supported. \n *@par Attributes: -*@li num_output: Reserved. +*@li num_output: Required. An int, output neuron number. Reserved. *@li transpose: A bool, specifying weight whether to transpose input w, either "true" or "false". Defaults to "false". -*@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. +*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. * The product of the subsequent dimensions starting form first dimension or the second dimension is "K". *@li offset_x: An optional integer for quantized FullyConnection. *The negative offset added to the input image for int8 type. Ensure offset_x within the @@ -814,11 +814,11 @@ REG_OP(DiagPart) * Yes */ REG_OP(FullyConnection) - .INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) - .INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) - .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32})) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) + .INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) .REQUIRED_ATTR(num_output, Int) .ATTR(transpose, Bool, false) .ATTR(axis, Int, 1) @@ -1360,6 +1360,45 @@ REG_OP(FillDiagonal) .ATTR(wrap, Bool, false) .OP_END_FACTORY_REG(FillDiagonal) +/** +*@brief: Returns the sum of the elements of the diagonal of the input 2-D matrix. \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +* float16, float. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the Pytorch operator Trace. +*/ + +REG_OP(Trace) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(Trace) + +/** +*@brief Computes the generalized inverse of any matrix. \n + +*@par Inputs: +* @li x: input matrix. 
Must be one of the following types:
+* double, float. \n
+
+*@par Attributes:
+* @li rcond: An optional float >= 0 or inf. Defaults to 1e-15. \n
+
+*@par Outputs:
+* y: A Tensor with the same type and shape of x's transpose. \n
+
+*/
+REG_OP(Pinverse)
+    .INPUT(x, TensorType({ DT_FLOAT, DT_DOUBLE }))
+    .OUTPUT(y, TensorType({ DT_FLOAT, DT_DOUBLE }))
+    .ATTR(rcond, Float, 1e-15)
+    .OP_END_FACTORY_REG(Pinverse)
+
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
index 398c6568..7a28a738 100644
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -142,6 +142,74 @@ REG_OP(BatchNorm)
     .ATTR(is_training, Bool, true)
     .OP_END_FACTORY_REG(BatchNorm)
+
+/**
+* @brief After the mean and reciprocal of standard deviation (invert_std) are separately calculated on each device,
+* the mean and reciprocal of standard deviation (invert_std) data on each device are normalized,
+* a total mean and reciprocal of standard deviation (invert_std) are returned, and running_var is updated.
+
+* @par Inputs:
+* include:
+* @li mean_all: A Tensor. The mean of each device. Must be one of the following types: float16, float32.
+* @li invert_std_all: A Tensor. Reciprocal of the variances of each device. Must be one of the following types: float16, float32.
+* @li count_all: A Tensor. Number of data for each device. Must be one of the following types: float16, float32.
+* @li mean_broadcast: A Tensor. The overall average and broadcast. Must be one of the following types: float16, float32.
+* @li count_sum: A Tensor. General statistics. Must be one of the following types: float16, float32.
+* @li running_var: A Tensor. Runtime variance. Must be one of the following types: float16, float32. \n
+
+* @par Attributes:
+* Two Attributes, including:
+* @li momentum: An optional float. Defaults to 0.1. \n
+* @li epsilon: An optional float. Defaults to 0.001. \n
+
+* @par Outputs:
+* include:
+* @li invert_std: A Tensor. The inverse of the total variance.
+* @li running_var_update: A Tensor. The moving variance of each device after the update. \n
+
+* @par Third-party framework compatibility
+* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate
+* compatible with the Pytorch operator BatchNormGatherStatsWithCounts.
+*/
+REG_OP(SyncBatchNormGatherStatsWithCounts)
+    .INPUT(mean_all, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(invert_std_all, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count_all, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean_broadcast, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(running_var, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(invert_std, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(running_var_update, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(momentum, Float, 0.1)
+    .ATTR(epsilon, Float, 0.001)
+    .OP_END_FACTORY_REG(SyncBatchNormGatherStatsWithCounts)
+
+/**
+* @brief Updates running_mean.
+
+* @par Inputs:
+* include:
+* @li mean: A Tensor. The mean of each device. Must be one of the following types: float16, float32.
+* @li running_mean: A Tensor. Runtime Mean. Must be one of the following types: float16, float32. \n
+
+* @par Attributes:
+* One Attribute, including:
+* @li momentum: An optional float. Defaults to 0.1. \n
+
+* @par Outputs:
+* include:
+* @li running_mean_update: A Tensor. The moving mean of each device after the update. \n
+
+* @par Third-party framework compatibility
+* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate
+* compatible with the Pytorch operator BatchNormGatherStatsWithCounts.
+*/
+REG_OP(SyncBNTrainingUpdate)
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(running_mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(running_mean_update, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(momentum, Float, 0.1)
+    .OP_END_FACTORY_REG(SyncBNTrainingUpdate)
+
 /**
 *@brief part of SyncBatchNormBackward . \n
diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
index 6f58f028..5cabd4db 100644
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -134,9 +134,9 @@ REG_OP(DepthwiseConv2DBackpropFilter)
 * instead.
 */
 REG_OP(DepthwiseConv2DBackpropFilterD)
-    .INPUT(input, TensorType({float16}))
-    .INPUT(out_backprop, TensorType({float16}))
-    .OUTPUT(filter_grad, TensorType({float32}))
+    .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16}))
+    .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16}))
+    .OUTPUT(filter_grad, TensorType({DT_FLOAT32}))
     .REQUIRED_ATTR(filter_size, ListInt)
     .REQUIRED_ATTR(strides, ListInt)
     .ATTR(dilations, ListInt, {1, 1, 1, 1})
@@ -764,7 +764,7 @@ REG_OP(Conv2DBackpropFilterD)
 | | float32 | float32 | float32 | float32 |\n
 | | int8 | int8 | int32 | int32 |\n
 | Format | NCHW | NCHW | ND | NCHW |\n
-| | NHWC | HWCN | | NHWC |\n
+| | NHWC | HWCN | ND | NHWC |\n
 *\n
 * For float32 type, the actual calculation on the chip is based on
 * float16.
@@ -1650,5 +1650,43 @@ REG_OP(Dilation)
     .ATTR(padding_value, Float, 0.0)
     .OP_END_FACTORY_REG(Dilation)
+
+/**
+*@brief Computes the post-cube processing output with the expected input.
+*@par Inputs:
+ * Ten inputs:
+ * x1: A Tensor of type float16, bfloat16, float32, int32
+ * x2: A Tensor of type float16, int8, int4
+ * quant_scale_0: A Tensor of type uint64
+ * relu_weight_0: A Tensor of type float32
+ * clip_value_0: A Tensor of type float16, int8, int4
+ * quant_scale_1: A Tensor of type uint64
+ * relu_weight_1: A Tensor of type float32
+ * clip_value_1: A Tensor of type float16
+ * anti_quant_scale: A Tensor of type float16
+ * anti_quant_offset: A Tensor of type int8, int4
+*@par Attributes:
+ * @li fusion_op_list: A list of String.
+ * @li unit_list: A list of String.
+ * @li eltwise_mode: An optional string from "ADD", "SUB" and "".
+*@par Outputs:
+ * output: A Tensor of type float16, bfloat16, float32, int32, int8, int4.
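+*
+* @par Example:
+* A minimal construction sketch, not taken from this header: node names and
+* attribute values are hypothetical, and it assumes the set_input_xxx and
+* set_attr_xxx helpers that operator_reg.h generates:
+* @code
+*   ge::op::FixPipe fp("fix_pipe");
+*   fp.set_input_x1(conv_op);              // output of an upstream cube op
+*   fp.set_attr_fusion_op_list({"relu"});  // post-cube stages to fuse (assumed value)
+*   fp.set_attr_unit_list({"post_act"});   // target units (assumed value)
+*   fp.set_attr_eltwise_mode("");          // no elementwise stage
+* @endcode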
+*/
+REG_OP(FixPipe)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT, DT_INT32}))
+    .OPTIONAL_INPUT(x2, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
+    .OPTIONAL_INPUT(quant_scale_0, TensorType({DT_UINT64}))
+    .OPTIONAL_INPUT(relu_weight_0, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(clip_value_0, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
+    .OPTIONAL_INPUT(quant_scale_1, TensorType({DT_UINT64}))
+    .OPTIONAL_INPUT(relu_weight_1, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(clip_value_1, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(anti_quant_scale, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(anti_quant_offset, TensorType({DT_INT8, DT_INT4}))
+    .OUTPUT(output, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT, DT_INT32, DT_INT8, DT_INT4}))
+    .REQUIRED_ATTR(fusion_op_list, ListString)
+    .REQUIRED_ATTR(unit_list, ListString)
+    .ATTR(eltwise_mode, String, "")
+    .OP_END_FACTORY_REG(FixPipe)
+
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index 108a20a7..2ccb184f 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -1179,6 +1179,8 @@ REG_OP(SPP)
 * the index of the input feature map, "x1", "y1", "x2", or "y2" must be
 * greater than or equal to "0.0".
 * roi_max_num must be less than or equal to 6000 and must be divided by 16.
+* The input data of the rois cannot exceed the width and height range of x;
+* otherwise, the accuracy of the output result may not be as expected.
 *@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying
 * the number of ROIs per batch . \n
@@ -2076,7 +2078,7 @@ REG_OP(GIoUGrad)
 * trans: An optional attr, true for 'xyxyt', false for 'xywht'.
 *@par Outputs:
-* overlaps: A 3D Tensor of type float16 or float32 with shape [B, N, K].
+* overlaps: A 3D Tensor of type float32 with shape [B, N, K].
 *@attention Constraints:
 * In each batch, the invalid box cannot appear before the valid box.
@@ -2087,6 +2089,100 @@ REG_OP(RotatedOverlaps)
     .OUTPUT(overlaps, TensorType({DT_FLOAT}))
     .ATTR(trans, Bool, false)
     .OP_END_FACTORY_REG(RotatedOverlaps)
+
+/**
+*@brief RotatedIou . \n
+
+*@par Inputs:
+*@li boxes: Bounding boxes, a 3D Tensor of type float32 with
+* shape (B, 5, N). "N" indicates the number of boxes, and the value
+* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta].
+*@li query_boxes: Bounding boxes, a 3D Tensor of type float32 with
+* shape (B, 5, K). "K" indicates the number of boxes, and the value
+* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta].
+
+*@par Attributes:
+*@li trans: An optional attr, true for 'xyxyt', false for 'xywht'.
+*@li mode: An optional attr, a character string with the value range of ['iou', 'iof'],
+* only support 'iou' now.
+*@li is_cross: Cross calculation when it is True, and one-to-one calculation when it is False.
+*@li v_threshold: An optional attr, provides condition relaxation for intersection calculation.
+*@li e_threshold: An optional attr, provides condition relaxation for intersection calculation.
+
+*@par Outputs:
+* iou: A 3D Tensor of float32 with shape [B, N, K].
+
+*@attention Constraints:
+* In each batch, the invalid box cannot appear before the valid box.
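+*
+* @par Example:
+* A minimal construction sketch, not taken from this header: node names are
+* hypothetical, and it assumes the generated set_input_xxx and set_attr_xxx helpers:
+* @code
+*   ge::op::RotatedIou riou("rotated_iou");
+*   riou.set_input_boxes(boxes_op);              // (B, 5, N) float32
+*   riou.set_input_query_boxes(query_boxes_op);  // (B, 5, K) float32
+*   riou.set_attr_trans(false);                  // boxes given as [x, y, w, h, theta]
+*   riou.set_attr_mode("iou");                   // only "iou" is supported now
+* @endcode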
+*/
+REG_OP(RotatedIou)
+    .INPUT(boxes, TensorType({DT_FLOAT}))
+    .INPUT(query_boxes, TensorType({DT_FLOAT}))
+    .OUTPUT(iou, TensorType({DT_FLOAT}))
+    .ATTR(trans, Bool, false)
+    .ATTR(mode, String, "iou")
+    .ATTR(is_cross, Bool, true)
+    .ATTR(v_threshold, Float, 0)
+    .ATTR(e_threshold, Float, 0)
+    .OP_END_FACTORY_REG(RotatedIou)
+
+/**
+*@brief RotatedBoxEncode. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N).
+* "B" indicates the number of batch size
+* "N" indicates the number of bounding boxes, and the value "5" refers to
+* "x0", "x1", "y0", "y1" and "angle".
+*@li gt_box: A 3D Tensor of float32 (float16) with shape (B, 5, N).
+* "B" indicates the number of batch size
+* "N" indicates the number of bounding boxes, and the value "5" refers to
+* "x0", "x1", "y0", "y1" and "angle". \n
+
+*@par Attributes:
+*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle",
+* defaults to [1.0, 1.0, 1.0, 1.0, 1.0].
+
+*@par Outputs:
+*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N),
+* specifying the variations between all anchor boxes and ground truth boxes.
+*/
+REG_OP(RotatedBoxEncode)
+    .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(gt_box, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0})
+    .OP_END_FACTORY_REG(RotatedBoxEncode)
+
+/**
+*@brief RotatedBoxDecode. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N).
+* "B" indicates the number of batch size
+* "N" indicates the number of bounding boxes, and the value "5" refers to
+* "x0", "x1", "y0", "y1" and "angle".
+*@li deltas: A 3D Tensor of float32 (float16) with shape (B, 5, N).
+* "B" indicates the number of batch size
+* "N" indicates the number of bounding boxes, and the value "5" refers to
+* "x0", "x1", "y0", "y1" and "angle". \n
+
+*@par Attributes:
+*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle",
+* defaults to [1.0, 1.0, 1.0, 1.0, 1.0].
+
+*@par Outputs:
+*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N),
+* specifying the boxes decoded from the anchor boxes and the deltas.
+*/
+REG_OP(RotatedBoxDecode)
+    .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0})
+    .OP_END_FACTORY_REG(RotatedBoxDecode)
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index 9ce7abfd..38e513db 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -1487,25 +1487,51 @@ REG_OP(Roll)
     .OP_END_FACTORY_REG(Roll)
 /**
- *@brief Calculate the loss. Creates a criterion that optimizes a two-class classification
- logistic loss between input_x and input_y (containing 1 or -1). \n
+* @brief Roll the tensor along the given dimension(s).
 
- *@par Inputs:
- *Tow inputs, including:
+* @par Inputs:
+* Three inputs, including:
+* @li input: A tensor.
+* @li shift: The number of places by which the elements of the tensor are shifted. \n
+* @li axes: Axis along which to roll. \n
+
+* @par Outputs:
+* output: A Tensor with the same type and shape as "input".
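+
+* @par Example:
+* A minimal construction sketch, not taken from this header: node names are
+* hypothetical, and it assumes the generated set_input_xxx helpers:
+* @code
+*   ge::op::RollV2 roll("roll_v2");
+*   roll.set_input_input(data_op);   // tensor whose elements are rotated
+*   roll.set_input_shift(shift_op);  // e.g. a Const op holding {2}
+*   roll.set_input_axes(axes_op);    // e.g. a Const op holding {0}
+* @endcode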
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Roll. \n
+*/
+REG_OP(RollV2)
+    .INPUT(input, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \
+        DT_FLOAT,DT_DOUBLE}))
+    .INPUT(shift, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(axes, TensorType({DT_INT32,DT_INT64}))
+    .OUTPUT(output, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \
+        DT_FLOAT,DT_DOUBLE}))
+    .OP_END_FACTORY_REG(RollV2)
+
+/**
+ * @brief Calculate the loss. Creates a criterion that optimizes a two-class classification
+ * logistic loss between input_x and input_y (containing 1 or -1). \n
+
+ * @par Inputs:
+ * Two inputs, including:
 * @li input_x: A tensor. Must be one of the following types:
 * float16, float32. \n
 * @li input_y: A tensor. Must be one of the following types:
 * float16, float32. \n
 
- *@par Attributes:
- *reduction: An optional string.Defaults to "mean". \n
+ * @par Attributes:
+ * reduction: An optional string. Defaults to "mean". \n
 
- *@par Outputs:
- *output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n
+ * @par Outputs:
+ * output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n
 * while reduction == "sum" or "mean", A Tensor with the same type of input_x , shape of which is (1,)
 
- *@par Third-party framework compatibility
- *Compatible with the Pytorch operator SoftMarginLoss. \n
+ * @par Third-party framework compatibility
+ * Compatible with the Pytorch operator SoftMarginLoss. \n
 */
 REG_OP(SoftMarginLoss)
     .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -1624,18 +1650,18 @@ REG_OP(MultilabelMarginLoss)
     .OP_END_FACTORY_REG(MultilabelMarginLoss)
 /**
-*@brief Performs batch normalization . \n
-*@par Inputs:
+* @brief Performs batch normalization . \n
+* @par Inputs:
 * Two inputs
-*@li input_x: A Tensor. Support float32. shape (n, c, d).
-*@li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n
-*@par Attributes:
-*@li normalize_type: Str. Support "per_feature" or "all_features".
-*@li epsilon: An optional float32, specifying the small value added to
-variance to avoid dividing by zero. Defaults to "0.00001" . \n
-*@par Outputs:
+* @li input_x: A Tensor. Support float32. shape (n, c, d).
+* @li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n
+* @par Attributes:
+* @li normalize_type: Str. Support "per_feature" or "all_features".
+* @li epsilon: An optional float32, specifying the small value added to
+* variance to avoid dividing by zero. Defaults to "0.00001" . \n
+* @par Outputs:
 * One outputs
-*@li output_y: A Tensor for the normalized "x".Support float32. shape (n, c, d).\n
+* @li output_y: A Tensor for the normalized "x". Support float32. shape (n, c, d).\n
 */
 REG_OP(NormalizeBatch)
     .INPUT(input_x, TensorType({ DT_FLOAT }))
     .INPUT(seq_len, TensorType({ DT_INT32 }))
     .OUTPUT(output_y, TensorType({ DT_FLOAT }))
     .REQUIRED_ATTR(normalize_type, String)
     .ATTR(epsilon, Float, 0.00001)
     .OP_END_FACTORY_REG(NormalizeBatch)
+
+/**
+*@brief GroupNorm and Relu operator
+* calculating: x, gamma, beta
+* y = relu(gamma*((x - mean) / np.sqrt(variance + 0.001)) + beta)
+
+* @par Inputs:
+* Three inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32.
+* @li gamma: A Tensor. Must be one of the following types: float16, float32.
+* @li beta: A Tensor. Must be one of the following types: float16, float32 . \n
+
+* @par Attributes:
+* @li num_groups: A required attribute, the type is int32.
+* @li eps: An optional attribute, the type is float32. Defaults to 0.00001. \n
+
+* @par Outputs:
+* One output, including:
+* @li y: A Tensor. Must be one of the following types: float16, float32.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(GroupNormRelu)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(num_groups, Int)
+    .ATTR(eps, Float, 0.00001)
+    .OP_END_FACTORY_REG(GroupNormRelu)
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
index ee599a76..1c689ee9 100644
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -1747,7 +1747,8 @@
 included in the sample.\n
 *@par Third-party framework compatibility
 *Compatible with the Pytorch operator SubSample.
-*@par Restrictions:
+
+*@attention Constraints:
 *Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
 */
 REG_OP(SubSample)
@@ -1776,7 +1777,8 @@
 included in the sample.\n
 *@par Third-party framework compatibility
 *Compatible with the Pytorch operator SubSampleLabels.
-*@par Restrictions:
+
+*@attention Constraints:
 *Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
 */
 REG_OP(SubSampleLabels)
diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
index d5960395..fc61815e 100644
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -25,7 +25,8 @@
 namespace ge {
 /**
-*@brief Computes the for the gelu of "x" . \n
+*@brief The GELU activation function is x*Φ(x),
+* where Φ(x) is the standard Gaussian cumulative distribution function. \n
 *@par Inputs:
 *One input, including:
@@ -144,7 +145,7 @@ REG_OP(GeluGrad)
     .OP_END_FACTORY_REG(GeluGrad)
 /**
-*@brief Computes the for the fast_gelu of "x" . \n
+*@brief The FastGelu activation function is x*e^(0.851*(x-|x|))/(1+e^(-1.702*|x|)). \n
 *@par Inputs:
 *One input, including:
@@ -159,7 +160,23 @@ REG_OP(FastGelu)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OP_END_FACTORY_REG(FastGelu)
+/**
+*@brief The FastGeluV2 activation function is x*(sgn(x)*[(a/2)*(clip(|x|,max=-b)+b)^2+0.5]+0.5),
+* where the sgn(x) function is (x+0.000000000001)/|(x+0.000000000001)|. \n
+
+*@par Inputs:
+*One input, including:
+*x: A Tensor. Must be one of the following types: float16, float32
 
+*@par Outputs:
+*y: A Tensor. Has the same type as "x".
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator FastGeluV2
+*/
+REG_OP(FastGeluV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(FastGeluV2)
 /**
 *@brief Computes the gradient for the fast_gelu of "x" . \n
@@ -623,9 +640,7 @@ REG_OP(Elu)
 *x: A float16, float32, for the input data type . \n
 *@par Attributes:
-*@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
-*@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
-*@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
+*@li alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .

 *@par Outputs:
 *y: A float16, float32, for the normalized result . \n
@@ -641,9 +656,7 @@ REG_OP(Elu)
 REG_OP(Celu)
     .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
     .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16}))
-    .ATTR(alpha1, Float, 1.0)
-    .ATTR(alpha2, Float, 1.0)
-    .ATTR(alpha3, Float, 1.0)
+    .ATTR(alpha, Float, 1.0)
     .OP_END_FACTORY_REG(Celu)

 /**
diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
index f36d2935..1bdfb2c7 100644
--- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
+++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
@@ -117,6 +117,33 @@ REG_OP(NPUGetFloatStatus)
     .INPUT(addr, TensorType{DT_FLOAT})
     .OUTPUT(data, TensorType({DT_FLOAT}))
     .OP_END_FACTORY_REG(NPUGetFloatStatus)
+
+
+/**
+*@brief Set the value of the global float status workspace to 0. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(NPUClearFloatStatusV2)
+    .OP_END_FACTORY_REG(NPUClearFloatStatusV2)
+
+/**
+*@brief Get the value of the global float status workspace. \n
+
+*@par Inputs:
+*addr: A nested structure of Tensors of type float32. \n
+
+*@par Outputs:
+*data: A Tensor of type float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(NPUGetFloatStatusV2)
+    .DYNAMIC_INPUT(addr, TensorType{DT_FLOAT})
+    .OUTPUT(data, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(NPUGetFloatStatusV2)
 }  // namespace ge

 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/ocr_ops.h b/third_party/fwkacllib/inc/ops/ocr_ops.h
index a5755659..efaa7aa7 100644
--- a/third_party/fwkacllib/inc/ops/ocr_ops.h
+++ b/third_party/fwkacllib/inc/ops/ocr_ops.h
@@ -81,6 +81,7 @@ REG_OP(OCRRecognitionPreHandle)
     .OUTPUT(imgs, TensorType({DT_UINT8}))
     .OUTPUT(imgs_relation, TensorType({DT_INT32}))
     .OUTPUT(imgs_lang, TensorType({DT_INT32}))
+    .OUTPUT(imgs_piece_fillers, TensorType({DT_INT32}))
     .ATTR(batch_size, Int, 8)
     .ATTR(data_format, String, "NHWC")
     .ATTR(pad_mode, String, "REPLICATE")
diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h
index ad7f9003..57631d14 100644
--- a/third_party/fwkacllib/inc/ops/random_ops.h
+++ b/third_party/fwkacllib/inc/ops/random_ops.h
@@ -59,6 +59,65 @@ REG_OP(Multinomial)
     .ATTR(seed2, Int, 0)
     .OP_END_FACTORY_REG(Multinomial)

+/**
+*@brief Creates a multinomial distribution. \n
+
+*@par Inputs:
+*Inputs include:
+* @li q: A Tensor. Must be one of the following types: float, double.
+1-D Tensor with shape [num_classes].
+* @li j: A Tensor. Must be one of the following types: int64.
+1-D Tensor with shape [num_classes].
+* @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice. \n
+
+*@par Attributes:
+*@li output_dtype: An optional type from: int32, int64. Defaults to int64.
+*@li seed: An optional int. Defaults to 0.
+*@li seed2: An optional int. Defaults to 0. \n
+
+*@par Outputs:
+*y: A Tensor of type int32 or int64. \n
+
+*@attention Constraints:
+*The implementation for MultinomialAliasDraw on Ascend uses AICPU, which may have poor performance.
+
+*@par Third-party framework compatibility
+*@li Compatible with the torch _multinomial_alias_draw operator.
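+*
+*@par Example:
+* A minimal graph-construction sketch (editorial addition; it assumes the C++ op
+* wrapper classes that REG_OP generates under namespace ge::op, and the variable
+* names are illustrative only):
+*@code
+*  ge::op::Data probs("probs");                  // 1-D float probabilities
+*  ge::op::MultinomialAliasSetup setup("setup");
+*  setup.set_input_probs(probs);                 // produces alias table "j" and "q"
+*  ge::op::MultinomialAliasDraw draw("draw");
+*  draw.set_input_q(setup, "q");                 // wire setup outputs into draw
+*  draw.set_input_j(setup, "j");
+*  draw.set_attr_num_samples(16);                // required attribute
+*  draw.set_attr_seed(0);                        // 0 means a random seed is used
+*@endcode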
+*/
+REG_OP(MultinomialAliasDraw)
+    .INPUT(q, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(j, TensorType({DT_INT64}))
+    .OUTPUT(y, TensorType({DT_INT64}))
+    .REQUIRED_ATTR(num_samples, Int)
+    .ATTR(seed, Int, 0)
+    .OP_END_FACTORY_REG(MultinomialAliasDraw)
+
+/**
+*@brief Prepares for MultinomialAliasDraw to create a multinomial distribution. \n
+
+*@par Inputs:
+*Inputs include:
+* @li probs: A Tensor. Must be one of the following types: float, double.
+1-D Tensor with shape [num_classes]. \n
+
+*@par Outputs:
+*j: A Tensor. Must be one of the following types: int64.
+1-D Tensor with shape [num_classes].
+*q: A Tensor. Must be one of the following types: float, double.
+1-D Tensor with shape [num_classes]. \n
+
+*@attention Constraints:
+*The implementation for MultinomialAliasSetup on Ascend uses AICPU, which may have poor performance.
+
+*@par Third-party framework compatibility
+*@li Compatible with the torch _multinomial_alias_setup operator.
+*/
+REG_OP(MultinomialAliasSetup)
+    .INPUT(probs, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(j, TensorType({DT_INT64}))
+    .OUTPUT(q, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(MultinomialAliasSetup)
+
 /**
 *@brief Outputs random values from a normal distribution . \n
@@ -173,6 +232,27 @@ REG_OP(Randperm)
     .ATTR(dtype, Type, DT_INT64)
     .OP_END_FACTORY_REG(Randperm)

+/**
+*@brief Fills a tensor with elements drawn from the Poisson distribution. \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: float16, float. \n
+
+*@par Attributes:
+*@li seed: An optional int. Defaults to 0. \n
+
+*@par Outputs:
+*y: A Tensor with the same type as "x". \n
+
+*@par Third-party framework compatibility
+* Compatible with the Pytorch operator Poisson.
+*/
+REG_OP(Poisson)
+    .INPUT(x, TensorType({ DT_FLOAT16,DT_FLOAT }))
+    .OUTPUT(y, TensorType({ DT_FLOAT16,DT_FLOAT }))
+    .ATTR(seed, Int, 0)
+    .OP_END_FACTORY_REG(Poisson)
+
 /**
 *@brief Outputs random values from the Poisson distribution(s) described by rate . \n
@@ -446,6 +526,34 @@ REG_OP(DropOutGenMaskV3)
     .ATTR(seed2, Int, 0)
     .OP_END_FACTORY_REG(DropOutGenMaskV3)

+
+/**
+*@brief Generate stateless random bit mask for dropout. \n
+
+*@par Inputs:
+*Inputs include:
+*@li shape: The shape of the output tensor.
+*@li prob: 0-D. Probability of a bit being 1. \n
+*@li seed: If either seed or seed1 is set to be non-zero, the random number
+*generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed1: A second seed to avoid seed collision. \n
+
+*@par Outputs:
+*y: Output (1-D) random number using uint8 data format. \n
+
+*@attention Constraints:
+*The output is aligned with 128 bits.
+
+*@see StatelessDropOutGenMask()
+*/
+REG_OP(StatelessDropOutGenMask)
+    .INPUT(shape, TensorType({ DT_INT32, DT_INT64 }))
+    .INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT }))
+    .INPUT(seed, TensorType({ DT_INT32, DT_INT64 }))
+    .INPUT(seed1, TensorType({ DT_INT32, DT_INT64 }))
+    .OUTPUT(y, TensorType({ DT_UINT8 }))
+    .OP_END_FACTORY_REG(StatelessDropOutGenMask)
+
 /**
 *@brief Generates values in an interval . \n
@@ -698,11 +806,62 @@ REG_OP(Uniform)
 *@attention Constraints:
 * Compatible with the Caffe operator ContinuationIndicator.
 */
-
 REG_OP(ContinuationIndicator)
     .REQUIRED_ATTR(time_step, Int)
     .REQUIRED_ATTR(batch_size, Int)
     .OUTPUT(y, TensorType({DT_FLOAT}))
     .OP_END_FACTORY_REG(ContinuationIndicator)
+
+/**
+*@brief Outputs random values from the Exponential distribution(s) described by rate. \n
+
+*@par Inputs:
+*Inputs include:
+* @li x: A Tensor. Must be one of the following types: half, float32, float64. \n
+
+*@par Attributes:
+*@li lambda: An optional float. Defaults to 1.
+*@li seed: An optional int. Defaults to 0. The random number generator is seeded by the given seed.
+ Otherwise, it is seeded by a random seed. \n
+
+*@par Outputs:
+*y: A Tensor of type float16, float, or double. \n
+
+*@attention Constraints:
+*The implementation for Exponential on Ascend uses AICPU, which may have poor performance.
+
+*@par Third-party framework compatibility
+*@li Compatible with the TensorFlow Exponential operator.
+*/
+REG_OP(Exponential)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(lambda, Float, 1)
+    .ATTR(seed, Int, 0)
+    .OP_END_FACTORY_REG(Exponential)
+
+/**
+*@brief Fills a tensor with elements drawn from the geometric distribution. \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: float16, float. \n
+
+*@par Attributes:
+*@li p: The probability of experimental success in Bernoulli's experiment.
+*@li seed: An optional int. Defaults to 0. \n
+
+*@par Outputs:
+*y: A Tensor with the same type as "x". \n
+
+*@par Third-party framework compatibility
+* Compatible with the Pytorch operator Geometric.
+*/
+REG_OP(Geometric)
+    .INPUT(x, TensorType({ DT_FLOAT16,DT_FLOAT }))
+    .OUTPUT(y, TensorType({ DT_FLOAT16,DT_FLOAT }))
+    .REQUIRED_ATTR(p, Float)
+    .ATTR(seed, Int, 0)
+    .OP_END_FACTORY_REG(Geometric)
+
 }  // namespace ge

 #endif  // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/randomdsa_ops.h b/third_party/fwkacllib/inc/ops/randomdsa_ops.h
new file mode 100644
index 00000000..4574bb5d
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/randomdsa_ops.h
@@ -0,0 +1,139 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file randomdsa_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H_
+
+#include
+#include "graph/operator_reg.h"
+#include "graph/operator.h"
+
+namespace ge {
+/**
+* @brief Generate DSA random bit mask for dropout. \n
+
+* @par Inputs:
+* Inputs include:
+* @li count: The shape of the input tensor.
+* @li seed: If seed is set to be non-zero, the random number
+* generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+* @li dropout: 0-D. Probability of a bit being 1. \n
+
+* @par Attributes:
+* @li random_algorithm: The default value is "Philox". \n
+
+* @par Outputs:
+* out: Output (1-D) random number using uint8 data format. \n
+
+* @see DSAGenBitMask()
+*/
+REG_OP(DSAGenBitMask)
+    .INPUT(count, TensorType({DT_INT64}))
+    .INPUT(seed, TensorType({DT_UINT64}))
+    .INPUT(dropout, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
+    .OUTPUT(out, TensorType({DT_UINT8}))
+    .ATTR(random_algorithm, String, "Philox")
+    .OP_END_FACTORY_REG(DSAGenBitMask)
+
+/**
+* @brief Generate DSA truncated normal data in random. \n
+
+* @par Inputs:
+* Inputs include:
+* @li count: The shape of the input tensor.
+* @li seed: If seed is set to be non-zero, the random number
+* generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+* @li mean: A Tensor. Must be one of the following types: float16, float32, bfloat16.
+* @li stdev: A Tensor. Must be one of the following types: float16, float32, bfloat16. \n
+
+* @par Attributes:
+* @li random_algorithm: The default value is "Philox". \n
+
+* @par Outputs:
+* out: Output (1-D) random number using float and bfloat16 data formats. \n
+
+* @see DSARandomTruncatedNormal()
+*/
+REG_OP(DSARandomTruncatedNormal)
+    .INPUT(count, TensorType({DT_INT64}))
+    .INPUT(seed, TensorType({DT_UINT64}))
+    .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
+    .INPUT(stdev, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
+    .OUTPUT(out, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16}))
+    .ATTR(random_algorithm, String, "Philox")
+    .OP_END_FACTORY_REG(DSARandomTruncatedNormal)
+
+/**
+* @brief Generate DSA normal data in random. \n
+
+* @par Inputs:
+* Inputs include:
+* @li count: The shape of the input tensor.
+* @li seed: If seed is set to be non-zero, the random number
+* generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+* @li mean: A Tensor. Must be one of the following types: float16, float32, bfloat16.
+* @li stdev: A Tensor. Must be one of the following types: float16, float32, bfloat16. \n
+
+* @par Attributes:
+* @li random_algorithm: The default value is "Philox". \n
+
+* @par Outputs:
+* out: Output (1-D) random number using float and bfloat16 data formats. \n
+
+* @see DSARandomNormal()
+*/
+REG_OP(DSARandomNormal)
+    .INPUT(count, TensorType({DT_INT64}))
+    .INPUT(seed, TensorType({DT_UINT64}))
+    .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
+    .INPUT(stdev, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
+    .OUTPUT(out, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16}))
+    .ATTR(random_algorithm, String, "Philox")
+    .OP_END_FACTORY_REG(DSARandomNormal)
+
+/**
+* @brief Generate DSA uniform data in random. \n
+
+* @par Inputs:
+* Inputs include:
+* @li count: The shape of the input tensor.
+* @li seed: If seed is set to be non-zero, the random number
+* generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+* @li low: A Tensor. Must be one of the following types: int, float, bfloat16.
+* @li high: A Tensor. Must be one of the following types: int, float, bfloat16. \n
+
+* @par Attributes:
+* @li random_algorithm: The default value is "Philox". \n
+
+* @par Outputs:
+* out: Output (1-D) random number using float, int and bfloat16 data formats. \n
+
+* @see DSARandomUniform()
+*/
+REG_OP(DSARandomUniform)
+    .INPUT(count, TensorType({DT_INT64}))
+    .INPUT(seed, TensorType({DT_UINT64}))
+    .INPUT(low, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
+    .INPUT(high, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
+    .OUTPUT(out, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
+    .ATTR(random_algorithm, String, "Philox")
+    .OP_END_FACTORY_REG(DSARandomUniform)
+}  // namespace ge
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H
diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h
index fa572b66..6fa95abc 100644
--- a/third_party/fwkacllib/inc/ops/reduce_ops.h
+++ b/third_party/fwkacllib/inc/ops/reduce_ops.h
@@ -515,6 +515,34 @@ REG_OP(ReduceSumD)
     .ATTR(keep_dims, Bool, false)
     .OP_END_FACTORY_REG(ReduceSumD)

+/**
+*@brief Calculate the total mean based on the mean of each device. \n
+
+*@par Inputs:
+* Three inputs, including:
+*@li x: A Tensor. Must be one of the following types: float16, float32.
+*@li count: A Tensor. Must be one of the following types: float16, float32.
+*@li count_sum: A Tensor. Must be one of the following types: float16, float32. \n
+
+*@par Attributes:
+*@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce.
+*@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false". \n
+
+*@par Outputs:
+*y: The reduced tensor. Has the same type and format as input "x". \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Sum.
+*/
+REG_OP(ReduceMeanWithCount)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(axes, ListInt)
+    .ATTR(keep_dims, Bool, false)
+    .OP_END_FACTORY_REG(ReduceMeanWithCount)
+
 /**
 *@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n
@@ -1326,6 +1354,101 @@ REG_OP(ReduceMeanVariance)
     .ATTR(axes, ListInt, {})
     .ATTR(keep_dims, Bool, true)
     .OP_END_FACTORY_REG(ReduceMeanVariance)
+
+/**
+* @brief Calculates the standard deviation or the variance of Tensors with the average value.
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32. \n
+* @li mean: A Tensor. It's the mean of "x". Has the same shape and type as "x". \n
+
+* @par Attributes:
+* Four attributes, including:
+* @li dim: A required ListInt. Specifies the dimensions to reduce. \n
+* @li if_std: An optional bool. Defaults to "False".
+* If "True", calculate the standard deviation.
+* If "False", calculate the variance.
+* @li unbiased: An optional bool. Defaults to "True".
+* If "True", use Bessel's correction.
+* If "False", do not use Bessel's correction. \n
+* @li keepdim: An optional bool. Defaults to "False".
+* If "True", keep the original tensor dimension.
+* If "False", do not keep the original tensor dimension. \n
+
+* @par Outputs:
+* @li output_var: A Tensor. It's the standard deviation or the variance of "x". Has the same type as "x".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Var_mean.
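+
+* @par Example:
+* A minimal construction sketch (editorial addition; it assumes the generated
+* ge::op wrapper classes, and "x" and "mu" stand for upstream operators that
+* produce the input tensor and its precomputed mean):
+* @code
+*  ge::op::ReduceStdV2Update update("reduce_std_update");
+*  update.set_input_x(x);           // input tensor
+*  update.set_input_mean(mu);       // mean of "x" over "dim"
+*  update.set_attr_dim({-1});       // reduce over the last axis
+*  update.set_attr_if_std(true);    // true: standard deviation, false: variance
+*  update.set_attr_unbiased(true);  // apply Bessel's correction
+*  update.set_attr_keepdim(false);
+* @endcode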
+*/
+REG_OP(ReduceStdV2Update)
+    .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .INPUT(mean, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .OUTPUT(output_var, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .REQUIRED_ATTR(dim, ListInt)
+    .ATTR(if_std, Bool, false)
+    .ATTR(unbiased, Bool, true)
+    .ATTR(keepdim, Bool, false)
+    .OP_END_FACTORY_REG(ReduceStdV2Update)
+
+/**
+*@brief Computes the log of the sum of exponentials of elements across dimensions of a tensor.
+* Reduces "x" along the dimensions given in "axes".
+* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each
+* entry in "axes". If "keep_dims" is true, the reduced dimensions
+* are retained with length 1.
+*
+*@par Inputs:
+* Two inputs, including:
+*@li x: A Tensor. Must be one of the following types:
+* float32, float16, int32, int64, uint32, uint64, double.
+*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. \n
+*
+*@par Attributes:
+*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false". \n
+*
+*@par Outputs:
+*y: The reduced tensor. Has the same type and format as input "x". \n
+*
+*@par Third-party framework compatibility
+* Compatible with the Onnx operator ReduceLogSumExp.
+*/
+REG_OP(ReduceLogSumExp)
+    .INPUT(x, TensorType::NumberType())
+    .INPUT(axes, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::NumberType())
+    .ATTR(keep_dims, Bool, false)
+    .OP_END_FACTORY_REG(ReduceLogSumExp)
+
+/**
+*@brief Computes the log of the sum of elements across dimensions of a tensor.
+* Reduces "x" along the dimensions given in "axes".
+* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each
+* entry in "axes". If "keep_dims" is true, the reduced dimensions
+* are retained with length 1.
+*
+*@par Inputs:
+* Two inputs, including:
+*@li x: A Tensor. Must be one of the following types:
+* float32, float16, int32, int64, uint32, uint64, double.
+*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. \n
+*
+*@par Attributes:
+*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false". \n
+*
+*@par Outputs:
+*y: The reduced tensor. Has the same type and format as input "x". \n
+*
+*@par Third-party framework compatibility
+* Compatible with the Onnx operator ReduceLogSum.
+*/
+REG_OP(ReduceLogSum)
+    .INPUT(x, TensorType::NumberType())
+    .INPUT(axes, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::NumberType())
+    .ATTR(keep_dims, Bool, false)
+    .OP_END_FACTORY_REG(ReduceLogSum)
 } //namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index b374fa5c..43951402 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -1280,6 +1280,44 @@ REG_OP(EmbeddingBag)
     .ATTR(sparse, Bool, false)
     .ATTR(include_last_offset, Bool, false)
     .OP_END_FACTORY_REG(EmbeddingBag)
+/**
+ * @brief LSTMP calculation.
+ * @par Inputs:
+ * Eight inputs:
+ * @li x: A required Tensor(seq, batch, dim). Must be one of the following types: float16, float32.
+ * @li real_mask: An optional Tensor(seq, batch). Must be one of the following types: float16, float32.
+ * @li init_h: An optional Tensor(batch, state). Must be one of the following types: float16, float32.
+ * @li init_c: An optional Tensor(batch, hidden). Must be one of the following types: float16, float32.
+ * @li wx: A required Tensor(4*hidden, dim). Must be one of the following types: float16, float32.
+ * @li wr: A required Tensor(4*hidden, state). Must be one of the following types: float16, float32.
+ * @li bias: An optional Tensor(hidden). Must be one of the following types: float16, float32. The format must be ND.
+ * @li project: An optional Tensor. Must be one of the following types: float16, float32.
+ *
+ * @par Outputs:
+ * Three outputs:
+ * @li y: A Tensor. Must be one of the following types: float16, float32.
+ * @li output_h: A Tensor. Must be one of the following types: float16, float32.
+ * @li output_c: A Tensor. Must be one of the following types: float16, float32.
+ *
+ * @par Attributes:
+ * time_major: A bool identifying the time major in the op. Defaults to false.
+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(LSTMP)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(wx, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(wr, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(time_major, Bool, false)
+    .OP_END_FACTORY_REG(LSTMP)
 }  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index b09d08b0..2cc8fd1d 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -259,13 +259,39 @@ REG_OP(GatherV2D)
 *@par Third-party framework compatibility
 *Compatible with the PyTorch operator Gather.
 */
-
 REG_OP(GatherElements)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32,
+        DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64}))
+    .INPUT(index, TensorType({DT_INT32,DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32,
+        DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64}))
+    .ATTR(dim, Int, 0)
+    .OP_END_FACTORY_REG(GatherElements)
+
+/**
+*@brief Gathers values along an axis specified by dim. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8,
+* int64, uint16, float16, uint32, uint64, bool.
+*@li dim: A Tensor. Must be one of the following types: int32, int64.
+*@li index: A Tensor. Must be one of the following types: int32, int64. \n
+
+
+*@par Outputs:
+* y: A Tensor. Has the same type as "x". \n
+
+*@par Third-party framework compatibility
+*Compatible with the PyTorch operator Gather.
+*/
+REG_OP(GatherD)
+    .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32,
+        DT_INT64, DT_UINT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(dim, TensorType({DT_INT32, DT_INT64}))
     .INPUT(index, TensorType({DT_INT32, DT_INT64}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
     .ATTR(dim, Int, 0)
-    .OP_END_FACTORY_REG(GatherElements)
+    .OP_END_FACTORY_REG(GatherD)

 /**
 *@brief Extracts a strided slice of a tensor. Roughly speaking, this op
@@ -360,9 +386,9 @@ REG_OP(StridedSlice)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead.
*/ REG_OP(StridedSliceD) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_UINT8, DT_INT8, DT_BOOL})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_UINT8, DT_INT8, DT_BOOL})) .REQUIRED_ATTR(begin, ListInt) .REQUIRED_ATTR(end, ListInt) @@ -700,6 +726,27 @@ REG_OP(SegmentMax) .OUTPUT(y, TensorType::RealNumberType()) .OP_END_FACTORY_REG(SegmentMax) +/** +*@brief Computes the sum along segments of a tensor . \n + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor of type NumberType. +* @li segment_ids: A Tensor of type IndexNumberType, whose shape is a prefix +* of "x.shape". + +*@par Outputs: +*y: A Tensor of type NumberType . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SegmentSum. +*/ +REG_OP(SegmentSum) + .INPUT(x, TensorType::NumberType()) + .INPUT(segment_ids, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::NumberType()) + .OP_END_FACTORY_REG(SegmentSum) + /** *@brief: Computes the maximum along segments of a tensor. *Computes a tensor such that output[i]=(data[i]) where max is over j @@ -929,6 +976,49 @@ REG_OP(TopKD) * @brief Finds values and indices of the "k" largest elements for the last * dimension . \n +* @par Inputs: +* Two inputs, including: +* @li x: A 1D or higher tensor of type BasicType, with the last dimension +* at least "k". +* @li k: A 0D Tensor of type int32. +* Number of top elements to look for along the last dimension (along each row +* for matrices) . +* @li assist_seq: A 1D tensor of type float16. +* with size of 2N, which "N" is the last dimension. +* The first N numbers is indices, and the next N numbers is deviation of casting +* int32 to float16. \n + +* @par Attributes: +* @li sorted: An optional bool. Defaults to true. +* If true, the resulting "k" elements will be sorted by the values in descending +* order. +* @li dim: An optional int. Defaults to -1. For reserved use. +* @li largest: An optional bool. Defaults to true. For reserved use. \n + +* @par Outputs: +* @li values: A Tensor, specifying the sorted data. Has the same type as +* "input". +* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n + +* @see TopK() +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator TopKV2. +*/ +REG_OP(TopKV2D) + .INPUT(x, TensorType::RealNumberType()) + .INPUT(k, TensorType({DT_INT32})) + .INPUT(assist_seq, TensorType({DT_FLOAT16})) + .OUTPUT(values, TensorType::RealNumberType()) + .OUTPUT(indices, TensorType({DT_INT32})) + .ATTR(sorted, Bool, true) + .ATTR(dim, Int, -1) + .ATTR(largest, Bool, true) + .OP_END_FACTORY_REG(TopKV2D) + +/** +* @brief Finds values and indices of the "k" largest elements for the last +* dimension . \n + * @par Inputs: * Two inputs, including: * @li x: A 1D or higher tensor of type BasicType, with the last dimension @@ -2340,7 +2430,7 @@ REG_OP(AddRowRanges) *@par Outputs: *y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D) -* @par Restrictions: +*@attention Constraints: * Warning: input shape's length must not be bigger than 1024 * 1024 * 1024. */ REG_OP(MaskedFillRange) @@ -2442,6 +2532,34 @@ REG_OP(StridedSliceV3) .OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(StridedSliceV3) + +/** +*@brief MovingSumWithSigmoid. 
+ +*@par Inputs: +*Four inputs, including: +* @li alpha: A Tensor. Must be one of the following types: float32, float16. +* @li energy: A Tensor. Must be one of the following types: float32, float16. +* @li beam_size: A Tensor of type int32. +* @li frame_size: A Tensor of type int32. \n + +*@par Outputs: +* y: A Tensor. Has the same type as "alpha". \n +* +* @par Attributes: +* window_size: A int. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(MovingSumWithSigmoid) + .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(energy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(beam_size, TensorType({DT_INT32})) + .INPUT(frame_size, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(window_size, Int) + .OP_END_FACTORY_REG(MovingSumWithSigmoid) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index 08726080..7d181545 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -381,6 +381,30 @@ REG_OP(ConcatOffsetD) .REQUIRED_ATTR(concat_dim, Int) .REQUIRED_ATTR(N, Int) .OP_END_FACTORY_REG(ConcatOffsetD) + +/** +*@brief Compute combinations of length of the given tensor. \n + +*@par Inputs: +*x: A list of 1D Tensor objects. \n + +*@par Attributes: +*@li r: An optional int indicates number of elements to combine. Defaults to 2. +*@li with_replacement: An optional bool indicates whether to allow duplication +*in combination. Defaults to "False". \n + +*@par Outputs: +*y: A Tensor list with same type as "x" . \n + +*@par Third-party framework compatibility +*@ Compatible with the Pytorch operator Combinations. +*/ +REG_OP(Combinations) + .INPUT(x, TensorType::ALL()) + .OUTPUT(y, TensorType::ALL()) + .ATTR(r, Int, 2) + .ATTR(with_replacement, Bool, false) + .OP_END_FACTORY_REG(Combinations) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/vector_search.h b/third_party/fwkacllib/inc/ops/vector_search.h index 1e8c574b..ab4daa70 100644 --- a/third_party/fwkacllib/inc/ops/vector_search.h +++ b/third_party/fwkacllib/inc/ops/vector_search.h @@ -1,5 +1,5 @@ /** - * Copyright 2021 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -154,43 +154,6 @@ REG_OP(CalcBucketsLimitAndOffset) .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) .REQUIRED_ATTR(total_limit, Int) .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) - -/** -* @brief Calculate ProdVirialSeA. \n -* -* @par Inputs: -* Five inputs, including: -* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64. -* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64. -* @li rij: A Tensor. Must be one of the following types: float16, float32, float64. -* @li nlist: A Tensor. dtype is int32. -* @li natoms: A Tensor. dtype is int32. \n -* -* @par Outputs: -* Two outputs, including: -* @li virial: A Tensor. Must be one of the following types: float16, float32, float64. -* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. 
\n -* -* @par Attributes: -* Two attributes, including: -* @li n_a_sel: A Scalar. -* @li n_r_sel: A Scalar. \n -* -* @par Restrictions: -* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. -*/ -REG_OP(ProdVirialSeA) - .INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) - .INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) - .INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) - .INPUT(nlist, TensorType({DT_INT32})) - .INPUT(natoms, TensorType({DT_INT32})) - .OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) - .OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) - .REQUIRED_ATTR(n_a_sel, Int) - .REQUIRED_ATTR(n_r_sel, Int) - .ATTR(nall, Int, 28328) - .OP_END_FACTORY_REG(ProdVirialSeA) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index aa98ed9a..c9779326 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -98,11 +98,11 @@ typedef struct rtExceptionInfo { uint32_t tid; uint32_t deviceid; uint32_t retcode; -} rtExceptionInfo; +} rtExceptionInfo_t; typedef void (*rtErrorCallback)(rtExceptionType); -typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); +typedef void (*rtTaskFailCallback)(rtExceptionInfo_t *exceptionInfo); typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); @@ -429,6 +429,15 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *lbl, rtModel_t mdl, rtStream_t st * @return RT_ERROR_INVALID_VALUE for input null ptr */ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); + +/** + * @ingroup dvrt_base + * @brief get max model num + * @param [out] max model num + * @param [in] null + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetMaxModelNum(uint32_t *maxModelCount); #if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index c83c1521..3881ac5c 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -22,7 +22,8 @@ typedef enum tagRtArchType { ARCH_BEGIN = 0, ARCH_V100 = ARCH_BEGIN, ARCH_V200 = 1, - ARCH_END = 2, + ARCH_V300 = 2, + ARCH_END = 3, } rtArchType_t; typedef enum tagRtChipType { @@ -34,7 +35,8 @@ typedef enum tagRtChipType { CHIP_DC = 4, CHIP_CLOUD_V2 = 5, CHIP_NO_DEVICE = 6, - CHIP_END = 7, + CHIP_MINI_V3 = 7, + CHIP_END = 8, } rtChipType_t; typedef enum tagRtAicpuScheType { @@ -74,7 +76,8 @@ typedef enum tagRtPlatformType { PLATFORM_DC = 5, PLATFORM_CLOUD_V2 = 6, PLATFORM_LHISI_SD3403 = 7, - PLATFORM_END = 8, + PLATFORM_MINI_V3 = 8, + PLATFORM_END = 9, } rtPlatformType_t; typedef enum tagRtCubeFracMKNFp16 { @@ -140,6 +143,12 @@ typedef enum tagRTTaskTimeoutType { RT_TIMEOUT_TYPE_OP_EXECUTE, } rtTaskTimeoutType_t; +typedef enum tagRtFloatOverflowMode { + RT_OVERFLOW_MODE_SATURATION = 0, + RT_OVERFLOW_MODE_INFNAN, + RT_OVERFLOW_MODE_UNDEF, +} rtFloatOverflowMode_t; + /** * @ingroup * @brief get AI core count @@ -180,6 +189,15 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate */ RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); +/** + * @ingroup + * @brief get float overflow mode + * @param [out] floatOverflowMode + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetFloatOverflowMode(rtFloatOverflowMode_t * const floatOverflowMode); 
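+
+/*
+ * Example (editorial sketch; assumes the runtime has been initialized,
+ * error handling elided):
+ *
+ *   rtFloatOverflowMode_t ovfMode = RT_OVERFLOW_MODE_UNDEF;
+ *   if (rtGetFloatOverflowMode(&ovfMode) == RT_ERROR_NONE) {
+ *       // ovfMode is RT_OVERFLOW_MODE_SATURATION or RT_OVERFLOW_MODE_INFNAN
+ *   }
+ */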
+ /** * @ingroup * @brief get l2 buffer Info,virtual baseaddr,Size diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index e920a939..378470c7 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -140,7 +140,7 @@ RTS_API rtError_t rtSetGroup(int32_t groupId); * @param [in] groupid count * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t count); +RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t cnt); /** * @ingroup diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index e5b741c7..bce6625b 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -94,11 +94,11 @@ typedef enum tagGetDevMsgType { /** * @ingroup dvrt_dev * @brief get total device number. - * @param [in|out] count the device number + * @param [in|out] cnt the device number * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetDeviceCount(int32_t *count); +RTS_API rtError_t rtGetDeviceCount(int32_t *cnt); /** * @ingroup dvrt_dev * @brief get device ids @@ -338,7 +338,7 @@ RTS_API rtError_t rtSetTSDevice(uint32_t tsId); * @return RT_ERROR_NONE for ok * @return RT_ERROR_DRV_ERR for can not get run mode */ -RTS_API rtError_t rtGetRunMode(rtRunMode *mode); +RTS_API rtError_t rtGetRunMode(rtRunMode *runMode); /** * @ingroup dvrt_dev diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index 7cb8c8a6..2813d73b 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -23,11 +23,11 @@ typedef enum dvfsProfileMode { /** * @ingroup dvrt_dvfsprofile * @brief Set the performance mode of the device - * @param [in] mode dvfsProfileMode + * @param [in] profMode dvfsProfileMode * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode mode); +RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode profMode); /** * @ingroup dvrt_dvfsprofile diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 024ff3e3..6bd47992 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -19,6 +19,11 @@ typedef enum rtEventWaitStatus { EVENT_STATUS_MAX = 2, } rtEventWaitStatus_t; +typedef enum rtEventStatus { + RT_EVENT_INIT = 0, + RT_EVENT_RECORDED = 1, +} rtEventStatus_t; + /** * @ingroup event_flags * @brief event op bit flags @@ -115,6 +120,16 @@ RTS_API rtError_t rtEventQuery(rtEvent_t evt); */ RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t evt, rtEventWaitStatus_t *status); +/** + * @ingroup dvrt_event + * @brief Queries an event's status + * @param [in] evt event to query + * @param [in out] rtEventStatus_t status + * @return RT_EVENT_RECORDED for recorded + * @return RT_EVENT_INIT for not recorded + */ +RTS_API rtError_t rtEventQueryStatus(rtEvent_t evt, rtEventStatus_t *status); + /** * @ingroup dvrt_event * @brief computes the elapsed time between events. 
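
A minimal usage sketch for the new rtEventQueryStatus interface above (assuming
rtEventCreate and rtEventRecord from the same header and a valid stream "stm";
error handling elided):

    rtEvent_t evt = nullptr;
    rtEventCreate(&evt);                      // create an event
    rtEventRecord(evt, stm);                  // record it on stream "stm"
    rtEventStatus_t status = RT_EVENT_INIT;
    if (rtEventQueryStatus(evt, &status) == RT_ERROR_NONE &&
        status == RT_EVENT_RECORDED) {
        // the event has been recorded on the stream
    }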
diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 08860ccd..1c3f571b 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -287,13 +287,13 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle); * @param [in] binHandle device binary handle * @param [in] stubFunc stub function * @param [in] stubName stub function name - * @param [in] devFunc device function description. symbol name or address - * offset, depending binary type. + * @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, + * depending static shape or dynmaic shape. * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, - const void *devFunc, uint32_t funcMode); + const void *kernelInfoExt, uint32_t funcMode); /** * @ingroup rt_kernel @@ -354,7 +354,8 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * * @ingroup rt_kernel * @brief launch kernel with handle to device * @param [in] hdl program - * @param [in] devFunc device function description. + * @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, + * depending static shape or dynmaic shape. * @param [in] blockDim block dimentions * @param [in] args argments address for kernel function * @param [in] argsSize argements size @@ -364,7 +365,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *devFunc, uint32_t blockDim, +RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *kernelInfoExt, uint32_t blockDim, void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); @@ -497,6 +498,28 @@ RTS_API rtError_t rtDumpAddrSet(rtModel_t mdl, void *addr, uint32_t dumpSize, ui */ RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length); +/** + * @ingroup rt_kernel + * @brief launch npu get float status task + * @param [in] outputAddr pointer to op output addr + * @param [in] outputSize op output size + * @param [in] checkMode check mode + * @param [in] stm associated stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtNpuGetFloatStatus(void *outputAddr, uint64_t outputSize, uint32_t checkMode, rtStream_t stm); + +/** + * @ingroup rt_kernel + * @brief launch npu clear float status task + * @param [in] checkMode check mode + * @param [in] stm associated stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtNpuClearFloatStatus(uint32_t checkMode, rtStream_t stm); + #ifndef __CLANG_CCE_RUNTIME_H__ #define __CLANG_CCE_RUNTIME_H__ /** @@ -519,13 +542,13 @@ RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStre /** * @ingroup rt_kernel * @brief setup argment for next rtLaunch in current thread - * @param [in] arg argment address for kernel function + * @param [in] args argment address for kernel function * @param [in] size argment size * @param [in] offset argment table offset * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtSetupArgument(const void *arg, uint32_t size, uint32_t offset); 
+RTS_API rtError_t rtSetupArgument(const void *args, uint32_t size, uint32_t offset); /** * @ingroup rt_kernel @@ -544,11 +567,11 @@ RTS_API rtError_t rtLaunch(const void *stubFunc); * @param [in] ptr host memory * @param [in] size host memory size * @param [in] flag reserved. set to 0 - * @param [out] arg returned arg. used for next kernel's arg. + * @param [out] args returned arg. used for next kernel's arg. * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg); +RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **args); /** * @ingroup rt_kernel @@ -675,7 +698,8 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD * @ingroup rt_kernel * @brief launch kernel with handle and tiling data to device * @param [in] hdl program - * @param [in] devFunc device function description. + * @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, + * depending static shape or dynmaic shape. * @param [in] blockDim block dimentions * @param [in] argsInfo argments info address for kernel function * @param [in] smDesc shared memory description @@ -684,7 +708,7 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *hdl, const void *devFunc, uint32_t blockDim, +RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *hdl, const void *kernelInfoExt, uint32_t blockDim, rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, const void* kernelInfo); #if defined(__cplusplus) diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index c086da10..7f3a4bc7 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -341,6 +341,20 @@ RTS_API rtError_t rtInvalidCache(void *base, size_t len); */ RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind); +/** + * @ingroup dvrt_mem + * @brief host task memcpy + * @param [in] dst destination address pointer + * @param [in] destMax length of destination address memory + * @param [in] src source address pointer + * @param [in] cnt the number of byte to copy + * @param [in] kind memcpy type + * @param [in] stm task stream + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, const void * const src, + const uint64_t cnt, rtMemcpyKind_t kind, rtStream_t stm); + /** * @ingroup dvrt_mem * @brief asynchronized memcpy @@ -424,6 +438,16 @@ RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize); */ RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize); +/** + * @ingroup dvrt_mem + * @brief Specifies how memory is use + * @param [in] devPtr memory pointer + * @param [in] count memory count + * @param [in] advise reserved, set to 1 + * @return RT_ERROR_NONE for ok + * @return others for error + */ +RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t count, uint32_t advise); /** * @ingroup dvrt_mem * @brief set memory with uint32_t value diff --git a/third_party/fwkacllib/inc/runtime/rt_dfx.h b/third_party/fwkacllib/inc/runtime/rt_dfx.h index f96c0859..71215f80 100644 --- 
a/third_party/fwkacllib/inc/runtime/rt_dfx.h +++ b/third_party/fwkacllib/inc/runtime/rt_dfx.h @@ -28,6 +28,16 @@ extern "C" { */ RTS_API rtError_t rtSetTaskTag(const char_t *taskTag); +/** + * @brief set aicpu device attribute. + * it is used for aicpu device to be aware of enviroment config + * @param [in] key attrubute key. + * @param [in] val attrubute value. + * @return RT_ERROR_NONE for ok + * @return other failed + */ +RTS_API rtError_t rtSetAicpuAttr(const char_t *key, const char_t *val); + #if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h index 18b25d36..4978d5fa 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h @@ -25,7 +25,8 @@ typedef struct tagFftsPlusTaskInfo { #pragma pack(pop) -RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *hdl, const void *devFunc, void **addr, uint32_t *prefetchCnt); +RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *hdl, const void *kernelInfoExt, void **addr, + uint32_t *prefetchCnt); RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stm); diff --git a/third_party/fwkacllib/inc/runtime/rt_mem_queue.h b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h index 49a272f0..2ed9fd08 100644 --- a/third_party/fwkacllib/inc/runtime/rt_mem_queue.h +++ b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h @@ -184,6 +184,13 @@ typedef enum rtGroupType { RT_GRP_TYPE_BIND_DP_CPU_EXCLUSIVE /* Bound to a AICPU, intra-group threads are mutex awakened */ } rtGroupType_t; +typedef struct tagInitFlowGwInfo { + const char_t *groupName; + uint64_t schedPolicy; + uint64_t reschedInterval; + char_t rsv[128]; +} rtInitFlowGwInfo_t; + /** * @ingroup rt_mem_queue * @brief init queue schedule @@ -193,6 +200,15 @@ typedef enum rtGroupType { */ RTS_API rtError_t rtMemQueueInitQS(int32_t devId, const char_t *grpName); +/** + * @ingroup rt_mem_queue + * @brief init flow gateway + * @param [in] devId the logical device id + * @param [in] initInfo Initialization parameters + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueInitFlowGw(int32_t devId, const rtInitFlowGwInfo_t * const initInfo); + /** * @ingroup rt_mem_queue * @brief create mbuf queue @@ -222,24 +238,24 @@ RTS_API rtError_t rtMemQueueInit(int32_t devId); /** * @ingroup rt_mem_queue - * @brief enqueu mbuf + * @brief enqueue memBuf * @param [in] devId the logical device id * @param [in] qid queue id - * @param [in] mbuf enqueue mbuf + * @param [in] memBuf enqueue memBuf * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *mbuf); +RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *memBuf); /** * @ingroup rt_mem_queue - * @brief enqueu mbuf + * @brief dequeue memBuf * @param [in] devId the logical device id * @param [in] qid queue id - * @param [out] mbuf dequeue mbuf + * @param [out] memBuf dequeue memBuf * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **mbuf); +RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **memBuf); /** * @ingroup rt_mem_queue @@ -350,47 +366,56 @@ RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg); /** * @ingroup rt_mem_queue * @brief alloc buff -* @param [out] buff: buff addr alloced +* @param [out] memBuf: buff addr alloced * @param [in] size: The amount of memory space requested * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t 
rtMbufAlloc(rtMbufPtr_t *mbuf, uint64_t size); +RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *memBuf, uint64_t size); /** * @ingroup rt_mem_queue * @brief free buff -* @param [in] buff: buff addr to be freed +* @param [in] memBuf: buff addr to be freed * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMbufFree(rtMbufPtr_t mbuf); +RTS_API rtError_t rtMbufFree(rtMbufPtr_t memBuf); + +/** +* @ingroup rt_mem_queue +* @brief set Data len of Mbuf +* @param [in] memBuf: Mbuf addr +* @param [in] len: data len +* @return RT_ERROR_NONE for success, others for fail +*/ +RTS_API rtError_t rtMbufSetDataLen(rtMbufPtr_t memBuf, uint64_t len); /** * @ingroup rt_mem_queue * @brief get Data addr of Mbuf -* @param [in] mbuf: Mbuf addr +* @param [in] memBuf: Mbuf addr * @param [out] buf: Mbuf data addr * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t mbuf, void **buf); +RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t memBuf, void **buf); /** * @ingroup rt_mem_queue * @brief get total Buffer size of Mbuf -* @param [in] mbuf: Mbuf addr +* @param [in] memBuf: Mbuf addr * @param [out] totalSize: total buffer size of Mbuf * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t mbuf, uint64_t *totalSize); +RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t memBuf, uint64_t *totalSize); /** * @ingroup rt_mem_queue * @brief Get the address and length of its user_data from the specified Mbuf -* @param [in] mbuf: Mbuf addr +* @param [in] memBuf: Mbuf addr * @param [out] priv: address of its user_data * @param [out] size: length of its user_data * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtMbufGetPrivInfo (rtMbufPtr_t mbuf, void **priv, uint64_t *size); +RTS_API rtError_t rtMbufGetPrivInfo(rtMbufPtr_t memBuf, void **priv, uint64_t *size); // mem group typedef struct { @@ -573,6 +598,14 @@ RTS_API rtError_t rtQueueSubF2NFEvent(int32_t devId, uint32_t qId, uint32_t grou */ RTS_API rtError_t rtQueueSubscribe(int32_t devId, uint32_t qId, uint32_t groupId, int32_t type); +/** +* @ingroup rtBufEventTrigger +* @brief buf event trigger +* @param [in] name, group name +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtBufEventTrigger(const char_t *name); + #if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 2807a705..f550887d 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -44,6 +44,11 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_PROFILER_TRACE_EX, RT_MODEL_TASK_FFTS_TASK, RT_MODEL_TASK_FFTS_PLUS_TASK, + RT_MODEL_TASK_DSA_TASK, + RT_MODEL_TASK_CMO, + RT_MODEL_TASK_BARRIER, + RT_MODEL_TASK_NPU_GET_FLOAT_STATUS, + RT_MODEL_TASK_NPU_CLEAR_FLOAT_STATUS, } rtModelTaskType_t; typedef enum tagModelStreamType { @@ -115,9 +120,9 @@ typedef struct tagKernelTaskInfo { uint16_t argsCount; uint16_t argsSize; uint16_t reserved; - char_t *stubFunc; + const char_t *stubFunc; uint8_t *smDesc; - uint8_t *args; + const uint8_t *args; uint16_t *argsOffset; } rtKernelTaskInfo_t; @@ -126,17 +131,17 @@ typedef struct tagAllKernelTaskInfo { uint16_t argsCount; uint16_t argsSize; uint16_t reserved; - void *devfunc; + const void *kernelInfoExt; void *handle; uint8_t *smDesc; - uint8_t *args; + const uint8_t *args; uint16_t *argsOffset; } rtAllKernelTaskInfo_t; typedef struct tagKernelTaskInfoEx { uint32_t flags; uint32_t argsSize; - void *args; + const void *args; uint32_t reserved[6]; } rtKernelTaskInfoEx_t; @@ 
-198,9 +203,9 @@ typedef struct tagProfilerTraceExTaskInfo { } rtProfilerTraceEx_t; typedef struct tagrtMemcpyAsyncTaskInfo { - void *dst; + const void *dst; uint64_t destMax; - void *src; + const void *src; uint64_t count; uint32_t kind; uint32_t reserved; @@ -212,9 +217,9 @@ typedef struct tagrtNotifyTaskInfo { } rtNotifyTaskInfo_t; typedef struct tagrtReduceAsyncTaskInfo { - void *dst; + const void *dst; uint64_t destMax; - void *src; + const void *src; uint64_t count; uint32_t kind; uint32_t type; @@ -481,6 +486,16 @@ RTS_API rtError_t rtDebugRegister(rtModel_t mdl, uint32_t flag, const void *addr */ RTS_API rtError_t rtDebugUnRegister(rtModel_t mdl); +/** + * @ingroup rt_model + * @brief set model group id + * @param [in] mdl model + * @param [in] schGrpId groupId (0,4) 0:default invalid value 1-4 valid value Maximum support 4 groups + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelSetSchGroupId(rtModel_t mdl, const int16_t schGrpId); + #if defined(__cplusplus) } #endif diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h index a9c91897..b778550f 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars.h @@ -7,7 +7,7 @@ #define CCE_RUNTIME_RT_STARS_H #include "base.h" - +#include "rt_stars_define.h" #if defined(__cplusplus) extern "C" { #endif @@ -80,6 +80,25 @@ RTS_API rtError_t rtCdqEnQueue(const char_t *queName, uint32_t cdqeIndex, void * RTS_API rtError_t rtCdqEnQueuePtrMode(const char_t *queName, uint32_t cdqeIndex, const void *ptrAddr, rtStream_t stm); +/** + * @ingroup rt_stars + * @brief launch common cmo task on the stream. + * @param [in] taskInfo cmo task info + * @param [in] stm launch task on the stream + * @param [in] flag flag + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtCmoTaskLaunch(rtCmoTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); + +/** + * @ingroup rt_stars + * @brief launch barrier cmo task on the stream. 
+ * @param [in] taskInfo barrier task info + * @param [in] stm launch task on the stream + * @param [in] flag flag + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtBarrierTaskLaunch(rtBarrierTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); #if defined(__cplusplus) } diff --git a/third_party/fwkacllib/inc/runtime/rt_stars_define.h b/third_party/fwkacllib/inc/runtime/rt_stars_define.h index 260809af..4920ef00 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars_define.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars_define.h @@ -32,6 +32,37 @@ typedef struct tagStarsSqeHeader { uint16_t taskId; } rtStarsSqeHeader_t; +typedef struct tagStarsDsaSqe { + // 0-7 bytes + rtStarsSqeHeader_t sqeHeader; + // 8-11 bytes + uint32_t start : 1; + uint32_t functionType : 3; + uint32_t dataType : 3; + uint32_t algoType : 3; + uint32_t paramVldBitmap : 5; + uint32_t paramAddrValBitmap : 7; + uint32_t reserved0 : 10; + // 12-15 bytes + uint16_t sqeIndex; + uint8_t kernelCredit; + uint8_t reserved1; + // 16-31 bytes + uint32_t dsaCfgResultAddrLow; + uint32_t dsaCfgResultAddrHigh; + uint32_t dsaCfgStateAddrLow; + uint32_t dsaCfgStateAddrHigh; + // 32-47 bytes + uint32_t dsaCfgParamAddrLow; + uint32_t dsaCfgParamAddrHigh; + uint32_t dsaCfgSeedLow; + uint32_t dsaCfgSeedHigh; + // 48-63 bytes + uint32_t dsaCfgNumberLow; + uint32_t dsaCfgNumberHigh; + uint32_t reserved2[2]; +} rtStarsDsaSqe_t; + // ffts+ type typedef enum tagFftsPlusType { RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved @@ -83,6 +114,33 @@ typedef struct tagFftsPlusSqe { uint32_t reserved16[4]; } rtFftsPlusSqe_t; +typedef struct tagCmoTaskInfo { + uint8_t qos; + uint8_t partId; + uint8_t pmg; + uint8_t reserved; + uint16_t cmoType; + uint16_t opCode; + uint16_t numInner; + uint16_t numOuter; + uint32_t logicId; + uint32_t lengthInner; + uint64_t sourceAddr; + uint32_t striderOuter; + uint32_t striderInner; +} rtCmoTaskInfo_t; + +typedef struct tagBarrierCmoInfo { + uint16_t cmoType; // 0 is barrier, 1 is invalid, Prefetch is 2, Write_back is 3, FE/GE only use invalid type. + uint32_t logicId; +} rtBarrierCmoInfo_t; + +#define RT_CMO_MAX_BARRIER_NUM 6U // 6U is max support +typedef struct tagBarrierTaskInfo { + uint8_t logicIdNum; + rtBarrierCmoInfo_t cmoInfo[RT_CMO_MAX_BARRIER_NUM]; +} rtBarrierTaskInfo_t; + #pragma pack(pop) #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) diff --git a/third_party/fwkacllib/inc/tdt/data_common.h b/third_party/fwkacllib/inc/tdt/data_common.h index 7b1d631b..a9b347c4 100644 --- a/third_party/fwkacllib/inc/tdt/data_common.h +++ b/third_party/fwkacllib/inc/tdt/data_common.h @@ -1,21 +1,14 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* @file data_common.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. 
+* +* This program is used to data structure +*/ #ifndef HOST_INNER_INC_DATA_COMMON_H_ #define HOST_INNER_INC_DATA_COMMON_H_ +#include namespace tdt { #ifndef TDT_DATA_TYPE diff --git a/third_party/fwkacllib/inc/tdt/index_transform.h b/third_party/fwkacllib/inc/tdt/index_transform.h index a62e0185..a5af2c83 100644 --- a/third_party/fwkacllib/inc/tdt/index_transform.h +++ b/third_party/fwkacllib/inc/tdt/index_transform.h @@ -1,18 +1,10 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* @file index_transform.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2018-2019. All Rights Reserved. +* +* This program is used to get logical device id by phy device id. +*/ #ifndef INC_TDT_INDEX_TRANSFORM_H #define INC_TDT_INDEX_TRANSFORM_H diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index dc9e670f..b78eee75 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -1,18 +1,10 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/** +* @file status.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. +* +* This program is used to describe status +*/ #ifndef INC_TDT_STATUS_H_ #define INC_TDT_STATUS_H_ diff --git a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h index 3e7d11ee..ea23211c 100644 --- a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h +++ b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h @@ -1,18 +1,10 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* @file tdt_host_interface.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. 
+*
+* This program is used to provide TDT host server interfaces
+*/
 #ifndef HOST_INNER_INC_TDT_HOST_INTERFACE_H_
 #define HOST_INNER_INC_TDT_HOST_INTERFACE_H_
diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h
index 36fc500e..406ab1c3 100644
--- a/third_party/fwkacllib/inc/tdt/tsd_client.h
+++ b/third_party/fwkacllib/inc/tdt/tsd_client.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright (c) Hisilicon Technologies Co., Ltd. 2018-2021. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,17 +14,22 @@
 * limitations under the License.
 */

-#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H_
-#define TDT_HOST_INNER_INC_TSD_CLIENT_H_
+#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H
+#define TDT_HOST_INNER_INC_TSD_CLIENT_H

 #include
 #include
 #include
 #include
-#include "tdt/status.h"
-#include "tdt/data_common.h"
+#include "tsd/status.h"
 #include "toolchain/prof_callback.h"

+#ifdef WIN_TSD
+#define TDT_LIB_EXPORT __declspec(dllexport)
+#else
+#define TDT_LIB_EXPORT __attribute__((visibility("default")))
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif  // __cplusplus
@@ -50,7 +55,51 @@ extern "C" {
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 */
-TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize);
+TDT_LIB_EXPORT uint32_t TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize);
+
+/**
+* @ingroup Open
+* @brief Used for the Framework process to communicate with the TSDDaemon process on the 1981 platform,
+* and notify TSD to complete the initialization of other processes
+*
+* @par Function
+* Used for the Framework process to communicate with the TSDDaemon process,
+* and notify TSD to complete the initialization of other processes
+*
+* @param logicDeviceId [IN] type #unsigned int. Logic device ID
+* @param rankSize [IN] type #unsigned int. The rankSize of the training.
+* The default value is 1. When rankSize is greater than 1,
+* HCCP will be launched to perform collective communication operations.
+* @param deviceMode [IN] type unsigned int. The device running mode of aicpuSd,
+* which includes chipMode and DieMode
+* @retval TDT_OK Success
+* @retval OtherValues Failure
+*
+* @par Dependency
+* @li data_common.h: Header file where 'TDT_StatusT' defined
+*/
+TDT_LIB_EXPORT uint32_t TsdOpenEx(const uint32_t logicDeviceId, const uint32_t rankSize, const uint32_t deviceMode);
+
+/**
+* @ingroup InitialQs
+* @brief Used for the Framework process to communicate with the TSDDaemon process,
+* and notify TSD to complete the initialization of QS processes
+*
+* @par Function
+* Used for the Framework process to communicate with the TSDDaemon process,
+* and notify TSD to complete the initialization of QS processes
+*
+* @param logicDeviceId [IN] type #unsigned int. Logic device ID
+* @param groupName [IN] type #char pointer. qs group name sent by the host process
+* @retval TDT_OK Success
+* @retval OtherValues Failure
+*
+* @par Dependency
+* @li libtsdclient.so: Library to which the interface belongs.
+* @li tsd_client.h: Header file where the interface declaration is located.
+* @li data_common.h: Header file where 'TDT_StatusT' defined
+*/
+TDT_LIB_EXPORT uint32_t TsdInitQs(const uint32_t logicDeviceId, const char_t * const groupName = nullptr);
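+
+/*
+ * Illustrative call sequence (a sketch only, assuming logic device 0 and a
+ * single-rank job; TDT_OK is the success status from status.h):
+ *
+ *   if (TsdOpen(0U, 1U) != TDT_OK) {
+ *       // handle open failure
+ *   }
+ *   // ... framework runs workloads on the device ...
+ *   (void)TsdClose(0U);
+ */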

 /**
  * @ingroup Close
@@ -64,11 +113,12 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t
 * @retval OtherValues Failure
 *
 * @par Dependency
+ * @li libtsdclient.so: Library to which the interface belongs.
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 */
-TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId);
+TDT_LIB_EXPORT uint32_t TsdClose(const uint32_t logicDeviceId);

 /**
 * @ingroup UpdateProfilingMode
@@ -86,7 +136,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId);
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 */
-TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag);
+TDT_LIB_EXPORT uint32_t UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag);

 /**
 * @ingroup TsdSetMsprofReporterCallback
@@ -105,9 +155,22 @@ TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, con
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 * @li prof_callback.h: Headerfile where 'MsprofReporterCallback' defined
 */
-TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback);
+TDT_LIB_EXPORT uint32_t TsdSetMsprofReporterCallback(const MsprofReporterCallback callback);
+
+/**
+* @ingroup TsdSetAttr
+* @brief used to set tsd attr
+*
+* @par key
+* key of the tsd attr to set; currently only RunMode is supported
+*
+* @par value
+* value selecting the corresponding run mode: PROCESS_MODE or THREAD_MODE
+* @retval TDT_OK Success
+* @retval OtherValues Failure
+*/
+TDT_LIB_EXPORT uint32_t TsdSetAttr(const char * const attrKey, const char * const attrValue);
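+
+/*
+ * Illustrative usage sketch (key and values taken from the comment above;
+ * not part of the upstream header):
+ *
+ *   uint32_t ret = TsdSetAttr("RunMode", "PROCESS_MODE");
+ *   // ret == TDT_OK on success
+ */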

 #ifdef __cplusplus
 }
 #endif  // __cplusplus

-#endif  // TDT_HOST_INNER_INC_TSD_CLIENT_H_
+#endif  // TDT_HOST_INNER_INC_TSD_CLIENT_H
diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
index 8ecd5f14..15baae46 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
@@ -1,17 +1,8 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
+ * Description: handle perf data
+ * Author: xp
+ * Create: 2019-10-13
 */

 #ifndef MSPROFILER_API_PROF_ACL_API_H_
@@ -25,6 +16,8 @@
 #define PROF_L2CACHE 0x00000010ULL
 #define PROF_HCCL_TRACE 0x00000020ULL
 #define PROF_TRAINING_TRACE 0x00000040ULL
+#define PROF_MSPROFTX 0x00000080ULL
+#define PROF_RUNTIME_API 0x00000100ULL

 // system profilinig switch
 #define PROF_CPU 0x00010000ULL
@@ -36,17 +29,18 @@
 #define PROF_AIVECTORCORE_SAMPLE 0x00400000ULL

 #define PROF_MODEL_EXECUTE 0x0000001000000ULL
-#define PROF_RUNTIME_API 0x0000002000000ULL
 #define PROF_RUNTIME_TRACE 0x0000004000000ULL
 #define PROF_SCHEDULE_TIMELINE 0x0000008000000ULL
 #define PROF_SCHEDULE_TRACE 0x0000010000000ULL
 #define PROF_AIVECTORCORE_METRICS 0x0000020000000ULL
 #define PROF_SUBTASK_TIME 0x0000040000000ULL
-
-#define PROF_TASK_TRACE 0x0000005000062ULL
+#define PROF_OP_DETAIL 0x0000080000000ULL

 #define PROF_MODEL_LOAD 0x8000000000000000ULL

+#define PROF_TASK_TRACE (PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | \
+                         PROF_HCCL_TRACE | PROF_TASK_TIME)
+
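+// Note: the OR above expands to 0x0000005000062ULL (0x0000001000000 |
+// 0x0000004000000 | 0x40 | 0x20 | 0x2), i.e. the same value as the literal
+// definition it replaces.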
 // DataTypeConfig MASK
 #define PROF_ACL_API_MASK 0x00000001ULL
 #define PROF_TASK_TIME_MASK 0x00000002ULL
@@ -55,6 +49,8 @@
 #define PROF_L2CACHE_MASK 0x00000010ULL
 #define PROF_HCCL_TRACE_MASK 0x00000020ULL
 #define PROF_TRAINING_TRACE_MASK 0x00000040ULL
+#define PROF_MSPROFTX_MASK 0x00000080ULL
+#define PROF_RUNTIME_API_MASK 0x00000100ULL

 // system profilinig mask
 #define PROF_CPU_MASK 0x00010000ULL
@@ -66,12 +62,12 @@
 #define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000ULL

 #define PROF_MODEL_EXECUTE_MASK 0x0000001000000ULL
-#define PROF_RUNTIME_API_MASK 0x0000002000000ULL
 #define PROF_RUNTIME_TRACE_MASK 0x0000004000000ULL
 #define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000ULL
 #define PROF_SCHEDULE_TRACE_MASK 0x0000010000000ULL
 #define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000ULL
 #define PROF_SUBTASK_TIME_MASK 0x0000040000000ULL
+#define PROF_OP_DETAIL_MASK 0x0000080000000ULL

 #define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL

@@ -104,7 +100,7 @@ extern "C" {

 MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);

-typedef int32_t Status;
+typedef int32_t Status;
 typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1;
 ///
 /// @ingroup AscendCL
@@ -135,6 +131,33 @@ MSVP_PROF_API Status aclgrphProfGraphUnSubscribe(const uint32_t graphId);
 * @retval 0 for failed
 */
 MSVP_PROF_API size_t aclprofGetGraphId(const void *opInfo, size_t opInfoLen, uint32_t index);
+
+/**
+* @ingroup AscendCL
+* @brief set stamp payload
+*
+*
+* @retval void
+*/
+MSVP_PROF_API int aclprofSetStampPayload(void *stamp, const int32_t type, void *value);
+
+/**
+* @ingroup AscendCL
+* @brief set category and name
+*
+*
+* @retval void
+*/
+MSVP_PROF_API int aclprofSetCategoryName(uint32_t category, const char *categoryName);
+
+/**
+* @ingroup AscendCL
+* @brief set category to stamp
+*
+*
+* @retval void
+*/
+MSVP_PROF_API int aclprofSetStampCategory(void *stamp, uint32_t category);
 #ifdef __cplusplus
 }
 #endif
diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h
index 5f5a151b..24101aad 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_callback.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h
@@ -1,17 +1,8 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
+ * Description: handle perf data
+ * Author: xp
+ * Create: 2019-10-13
 */

 #ifndef MSPROFILER_PROF_CALLBACK_H_
@@ -76,7 +67,8 @@ enum MsprofReporterModuleId {
     MSPROF_MODULE_HCCL,      // HCCL
     MSPROF_MODULE_ACL,       // AclModule
     MSPROF_MODULE_FRAMEWORK, // Framework
-    MSPROF_MODULE_RUNTIME    // runtime
+    MSPROF_MODULE_RUNTIME,   // runtime
+    MSPROF_MODULE_MSPROF     // msprofTx
 };

 /**
@@ -119,7 +111,7 @@ struct MsprofGeOptions {
 */
 enum MsprofCtrlCallbackType {
     MSPROF_CTRL_INIT_ACL_ENV = 0,  // start profiling with acl env
-    MSPROF_CTRL_INIT_ACL_JSON,     // start profiling with acl.json
+    MSPROF_CTRL_INIT_ACL_JSON,     // start profiling with acl.json
     MSPROF_CTRL_INIT_GE_OPTIONS,   // start profiling with ge env and options
     MSPROF_CTRL_FINALIZE,          // stop profiling
     MSPROF_CTRL_INIT_DYNA = 0xFF,  // start profiling for dynamic profiling
diff --git a/third_party/fwkacllib/inc/toolchain/prof_common.h b/third_party/fwkacllib/inc/toolchain/prof_common.h
new file mode 100644
index 00000000..bc56fb7d
--- /dev/null
+++ b/third_party/fwkacllib/inc/toolchain/prof_common.h
@@ -0,0 +1,449 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
+ * Description: handle perf data
+ * Author: Huawei Technologies Co., Ltd.
+ * Create: 2019-10-13
+ */
+#ifndef MSPROFILER_PROF_COMMON_H_
+#define MSPROFILER_PROF_COMMON_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#include <stdint.h>
+
+#define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a
+
+enum MsprofDataTag {
+    MSPROF_ACL_DATA_TAG = 0,            // acl data tag, range: 0~19
+    MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, // ge data tag, range: 20~39
+    MSPROF_GE_DATA_TAG_FUSION = 21,
+    MSPROF_GE_DATA_TAG_INFER = 22,
+    MSPROF_GE_DATA_TAG_TASK = 23,
+    MSPROF_GE_DATA_TAG_TENSOR = 24,
+    MSPROF_GE_DATA_TAG_STEP = 25,
+    MSPROF_GE_DATA_TAG_ID_MAP = 26,
+    MSPROF_GE_DATA_TAG_HOST_SCH = 27,
+    MSPROF_RUNTIME_DATA_TAG_API = 40,   // runtime data tag, range: 40~59
+    MSPROF_RUNTIME_DATA_TAG_TRACK = 41,
+    MSPROF_AICPU_DATA_TAG = 60,         // aicpu data tag, range: 60~79
+    MSPROF_HCCL_DATA_TAG = 80,          // hccl data tag, range: 80~99
+    MSPROF_DP_DATA_TAG = 100,           // dp data tag, range: 100~119
+    MSPROF_MSPROFTX_DATA_TAG = 120,     // msproftx data tag, range: 120~139
+    MSPROF_DATA_TAG_MAX = 65536,        // data tag value type is uint16_t
+};
+
+/**
+ * @brief struct of mixed data
+ */
+#define MSPROF_MIX_DATA_RESERVE_BYTES 7
+#define MSPROF_MIX_DATA_STRING_LEN 120
+enum MsprofMixDataType {
+    MSPROF_MIX_DATA_HASH_ID = 0,
+    MSPROF_MIX_DATA_STRING,
+};
+struct MsprofMixData {
+    uint8_t type; // MsprofMixDataType
+    uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES];
+    union {
+        uint64_t hashId;
+        char dataStr[MSPROF_MIX_DATA_STRING_LEN];
+    } data;
+};
+
+/**
+ * @brief profiling command info
+ */
+#define MSPROF_MAX_DEV_NUM 64
+struct MsprofCommandHandle {
+    uint64_t profSwitch;
+    uint64_t profSwitchHi;
+    uint32_t devNums;
+    uint32_t devIdList[MSPROF_MAX_DEV_NUM];
+    uint32_t modelId;
+    uint32_t type;
+};
+
+/**
+ * @brief struct of data reported by acl
+ */
+#define MSPROF_ACL_DATA_RESERVE_BYTES 32
+#define MSPROF_ACL_API_NAME_LEN 64
+enum
MsprofAclApiType { + MSPROF_ACL_API_TYPE_OP = 1, + MSPROF_ACL_API_TYPE_MODEL, + MSPROF_ACL_API_TYPE_RUNTIME, + MSPROF_ACL_API_TYPE_OTHERS, +}; +struct MsprofAclProfData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_ACL_DATA_TAG; + uint32_t apiType; // enum MsprofAclApiType + uint64_t beginTime; + uint64_t endTime; + uint32_t processId; + uint32_t threadId; + char apiName[MSPROF_ACL_API_NAME_LEN]; + uint8_t reserve[MSPROF_ACL_DATA_RESERVE_BYTES]; +}; + +/** + * @brief struct of data reported by GE + */ +#define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104 +struct MsprofGeProfModelLoadData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD; + uint32_t modelId; + MsprofMixData modelName; + uint64_t startTime; + uint64_t endTime; + uint8_t reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES]; +}; + +#define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8 +#define MSPROF_GE_FUSION_OP_NUM 8 +struct MsprofGeProfFusionData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION; + uint32_t modelId; + MsprofMixData fusionName; + uint64_t inputMemSize; + uint64_t outputMemSize; + uint64_t weightMemSize; + uint64_t workspaceMemSize; + uint64_t totalMemSize; + uint64_t fusionOpNum; + uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM]; + uint8_t reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES]; +}; + +#define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64 +struct MsprofGeProfInferData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER; + uint32_t modelId; + MsprofMixData modelName; + uint32_t requestId; + uint32_t threadId; + uint64_t inputDataStartTime; + uint64_t inputDataEndTime; + uint64_t inferStartTime; + uint64_t inferEndTime; + uint64_t outputDataStartTime; + uint64_t outputDataEndTime; + uint8_t reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES]; +}; + +#define MSPROF_GE_TASK_DATA_RESERVE_BYTES 16 +#define MSPROF_GE_OP_TYPE_LEN 56 +enum MsprofGeTaskType { + MSPROF_GE_TASK_TYPE_AI_CORE = 0, + MSPROF_GE_TASK_TYPE_AI_CPU, + MSPROF_GE_TASK_TYPE_AIV, +}; +enum MsprofGeShapeType { + MSPROF_GE_SHAPE_TYPE_STATIC = 0, + MSPROF_GE_SHAPE_TYPE_DYNAMIC, +}; +struct MsprofGeOpType { + uint8_t type; // MsprofMixDataType + uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; + union { + uint64_t hashId; + char dataStr[MSPROF_GE_OP_TYPE_LEN]; + } data; +}; +struct MsprofGeProfTaskData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK; + uint32_t taskType; // MsprofGeTaskType + MsprofMixData opName; + MsprofGeOpType opType; + uint64_t curIterNum; + uint64_t timeStamp; + uint32_t shapeType; // MsprofGeShapeType + uint32_t blockDims; + uint32_t modelId; + uint32_t streamId; + uint32_t taskId; + uint32_t threadId; + uint8_t reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES]; +}; + +#define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8 +#define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8 +#define MSPROF_GE_TENSOR_DATA_NUM 5 +enum MsprofGeTensorType { + MSPROF_GE_TENSOR_TYPE_INPUT = 0, + MSPROF_GE_TENSOR_TYPE_OUTPUT, +}; +struct MsprofGeTensorData { + uint32_t tensorType; // MsprofGeTensorType + uint32_t format; + uint32_t dataType; + uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN]; +}; + +struct MsprofGeProfTensorData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR; + uint32_t modelId; + uint64_t curIterNum; + uint32_t streamId; + uint32_t taskId; + uint32_t tensorNum; + MsprofGeTensorData 
tensorData[MSPROF_GE_TENSOR_DATA_NUM];
+    uint8_t reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27
+enum MsprofGeStepTag {
+    MSPROF_GE_STEP_TAG_BEGIN = 0,
+    MSPROF_GE_STEP_TAG_END,
+};
+struct MsprofGeProfStepData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP;
+    uint32_t modelId;
+    uint32_t streamId;
+    uint32_t taskId;
+    uint64_t timeStamp;
+    uint64_t curIterNum;
+    uint32_t threadId;
+    uint8_t tag; // MsprofGeStepTag
+    uint8_t reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6
+struct MsprofGeProfIdMapData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP;
+    uint32_t graphId;
+    uint32_t modelId;
+    uint32_t sessionId;
+    uint64_t timeStamp;
+    uint16_t mode;
+    uint8_t reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24
+struct MsprofGeProfHostSchData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH;
+    uint32_t threadId;  // record in start event
+    uint64_t element;
+    uint64_t event;
+    uint64_t startTime; // record in start event
+    uint64_t endTime;   // record in end event
+    uint8_t reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by RunTime
+ */
+#define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106
+#define MSPROF_RUNTIME_TASK_ID_NUM 10
+#define MSPROF_RUNTIME_API_NAME_LEN 64
+struct MsprofRuntimeProfApiData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API;
+    uint32_t threadId;
+    uint64_t entryTime;
+    uint64_t exitTime;
+    uint64_t dataSize;
+    uint8_t apiName[MSPROF_RUNTIME_API_NAME_LEN];
+    uint32_t retCode;
+    uint32_t streamId;
+    uint32_t taskNum;
+    uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM];
+    uint16_t memcpyDirection;
+    uint8_t reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10
+#define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32
+struct MsprofRuntimeProfTrackData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK;
+    uint32_t threadId;
+    uint64_t timeStamp;
+    char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN];
+    uint32_t taskId;
+    uint16_t streamId;
+    uint8_t reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by AICPU
+ */
+#define MSPROF_AICPU_DATA_RESERVE_BYTES 9
+struct MsprofAicpuProfData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_AICPU_DATA_TAG;
+    uint16_t streamId;
+    uint16_t taskId;
+    uint64_t runStartTime;
+    uint64_t runStartTick;
+    uint64_t computeStartTime;
+    uint64_t memcpyStartTime;
+    uint64_t memcpyEndTime;
+    uint64_t runEndTime;
+    uint64_t runEndTick;
+    uint32_t threadId;
+    uint32_t deviceId;
+    uint64_t submitTick;
+    uint64_t scheduleTick;
+    uint64_t tickBeforeRun;
+    uint64_t tickAfterRun;
+    uint32_t kernelType;
+    uint32_t dispatchTime;
+    uint32_t totalTime;
+    uint16_t fftsThreadId;
+    uint8_t version;
+    uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by DP
+ */
+#define MSPROF_DP_DATA_RESERVE_BYTES 16
+#define MSPROF_DP_DATA_ACTION_LEN 16
+#define MSPROF_DP_DATA_SOURCE_LEN 64
+struct MsprofDpProfData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_DP_DATA_TAG;
+    uint32_t rsv; // Ensure 8-byte alignment
+    uint64_t
timeStamp; + char action[MSPROF_DP_DATA_ACTION_LEN]; + char source[MSPROF_DP_DATA_SOURCE_LEN]; + uint64_t index; + uint64_t size; + uint8_t reserve[MSPROF_DP_DATA_RESERVE_BYTES]; +}; + +/** + * @brief struct of data reported by HCCL + */ +#pragma pack(4) +struct MsprofHcclProfNotify { + uint32_t taskID; + uint64_t notifyID; + uint32_t stage; + uint32_t remoteRank; + uint32_t transportType; + uint32_t role; // role {0: dst, 1:src} + double durationEstimated; +}; + +struct MsprofHcclProfReduce { + uint32_t taskID; + uint64_t src; + uint64_t dst; + uint64_t size; + uint32_t op; // {0: sum, 1: mul, 2: max, 3: min} + uint32_t dataType; // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64} + uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} + uint32_t remoteRank; + uint32_t transportType; // transport type {0: SDMA, 1: RDMA, 2:LOCAL} + uint32_t role; // role {0: dst, 1:src} + double durationEstimated; +}; + +struct MsprofHcclProfRDMA { + uint32_t taskID; + uint64_t src; + uint64_t dst; + uint64_t size; + uint64_t notifyID; + uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} + uint32_t remoteRank; + uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL} + uint32_t role; // role {0: dst, 1:src} + uint32_t type; // RDMA type {0: RDMASendNotify, 1:RDMASendPayload} + double durationEstimated; +}; + +struct MsprofHcclProfMemcpy { + uint32_t taskID; + uint64_t src; + uint64_t dst; + uint64_t size; + uint64_t notifyID; + uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} + uint32_t remoteRank; + uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL} + uint32_t role; // role {0: dst, 1:src} + double durationEstimated; +}; + +struct MsprofHcclProfStageStep { + uint32_t rank; + uint32_t rankSize; +}; + +struct MsprofHcclProfFlag { + uint64_t cclTag; + uint64_t groupName; + uint32_t localRank; + uint32_t workFlowMode; +}; + +/** + * @name MsprofHcclProfData + * @brief struct of data reported by hccl + */ +struct MsprofHcclProfData { + uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; + uint16_t dataTag = MSPROF_HCCL_DATA_TAG; + uint32_t planeID; + uint32_t deviceID; + uint32_t streamID; + double ts; + char name[16]; + union { + MsprofHcclProfNotify notify; + MsprofHcclProfReduce reduce; + MsprofHcclProfStageStep stageStep; + MsprofHcclProfMemcpy forMemcpy; + MsprofHcclProfRDMA RDMA; + MsprofHcclProfFlag flag; + } args; +}; +#pragma pack() + +/** + * @name MsprofStampInfo + * @brief struct of data reported by msproftx + */ +struct MsprofStampInfo { + uint16_t magicNumber; + uint16_t dataTag; + uint32_t processId; + uint32_t threadId; + uint32_t category; //marker category + uint32_t eventType; + int32_t payloadType; + union PayloadValue //payload info for marker + { + uint64_t ullValue; + int64_t llValue; + double dValue; + uint32_t uiValue[2]; + int32_t iValue[2]; + float fValue[2]; + } payload; + uint64_t startTime; + uint64_t endTime; + int32_t messageType; + char message[128]; + uint8_t reserve0[4]; + uint8_t reserve1[72]; +}; + +#ifdef __cplusplus +} +#endif + +#endif // MSPROFILER_PROF_COMMON_H_ diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index a4680efe..48c753ce 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -111,14 +111,14 @@ extern "C" { #define OPERATION_LOG_MASK (0x10000000) #define RESERVERD_LENGTH 52 -typedef struct tagDCODE { - const char 
*cName; - int cVal; +typedef struct { + const char *cName; + int cVal; } DCODE; typedef struct tagKV { - char *kname; - char *value; + char *kname; + char *value; } KeyValue; typedef enum { @@ -139,73 +139,74 @@ typedef struct { * module id */ enum { - SLOG, /**< Slog */ - IDEDD, /**< IDE daemon device */ - IDEDH, /**< IDE daemon host */ - HCCL, /**< HCCL */ - FMK, /**< Adapter */ - HIAIENGINE, /**< Matrix */ - DVPP, /**< DVPP */ - RUNTIME, /**< Runtime */ - CCE, /**< CCE */ + SLOG, /**< Slog */ + IDEDD, /**< IDE daemon device */ + IDEDH, /**< IDE daemon host */ + HCCL, /**< HCCL */ + FMK, /**< Adapter */ + HIAIENGINE, /**< Matrix */ + DVPP, /**< DVPP */ + RUNTIME, /**< Runtime */ + CCE, /**< CCE */ #if (OS_TYPE == LINUX) HDC, /**< HDC */ #else HDCL, #endif // OS_TYPE - DRV, /**< Driver */ - MDCFUSION, /**< Mdc fusion */ - MDCLOCATION, /**< Mdc location */ - MDCPERCEPTION, /**< Mdc perception */ - MDCFSM, - MDCCOMMON, - MDCMONITOR, - MDCBSWP, /**< MDC base software platform */ - MDCDEFAULT, /**< MDC undefine */ - MDCSC, /**< MDC spatial cognition */ - MDCPNC, - MLL, /**< abandon */ - DEVMM, /**< Dlog memory managent */ - KERNEL, /**< Kernel */ - LIBMEDIA, /**< Libmedia */ - CCECPU, /**< aicpu shedule */ - ASCENDDK, /**< AscendDK */ - ROS, /**< ROS */ - HCCP, - ROCE, - TEFUSION, - PROFILING, /**< Profiling */ - DP, /**< Data Preprocess */ - APP, /**< User Application */ - TS, /**< TS module */ - TSDUMP, /**< TSDUMP module */ - AICPU, /**< AICPU module */ - LP, /**< LP module */ - TDT, /**< tsdaemon or aicpu shedule */ - FE, - MD, - MB, - ME, - IMU, - IMP, - GE, /**< Fmk */ - MDCFUSA, - CAMERA, - ASCENDCL, - TEEOS, - ISP, - SIS, - HSM, - DSS, - PROCMGR, // Process Manager, Base Platform - BBOX, - AIVECTOR, - TBE, - FV, - MDCMAP, - TUNE, - HSS, /**< helper */ - INVLID_MOUDLE_ID + DRV, /**< Driver */ + MDCFUSION, /**< Mdc fusion */ + MDCLOCATION, /**< Mdc location */ + MDCPERCEPTION, /**< Mdc perception */ + MDCFSM, + MDCCOMMON, + MDCMONITOR, + MDCBSWP, /**< MDC base software platform */ + MDCDEFAULT, /**< MDC undefine */ + MDCSC, /**< MDC spatial cognition */ + MDCPNC, + MLL, /**< abandon */ + DEVMM, /**< Dlog memory managent */ + KERNEL, /**< Kernel */ + LIBMEDIA, /**< Libmedia */ + CCECPU, /**< aicpu shedule */ + ASCENDDK, /**< AscendDK */ + ROS, /**< ROS */ + HCCP, + ROCE, + TEFUSION, + PROFILING, /**< Profiling */ + DP, /**< Data Preprocess */ + APP, /**< User Application */ + TS, /**< TS module */ + TSDUMP, /**< TSDUMP module */ + AICPU, /**< AICPU module */ + LP, /**< LP module */ + TDT, /**< tsdaemon or aicpu shedule */ + FE, + MD, + MB, + ME, + IMU, + IMP, + GE, /**< Fmk */ + MDCFUSA, + CAMERA, + ASCENDCL, + TEEOS, + ISP, + SIS, + HSM, + DSS, + PROCMGR, // Process Manager, Base Platform + BBOX, + AIVECTOR, + TBE, + FV, + MDCMAP, + TUNE, + HSS, /**< helper */ + FFTS, + INVLID_MOUDLE_ID }; /** @@ -262,9 +263,9 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define dlog_error(moduleId, fmt, ...) \ - do { \ - DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } while (TMP_LOG != 0) + do { \ + DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -275,11 +276,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define dlog_warn(moduleId, fmt, ...) 
\ - do { \ - if(CheckLogLevel(moduleId, DLOG_WARN) == 1) { \ - DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, DLOG_WARN) == 1) { \ + DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -290,11 +291,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define dlog_info(moduleId, fmt, ...) \ - do { \ - if(CheckLogLevel(moduleId, DLOG_INFO) == 1) { \ - DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, DLOG_INFO) == 1) { \ + DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -305,11 +306,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define dlog_debug(moduleId, fmt, ...) \ - do { \ - if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \ - DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \ + DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -319,9 +320,9 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define dlog_event(moduleId, fmt, ...) \ - do { \ - DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } while (TMP_LOG != 0) + do { \ + DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -333,11 +334,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define Dlog(moduleId, level, fmt, ...) \ - do { \ - if(CheckLogLevel(moduleId, level) == 1) { \ - DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, level) == 1) { \ + DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -350,11 +351,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define DlogSub(moduleId, submodule, level, fmt, ...) \ - do { \ - if(CheckLogLevel(moduleId, level) == 1) { \ - DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, level) == 1) { \ + DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -368,11 +369,11 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr); * @param [in]fmt: log content */ #define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...) \ - do { \ - if(CheckLogLevel(moduleId, level) == 1) { \ - DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevel(moduleId, level) == 1) { \ + DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -452,11 +453,11 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); * @param [in]fmt: log content */ #define DlogForC(moduleId, level, fmt, ...) 
\ - do { \ - if(CheckLogLevelForC(moduleId, level) == 1) { \ - DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -469,11 +470,11 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); * @param [in]fmt: log content */ #define DlogSubForC(moduleId, submodule, level, fmt, ...) \ - do { \ - if(CheckLogLevelForC(moduleId, level) == 1) { \ - DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog @@ -487,11 +488,11 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); * @param [in]fmt: log content */ #define DlogWithKVForC(moduleId, level, pstKVArray, kvNum, fmt, ...) \ - do { \ - if(CheckLogLevelForC(moduleId, level) == 1) { \ - DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ - } \ - } while (TMP_LOG != 0) + do { \ + if (CheckLogLevelForC(moduleId, level) == 1) { \ + DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (TMP_LOG != 0) /** * @ingroup slog