
!2108 upgrade Ascend package 11 Feb 22

Merge pull request !2108 from yanghaoran/release
tags/v1.8.0^2
yanghaoran Gitee 2 years ago
parent
commit 4402452f71
No known key found for this signature in database. GPG Key ID: 173E9B9CA92EEF8F
100 changed files with 3534 additions and 1700 deletions
1. +14 -0 inc/external/acl/acl_base.h
2. +51 -0 inc/external/acl/acl_op_compiler.h
3. +74 -0 inc/external/acl/acl_prof.h
4. +18 -0 inc/external/acl/acl_rt.h
5. +33 -32 inc/external/acl/error_codes/ge_error_codes.h
6. +7 -1 inc/external/acl/error_codes/rt_error_codes.h
7. +7 -1 inc/external/acl/ops/acl_dvpp.h
8. +15 -52 inc/external/ge/ge_api_error_codes.h
9. +142 -136 inc/external/ge/ge_api_types.h
10. +4 -4 inc/external/ge/ge_ir_build.h
11. +1 -1 inc/external/runtime/rt_error_codes.h
12. +8 -8 inc/framework/common/debug/ge_log.h
13. +6 -6 inc/framework/common/debug/log.h
14. +1 -9 inc/framework/common/file_constant_util.h
15. +2 -2 inc/framework/common/fmk_error_codes.h
16. +45 -36 inc/framework/common/ge_inner_error_codes.h
17. +34 -8 inc/framework/common/ge_types.h
18. +2 -2 inc/framework/common/helper/model_helper.h
19. +18 -24 inc/framework/common/helper/om_file_helper.h
20. +3 -87 inc/framework/common/l2_cache_optimize.h
21. +4 -136 inc/framework/common/op/attr_value_util.h
22. +17 -70 inc/framework/common/op/ge_op_utils.h
23. +10 -10 inc/framework/common/op_types.h
24. +2 -4 inc/framework/common/profiling/ge_profiling.h
25. +173 -0 inc/framework/common/profiling_definitions.h
26. +3 -3 inc/framework/common/scope_guard.h
27. +21 -18 inc/framework/common/string_util.h
28. +29 -8 inc/framework/common/taskdown_common.h
29. +39 -268 inc/framework/common/types.h
30. +100 -109 inc/framework/common/util.h
31. +8 -7 inc/framework/engine/dnnengine.h
32. +76 -50 inc/framework/executor/ge_executor.h
33. +10 -8 inc/framework/generator/ge_generator.h
34. +0 -4 inc/framework/memory/memory_api.h
35. +1 -1 inc/framework/memory/memory_assigner.h
36. +4 -4 inc/framework/omg/omg.h
37. +6 -34 inc/framework/omg/omg_inner_types.h
38. +7 -0 inc/framework/omg/parser/model_parser.h
39. +4 -4 inc/framework/omg/parser/op_parser.h
40. +2 -4 inc/framework/omg/parser/parser_factory.h
41. +3 -3 inc/framework/omg/parser/parser_inner_ctx.h
42. +1 -3 inc/framework/omg/version.h
43. +1 -1 metadef
44. +57 -57 third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
45. +47 -0 third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
46. +0 -1 third_party/fwkacllib/inc/cce/aicpu_engine.h
47. +9 -7 third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
48. +14 -5 third_party/fwkacllib/inc/hccl/base.h
49. +0 -66 third_party/fwkacllib/inc/hccl/hcom.h
50. +9 -15 third_party/fwkacllib/inc/mmpa/mmpa_api.h
51. +9 -15 third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
52. +3 -0 third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h
53. +86 -83 third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h
54. +9 -15 third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
55. +65 -0 third_party/fwkacllib/inc/ops/OWNERS
56. +111 -6 third_party/fwkacllib/inc/ops/array_ops.h
57. +8 -4 third_party/fwkacllib/inc/ops/ctc_ops.h
58. +45 -0 third_party/fwkacllib/inc/ops/data_flow_ops.h
59. +155 -1 third_party/fwkacllib/inc/ops/deep_md.h
60. +11 -3 third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
61. +10 -10 third_party/fwkacllib/inc/ops/image_ops.h
62. +1 -1 third_party/fwkacllib/inc/ops/math_ops.h
63. +61 -22 third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
64. +68 -0 third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
65. +42 -4 third_party/fwkacllib/inc/ops/nn_calculation_ops.h
66. +97 -1 third_party/fwkacllib/inc/ops/nn_detect_ops.h
67. +76 -20 third_party/fwkacllib/inc/ops/nn_norm_ops.h
68. +4 -2 third_party/fwkacllib/inc/ops/nn_pooling_ops.h
69. +21 -8 third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
70. +27 -0 third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
71. +1 -0 third_party/fwkacllib/inc/ops/ocr_ops.h
72. +160 -1 third_party/fwkacllib/inc/ops/random_ops.h
73. +139 -0 third_party/fwkacllib/inc/ops/randomdsa_ops.h
74. +123 -0 third_party/fwkacllib/inc/ops/reduce_ops.h
75. +38 -0 third_party/fwkacllib/inc/ops/rnn.h
76. +124 -6 third_party/fwkacllib/inc/ops/selection_ops.h
77. +24 -0 third_party/fwkacllib/inc/ops/split_combination_ops.h
78. +1 -38 third_party/fwkacllib/inc/ops/vector_search.h
79. +11 -2 third_party/fwkacllib/inc/runtime/base.h
80. +21 -3 third_party/fwkacllib/inc/runtime/config.h
81. +1 -1 third_party/fwkacllib/inc/runtime/context.h
82. +3 -3 third_party/fwkacllib/inc/runtime/dev.h
83. +2 -2 third_party/fwkacllib/inc/runtime/dvfsprofile.h
84. +15 -0 third_party/fwkacllib/inc/runtime/event.h
85. +35 -11 third_party/fwkacllib/inc/runtime/kernel.h
86. +24 -0 third_party/fwkacllib/inc/runtime/mem.h
87. +10 -0 third_party/fwkacllib/inc/runtime/rt_dfx.h
88. +2 -1 third_party/fwkacllib/inc/runtime/rt_ffts_plus.h
89. +49 -16 third_party/fwkacllib/inc/runtime/rt_mem_queue.h
90. +24 -9 third_party/fwkacllib/inc/runtime/rt_model.h
91. +20 -1 third_party/fwkacllib/inc/runtime/rt_stars.h
92. +58 -0 third_party/fwkacllib/inc/runtime/rt_stars_define.h
93. +7 -14 third_party/fwkacllib/inc/tdt/data_common.h
94. +6 -14 third_party/fwkacllib/inc/tdt/index_transform.h
95. +7 -15 third_party/fwkacllib/inc/tdt/status.h
96. +6 -14 third_party/fwkacllib/inc/tdt/tdt_host_interface.h
97. +73 -10 third_party/fwkacllib/inc/tdt/tsd_client.h
98. +42 -19 third_party/fwkacllib/inc/toolchain/prof_acl_api.h
99. +8 -16 third_party/fwkacllib/inc/toolchain/prof_callback.h
100. +449 -0 third_party/fwkacllib/inc/toolchain/prof_common.h

+14 -0 inc/external/acl/acl_base.h

@@ -134,6 +134,7 @@ static const int ACL_ERROR_DRV_FAILURE = 500004;
static const int ACL_ERROR_PROFILING_FAILURE = 500005;

#define ACL_TENSOR_SHAPE_RANGE_NUM 2
#define ACL_TENSOR_VALUE_RANGE_NUM 2
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE

typedef enum {
@@ -336,6 +337,19 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);


/**
* @ingroup AscendCL
* @brief set value range for aclTensorDesc
*
* @param desc [OUT] pointer to the data of aclTensorDesc
* @param valueCount [IN] the number of value
* @param valueRange [IN] the range of value
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorValueRange(aclTensorDesc *desc, size_t valueCount,
int64_t valueRange[][ACL_TENSOR_VALUE_RANGE_NUM]);
/**
* @ingroup AscendCL
* @brief get data type specified by the tensor description
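A minimal usage sketch for the value-range interface added above (it assumes an initialized AscendCL environment; the 1-D shape and the ranges are illustrative only):

// Sketch only: describe a 1-D tensor with an unknown dimension, then bound
// both its shape and its element values. Error handling is abbreviated.
int64_t dims[1] = {-1};  // dimension unknown at build time
aclTensorDesc *desc = aclCreateTensorDesc(ACL_INT64, 1, dims, ACL_FORMAT_ND);
int64_t dimRange[1][ACL_TENSOR_SHAPE_RANGE_NUM] = {{1, 128}};     // dim 0 in [1, 128]
aclError ret = aclSetTensorShapeRange(desc, 1, dimRange);
int64_t valueRange[1][ACL_TENSOR_VALUE_RANGE_NUM] = {{0, 1000}};  // element values in [0, 1000]
ret = aclSetTensorValueRange(desc, 1, valueRange);
aclDestroyTensorDesc(desc);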


+51 -0 inc/external/acl/acl_op_compiler.h

@@ -41,6 +41,8 @@ typedef enum {


typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag;

typedef struct aclGraphDumpOption aclGraphDumpOption;

/**
* @ingroup AscendCL
* @brief compile op
@@ -114,6 +116,55 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val
*/
ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag);


/**
* @ingroup AscendCL
* @brief generate graph and dump
*
* @param opType [IN] op type
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param inputs [IN] pointer to array of input buffers
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param outputs [IN] pointer to array of outputs buffers
* @param attr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
* @param engineType [IN] engine type
* @param graphDumpPath [IN] dump path, if the suffix is ".txt", it means file path, else it means directory path
* @param graphDumpOpt [IN] dump option, nullptr is supported
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclGenGraphAndDumpForOp(
const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
aclopEngineType engineType, const char *graphDumpPath, const aclGraphDumpOption *graphDumpOpt);

/**
* @ingroup AscendCL
* @brief Create the graph dump option
*
* @retval null for failed
* @retval OtherValues success
*
* @see aclDestroyGraphDumpOpt
*/
ACL_FUNC_VISIBILITY aclGraphDumpOption *aclCreateGraphDumpOpt();

/**
* @ingroup AscendCL
* @brief Destroy graph dump option
*
* @param graphDumpOpt [IN] pointer to the graph dump option
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclCreateGraphDumpOpt
*/
ACL_FUNC_VISIBILITY aclError aclDestroyGraphDumpOpt(const aclGraphDumpOption *graphDumpOpt);

#ifdef __cplusplus
}
#endif
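A minimal sketch of the new single-op graph-dump flow (tensor descriptions and data buffers are assumed to be prepared elsewhere; the op type, engine type and dump path are placeholders):

aclGraphDumpOption *dumpOpt = aclCreateGraphDumpOpt();  // nullptr is also accepted below
const aclError ret = aclGenGraphAndDumpForOp("Add", 2, inputDesc, inputs,  // op type and two inputs
                                             1, outputDesc, outputs,       // one output
                                             nullptr,                      // no attributes
                                             ACL_ENGINE_SYS,               // engine type (assumed value)
                                             "./op_graphs",                // directory; a ".txt" suffix would mean a file path
                                             dumpOpt);
(void) aclDestroyGraphDumpOpt(dumpOpt);  // release the option regardless of the result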


+74 -0 inc/external/acl/acl_prof.h

@@ -37,6 +37,7 @@ extern "C" {
#define ACL_PROF_HCCL_TRACE 0x0020ULL
#define ACL_PROF_TRAINING_TRACE 0x0040ULL
#define ACL_PROF_MSPROFTX 0x0080ULL
#define ACL_PROF_RUNTIME_API 0x0100ULL

/**
* @deprecated please use aclprofGetOpTypeLen and aclprofGetOpTNameLen instead
@@ -367,6 +368,79 @@ MSVP_PROF_API aclprofStepInfo *aclprofCreateStepInfo();
*/
MSVP_PROF_API void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo);


/**
* @ingroup AscendCL
* @brief create pointer to aclprofstamp
*
*
* @retval aclprofStamp pointer
*/
MSVP_PROF_API void *aclprofCreateStamp();

/**
* @ingroup AscendCL
* @brief destory stamp pointer
*
*
* @retval void
*/
MSVP_PROF_API void aclprofDestroyStamp(void *stamp);

/**
* @ingroup AscendCL
* @brief Record push timestamp
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
MSVP_PROF_API aclError aclprofPush(void *stamp);

/**
* @ingroup AscendCL
* @brief Record pop timestamp
*
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
MSVP_PROF_API aclError aclprofPop();

/**
* @ingroup AscendCL
* @brief Record range start timestamp
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
MSVP_PROF_API aclError aclprofRangeStart(void *stamp, uint32_t *rangeId);

/**
* @ingroup AscendCL
* @brief Record range end timestamp
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
MSVP_PROF_API aclError aclprofRangeStop(uint32_t rangeId);

/**
* @ingroup AscendCL
* @brief set message to stamp
*
*
* @retval void
*/
MSVP_PROF_API aclError aclprofSetStampTraceMessage(void *stamp, const char *msg, uint32_t msgLen);

/**
* @ingroup AscendCL
* @brief Record mark timestamp
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
MSVP_PROF_API aclError aclprofMark(void *stamp);

#ifdef __cplusplus
}
#endif
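A minimal sketch of the stamp interfaces added above (it assumes profiling has already been started with ACL_PROF_MSPROFTX enabled; the trace message is a placeholder):

void *stamp = aclprofCreateStamp();
(void) aclprofSetStampTraceMessage(stamp, "preprocess", 10U);  // 10 = strlen("preprocess")
(void) aclprofPush(stamp);                  // bracket a code section on the current thread ...
// ... user code to be profiled ...
(void) aclprofPop();
uint32_t rangeId = 0U;
(void) aclprofRangeStart(stamp, &rangeId);  // ... or span a range identified by rangeId
// ... user code ...
(void) aclprofRangeStop(rangeId);
(void) aclprofMark(stamp);                  // single point-in-time marker
aclprofDestroyStamp(stamp);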


+18 -0 inc/external/acl/acl_rt.h

@@ -44,6 +44,11 @@ typedef enum aclrtEventStatus {
ACL_EVENT_STATUS_RESERVED = 2,
} aclrtEventStatus;


typedef enum aclrtEventRecordedStatus {
ACL_EVENT_RECORDED_STATUS_NOT_READY = 0,
ACL_EVENT_RECORDED_STATUS_COMPLETE = 1,
} aclrtEventRecordedStatus;

typedef enum aclrtEventWaitStatus {
ACL_EVENT_WAIT_STATUS_COMPLETE = 0,
ACL_EVENT_WAIT_STATUS_NOT_READY = 1,
@@ -503,8 +508,21 @@ ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream strea
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_DEPRECATED_MESSAGE("aclrtQueryEvent is deprecated, use aclrtQueryEventStatus instead")
ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status);


/**
* @ingroup AscendCL
* @brief Queries an event's status
*
* @param event [IN] event to query
* @param status [OUT] event recorded status
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtQueryEventStatus(aclrtEvent event, aclrtEventRecordedStatus *status);

/**
* @ingroup AscendCL
* @brief Queries an event's wait-status
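A minimal polling sketch for the new recorded-status query, which replaces the deprecated aclrtQueryEvent (the event is assumed to have been recorded on a stream elsewhere):

aclrtEventRecordedStatus status = ACL_EVENT_RECORDED_STATUS_NOT_READY;
aclError ret;
do {
  ret = aclrtQueryEventStatus(event, &status);  // non-blocking query of the recorded state
} while ((ret == ACL_SUCCESS) && (status != ACL_EVENT_RECORDED_STATUS_COMPLETE));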


+33 -32 inc/external/acl/error_codes/ge_error_codes.h

@@ -32,42 +32,43 @@
#endif

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif
static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000;
static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009;
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011;
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012;
static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013;
static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014;
static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015;
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016;
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017;
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018;
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019;
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020;
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021;
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022;
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000;
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001;
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000;
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005;
static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006;
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007;
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008;
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009;
static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000U;
static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001U;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002U;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003U;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006U;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007U;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008U;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009U;
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011U;
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012U;
static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013U;
static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014U;
static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015U;
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016U;
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017U;
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018U;
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019U;
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020U;
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021U;
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022U;
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000U;
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001U;
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000U;
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001U;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002U;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003U;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004U;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005U;
static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006U;
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007U;
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008U;
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009U;


#ifdef __cplusplus
} // namespace ge


+7 -1 inc/external/acl/error_codes/rt_error_codes.h

@@ -44,6 +44,7 @@ static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callbac
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout

static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
@@ -61,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow


static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
@@ -99,6 +101,11 @@ static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // devic
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set


static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
@@ -107,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconn
#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__

+7 -1 inc/external/acl/ops/acl_dvpp.h

@@ -158,7 +158,13 @@ enum acldvppJpegFormat {
ACL_JPEG_CSS_UNKNOWN = 1000
};


enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0, ACL_DVPP_MODE_UINT32, ACL_DVPP_CHANNEL_ID_UINT64 };
enum acldvppChannelDescParamType {
ACL_DVPP_CSC_MATRIX_UINT32 = 0,
ACL_DVPP_MODE_UINT32,
ACL_DVPP_CHANNEL_ID_UINT64,
ACL_DVPP_CHANNEL_HEIGHT_UINT32,
ACL_DVPP_CHANNEL_WIDTH_UINT32
};


enum aclvdecChannelDescParamType {
ACL_VDEC_CSC_MATRIX_UINT32 = 0,


+15 -52 inc/external/ge/ge_api_error_codes.h

@@ -20,15 +20,27 @@
#include <map>
#include <string>
#include "ge_error_codes.h"
#include "graph/types.h"
#include "ge_api_types.h"


namespace ge {
#ifdef __GNUC__
#define ATTRIBUTED_DEPRECATED(replacement) __attribute__((deprecated("Please use " #replacement " instead.")))
#else
#define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead."))
#endif


// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \
constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30U) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28U) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25U) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17U) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12U) | \
(static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value))); \
const ErrorNoRegisterar g_errorno_##name((name), (desc));

#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_errorno_##name((name), (desc));

namespace ge {
class GE_FUNC_VISIBILITY StatusFactory {
public:
static StatusFactory *Instance() {
@@ -56,7 +68,7 @@ class GE_FUNC_VISIBILITY StatusFactory {
}

std::string GetErrDesc(const uint32_t err) {
const auto iter_find = err_desc_.find(err);
const std::map<uint32_t, std::string>::const_iterator iter_find = err_desc_.find(err);
if (iter_find == err_desc_.end()) {
return "";
}
@@ -82,59 +94,10 @@ class GE_FUNC_VISIBILITY ErrorNoRegisterar {
~ErrorNoRegisterar() {}
};


// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \
constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30U) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28U) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25U) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17U) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12U) | \
(static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value))); \
const ErrorNoRegisterar g_##name##_errorno(name, desc);

#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc);

using Status = uint32_t;

// General error code
GE_ERRORNO(0, 0, 0, 0, 0, SUCCESS, 0, "success");
GE_ERRORNO(0b11, 0b11, 0b111, 0xFFU, 0b11111, FAILED, 0xFFFU, "failed"); /*lint !e401*/


GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PARAM_INVALID, "Parameter invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_NOT_INIT, "GE executor not initialized yet.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "Model id invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "Data size of model invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID, "Model addr invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Queue id of model invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "The model loaded repeatedly.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Dynamic input size invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "Dynamic batch size invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_BATCH_EMPTY, "AIPP batch parameter empty.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_NOT_EXIST, "AIPP parameter not exist.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "Plugin path is invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_FORMAT_INVALID, "Format is invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_SHAPE_INVALID, "Shape is invalid.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DATATYPE_INVALID, "Datatype is invalid.");

GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate memory.");

GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED, "Failed to load model partition.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, "Failed to load weight partition.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "Failed to load task partition.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, "Failed to load op kernel partition.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA, "Failed to release the model data.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_COMMAND_HANDLE, "Command handle error.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_GET_TENSOR_INFO, "Get tensor info error.");
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_UNLOAD_MODEL, "Load model error.");

} // namespace ge


#endif // INC_EXTERNAL_GE_GE_API_ERROR_CODES_H_
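The bit layout noted above the GE_ERRORNO macro can be checked against the SUCCESS/FAILED definitions with a small compile-time sketch; the arithmetic below simply expands the macro by hand:

// runtime:2 | type:2 | level:3 | sysid:8 | modid:5 | value:12 bits, high to low.
// For FAILED every field is at its maximum, so the packed value is
//   (0x3 << 30) | (0x3 << 28) | (0x7 << 25) | (0xFF << 17) | (0x1F << 12) | 0xFFF
// = 0xC0000000 | 0x30000000 | 0x0E000000 | 0x01FE0000 | 0x0001F000 | 0x00000FFF
// = 0xFFFFFFFF, while SUCCESS packs every field to zero.
static_assert(ge::SUCCESS == 0x00000000U, "all fields zero");
static_assert(ge::FAILED == 0xFFFFFFFFU, "all fields at their maximum");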

+142 -136 inc/external/ge/ge_api_types.h

@@ -28,96 +28,98 @@


namespace ge {
// Option key: graph run mode
const char *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode";
const char *const OPTION_DEVICE_TYPE = "ge.deviceType";
const char_t *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode";
const char_t *const OPTION_DEVICE_TYPE = "ge.deviceType";

// Option key: ome init
const char *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId";
const char *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId";
const char *const OPTION_EXEC_JOB_ID = "ge.exec.jobId";
const char *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom";
const char *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd";
const char *const OPTION_EXEC_RANK_ID = "ge.exec.rankId";
const char *const OPTION_EXEC_POD_NAME = "ge.exec.podName";
const char *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode";
const char *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile";
const char *const GE_AICPU_FLAG = "ge.aicpuFlag";
const char *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath";
const char_t *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId";
const char_t *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId";
const char_t *const OPTION_EXEC_JOB_ID = "ge.exec.jobId";
const char_t *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom";
const char_t *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd";
const char_t *const OPTION_EXEC_RANK_ID = "ge.exec.rankId";
const char_t *const OPTION_EXEC_POD_NAME = "ge.exec.podName";
const char_t *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode";
const char_t *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile";
const char_t *const GE_AICPU_FLAG = "ge.aicpuFlag";
const char_t *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath";
// Dump flag and para
const char *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump";
const char *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath";
const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep";
const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode";
const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug";
const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode";
const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild";
const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath";
const char *const OPTION_EXEC_ENABLE_EXCEPTION_DUMP = "ge.exec.enable_exception_dump";
const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses";
const char *const OPTION_EXEC_PROFILING_FPPONIT_OPTIONS = "ge.exec.profilingFpPointOptions";
const char *const OPTION_EXEC_PROFILING_BPPONIT_OPTIONS = "ge.exec.profilingBpPointOptions";
const char_t *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump";
const char_t *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath";
const char_t *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep";
const char_t *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode";
const char_t *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug";
const char_t *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode";
const char_t *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild";
const char_t *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath";
const char_t *const OPTION_EXEC_ENABLE_EXCEPTION_DUMP = "ge.exec.enable_exception_dump";
const char_t *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses";
const char_t *const OPTION_EXEC_PROFILING_FPPONIT_OPTIONS = "ge.exec.profilingFpPointOptions";
const char_t *const OPTION_EXEC_PROFILING_BPPONIT_OPTIONS = "ge.exec.profilingBpPointOptions";
// profiling flag
const char *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode";
const char *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions";
const char_t *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode";
const char_t *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions";
// Hccl flag, if ge.exec.hcclFlag =1, it means load plugin for opskernel, else:ge.exec.hcclFlag =0
const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag";
const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic";
const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory";
const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization";
const char_t *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag";
const char_t *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic";
const char_t *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory";
const char_t *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization";
// Dynamic input flag. ge.exec.dynamicInput=1, means enable dynaimc input,
// ge.exec.dynamicGraphExecuteMode, dynamic_execute[default]
const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput";
const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode";
const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange";
const char *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr";
const char_t *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput";
const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode";
const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange";
const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr";


// Option key: memory init
const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";
const char *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize";
const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";
const char_t *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize";
const char_t *const OPTION_EXEC_REUSE_ZERO_COPY_MEMORY = "ge.exec.reuseZeroCopyMemory";

namespace configure_option {
const char *const STREAM_NUM = "ge.streamNum";
const char *const HEAD_STREAM = "ge.headStream";
const char *const PERF_LEVEL = "ge.perfLevel";
const char *const ENCRYPT_MODE = "ge.encryptMode";
const char *const EK_FILE = "ge.ekFile";
const char *const CERT_FILE = "ge.certFile";
const char *const HW_KEY_FILE = "ge.hwKeyFile";
const char *const PRIVATE_KEY_FILE = "ge.privateKeyFile";
const char *const FRAMEWORK_TYPE = "ge.frameworkType";
const char *const CALIBRATION_CONF_FILE = "ge.calibrationConfFile";
const char *const INSERT_OP_FILE = "ge.insertOpFile";
const char *const OUTPUT_NODE_NAME = "ge.outputNodeName";
const char *const COMPRESS_FLAG = "ge.compressFlag";
const char *const PRECISION_MODE = "ge.exec.precision_mode";
const char *const SINGLE_OP_FLAG = "ge.exec.single_op";
const char *const TRAIN_FLAG = "ge.trainFlag";
const char *const RUN_FLAG = "ge.runFlag";
const char *const LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop";
const char *const TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path";
const char *const DDK_VERSION_FLAG = "ge.DDK_version";
const char *const GE_FE_FLAG = "ge.feFlag";
const char *const STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum";
const char *const OUTPUT_DATATYPE = "ge.outputDatatype";
const char *const OP_SELECT_IMPL_MODE = "ge.opSelectImplmode";
const char *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode";
const char *const HCOM_PARALLEL = "ge.hcomParallel";
const char *const AUTO_TUNE_MODE = "ge.autoTuneMode";
const char *const SOC_VERSION = "ge.socVersion";
const char *const CORE_TYPE = "ge.engineType";
const char *const AICORE_NUM = "ge.aicoreNum";
const char *const L1_FUSION = "ge.l1Fusion";
const char *const BUFFER_OPTIMIZE = "ge.bufferOptimize";
const char *const ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel";
const char *const ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight";
const char *const FUSION_SWITCH_FILE = "ge.fusionSwitchFile";
const char *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel";
const char *const ORIGINAL_MODEL_FILE = "ge.originalModelFile";
const char *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16";
const char *const OP_DEBUG_LEVEL = "ge.opDebugLevel";
const char *const PERFORMANCE_MODE = "ge.performance_mode";
const char *const SHAPE_GENERALIZED_BUILD_MODE = "ge.shape_generalized_build_mode";
const char *const MODIFY_MIXLIST = "ge.exec.modify_mixlist";
const char *const OP_PRECISION_MODE = "ge.exec.op_precision_mode";
const char_t *const STREAM_NUM = "ge.streamNum";
const char_t *const HEAD_STREAM = "ge.headStream";
const char_t *const PERF_LEVEL = "ge.perfLevel";
const char_t *const ENCRYPT_MODE = "ge.encryptMode";
const char_t *const EK_FILE = "ge.ekFile";
const char_t *const CERT_FILE = "ge.certFile";
const char_t *const HW_KEY_FILE = "ge.hwKeyFile";
const char_t *const PRIVATE_KEY_FILE = "ge.privateKeyFile";
const char_t *const FRAMEWORK_TYPE = "ge.frameworkType";
const char_t *const CALIBRATION_CONF_FILE = "ge.calibrationConfFile";
const char_t *const INSERT_OP_FILE = "ge.insertOpFile";
const char_t *const OUTPUT_NODE_NAME = "ge.outputNodeName";
const char_t *const COMPRESS_FLAG = "ge.compressFlag";
const char_t *const PRECISION_MODE = "ge.exec.precision_mode";
const char_t *const SINGLE_OP_FLAG = "ge.exec.single_op";
const char_t *const TRAIN_FLAG = "ge.trainFlag";
const char_t *const RUN_FLAG = "ge.runFlag";
const char_t *const LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop";
const char_t *const TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path";
const char_t *const DDK_VERSION_FLAG = "ge.DDK_version";
const char_t *const GE_FE_FLAG = "ge.feFlag";
const char_t *const STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum";
const char_t *const OUTPUT_DATATYPE = "ge.outputDatatype";
const char_t *const OP_SELECT_IMPL_MODE = "ge.opSelectImplmode";
const char_t *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode";
const char_t *const HCOM_PARALLEL = "ge.hcomParallel";
const char_t *const AUTO_TUNE_MODE = "ge.autoTuneMode";
const char_t *const SOC_VERSION = "ge.socVersion";
const char_t *const CORE_TYPE = "ge.engineType";
const char_t *const AICORE_NUM = "ge.aicoreNum";
const char_t *const L1_FUSION = "ge.l1Fusion";
const char_t *const BUFFER_OPTIMIZE = "ge.bufferOptimize";
const char_t *const ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel";
const char_t *const ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight";
const char_t *const FUSION_SWITCH_FILE = "ge.fusionSwitchFile";
const char_t *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel";
const char_t *const ORIGINAL_MODEL_FILE = "ge.originalModelFile";
const char_t *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16";
const char_t *const OP_DEBUG_LEVEL = "ge.opDebugLevel";
const char_t *const PERFORMANCE_MODE = "ge.performance_mode";
const char_t *const SHAPE_GENERALIZED_BUILD_MODE = "ge.shape_generalized_build_mode";
const char_t *const MODIFY_MIXLIST = "ge.exec.modify_mixlist";
const char_t *const OP_PRECISION_MODE = "ge.exec.op_precision_mode";
} // namespace configure_option
// Configure stream num by Session constructor options param,
// its value should be int32_t type, default value is "1"
@@ -227,7 +229,7 @@ const std::string OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode";
const std::string HCOM_PARALLEL = "ge.hcomParallel";


// configure whether to use dynamic batch size
const char *const kDynamicBatchSize = "ge.dynamicBatchSize";
const char_t *const kDynamicBatchSize = "ge.dynamicBatchSize";


// configure threshold of fusion data size for communication op
const std::string FUSION_TENSOR_SIZE = "ge.fusionTensorSize";
@@ -236,10 +238,10 @@ const std::string INPUT_SHAPE = "ge.inputShape";


const std::string DYNAMIC_NODE_TYPE = "ge.dynamicNodeType";
// configure whether to use dynamic image size
const char *const kDynamicImageSize = "ge.dynamicImageSize";
const char_t *const kDynamicImageSize = "ge.dynamicImageSize";


// Configure whether to use dynamic dims
const char *const kDynamicDims = "ge.dynamicDims";
const char_t *const kDynamicDims = "ge.dynamicDims";


// Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y,
// example: GA|RL, support configure multiple, split by |
@@ -275,29 +277,29 @@ const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel";
// Save original model file name
const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile";


const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum";
const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize";
const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum";
const char_t *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum";
const char_t *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize";
const char_t *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum";


// Configure for print op pass
// Its value should be "0" or "1", default value is "1"
const char *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass";
const char_t *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass";


// Configure operator compilation path
// Its value should be file path, default value is "./"
const char *const DEBUG_DIR = "ge.debugDir";
const char_t *const DEBUG_DIR = "ge.debugDir";


// Configure operator compiler cache path
// Its value should be file path, default value is "./"
const char *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir";
const char_t *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir";


// Configure operator compiler cache mode
// Its value should be "disable", "enable" or "force", default value is "disable"
const char *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode";
const char_t *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode";


// Configure whether to use single stream.
// Its value should be "true" or "false", default value is "false"
const char *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream";
const char_t *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream";


// Configure input fp16 nodes
const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16";
@@ -322,7 +324,7 @@ const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update";
const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode";


// atc and ir option
const char *const INPUT_SHAPE_RANGE = "input_shape_range";
const char_t *const INPUT_SHAPE_RANGE = "input_shape_range";


// Configure express high compile performance or high execute performance
// normal: no need to compile, used saved .o files directly
@@ -338,7 +340,11 @@ const std::string MODIFY_MIXLIST = "ge.exec.modify_mixlist";


const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode";


const char *const FILE_CONSTANT_PATH = "ge.exec.value_bins";
const std::string OP_WAIT_TIMEOUT = "ge.exec.opWaitTimeout";

const std::string OP_EXECUTE_TIMEOUT = "ge.exec.opExecuteTimeout";

const char_t *const FILE_CONSTANT_PATH = "ge.exec.value_bins";


// Graph run mode
enum GraphRunMode { PREDICTION = 0, TRAIN };
@@ -378,49 +384,49 @@ using RunAsyncCallback = std::function<void(Status, std::vector<ge::Tensor> &)>;


// for ir build
namespace ir_option {
static const char *const INPUT_FORMAT = "input_format";
static const char *const INPUT_SHAPE = "input_shape";
static const char *const INPUT_SHAPE_RANGE = ge::INPUT_SHAPE_RANGE;
static const char *const OP_NAME_MAP = "op_name_map";
static const char *const IS_DYNAMIC_INPUT = "is_dynamic_input";
static const char *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout";
static const char *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout";
static const char *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes";
static const char *const OUTPUT = "output";
static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize;
static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize;
static const char *const DYNAMIC_DIMS = kDynamicDims;
static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str();
static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str();
static const char *const TUNE_DEVICE_IDS = ge::TUNE_DEVICE_IDS.c_str();
static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY;
static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str();
static const char *const CORE_TYPE = ge::CORE_TYPE.c_str();
static const char *const SOC_VERSION = ge::SOC_VERSION.c_str();
static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM;
static const char *const AICORE_NUM = ge::AICORE_NUM.c_str();
static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str();
static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str();
static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str();
static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str();
static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str();
static const char *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str();
static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf";
static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str();
static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str();
static const char *const LOG_LEVEL = "log";
static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str();
static const char *const DEBUG_DIR = ge::DEBUG_DIR;
static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR;
static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE;
static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str();
static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str();
static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str();
static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str();
static const char *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str();
static const char *const SHAPE_GENERALIZED_BUILD_MODE = ge::SHAPE_GENERALIZED_BUILD_MODE.c_str();
static const char *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str();
static const char *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str();
static const char_t *const INPUT_FORMAT = "input_format";
static const char_t *const INPUT_SHAPE = "input_shape";
static const char_t *const INPUT_SHAPE_RANGE = ge::INPUT_SHAPE_RANGE;
static const char_t *const OP_NAME_MAP = "op_name_map";
static const char_t *const IS_DYNAMIC_INPUT = "is_dynamic_input";
static const char_t *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout";
static const char_t *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout";
static const char_t *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes";
static const char_t *const OUTPUT = "output";
static const char_t *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize;
static const char_t *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize;
static const char_t *const DYNAMIC_DIMS = kDynamicDims;
static const char_t *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str();
static const char_t *const PRECISION_MODE = ge::PRECISION_MODE.c_str();
static const char_t *const TUNE_DEVICE_IDS = ge::TUNE_DEVICE_IDS.c_str();
static const char_t *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY;
static const char_t *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str();
static const char_t *const CORE_TYPE = ge::CORE_TYPE.c_str();
static const char_t *const SOC_VERSION = ge::SOC_VERSION.c_str();
static const char_t *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM;
static const char_t *const AICORE_NUM = ge::AICORE_NUM.c_str();
static const char_t *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str();
static const char_t *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str();
static const char_t *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str();
static const char_t *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str();
static const char_t *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str();
static const char_t *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str();
static const char_t *const COMPRESS_WEIGHT_CONF = "compress_weight_conf";
static const char_t *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str();
static const char_t *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str();
static const char_t *const LOG_LEVEL = "log";
static const char_t *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str();
static const char_t *const DEBUG_DIR = ge::DEBUG_DIR;
static const char_t *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR;
static const char_t *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE;
static const char_t *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str();
static const char_t *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str();
static const char_t *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str();
static const char_t *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str();
static const char_t *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str();
static const char_t *const SHAPE_GENERALIZED_BUILD_MODE = ge::SHAPE_GENERALIZED_BUILD_MODE.c_str();
static const char_t *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str();
static const char_t *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str();


// for interface: aclgrphBuildModel
#ifdef __GNUC__
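These option keys are plain string constants that are passed as key/value maps when initializing GE or creating a session; a minimal sketch (ge::GEInitialize and ge::Session are assumed from ge_api.h, and the values shown are illustrative):

std::map<std::string, std::string> options = {
    {ge::OPTION_EXEC_DEVICE_ID, "0"},
    {ge::OPTION_GRAPH_RUN_MODE, "0"},          // 0: PREDICTION, 1: TRAIN
    {ge::OPTION_EXEC_PROFILING_MODE, "0"}};
ge::Status ret = ge::GEInitialize(options);    // assumed entry point from ge_api.h
ge::Session session(options);                  // per-session options reuse the same key strings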


+4 -4 inc/external/ge/ge_ir_build.h

@@ -98,10 +98,10 @@ GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph,
* @retval GRAPH_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &))
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char_t *, const ModelBufferData &))
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const std::string &output_file, const ModelBufferData &model);


GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model);
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char_t *output_file, const ModelBufferData &model);


/**
* @ingroup AscendCL
@@ -126,7 +126,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int32_t *major_version, int32
* @retval GRAPH_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len);
GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char_t *file, const size_t len);


/**
* @ingroup AscendCL
@@ -150,7 +150,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type,
* @param cfg_path [IN] the config file path
* @return graphStatus
*/
GE_FUNC_VISIBILITY graphStatus aclgrphSetOpAttr(Graph &graph, aclgrphAttrType attr_type, const char *cfg_path);
GE_FUNC_VISIBILITY graphStatus aclgrphSetOpAttr(Graph &graph, aclgrphAttrType attr_type, const char_t *cfg_path);


}; // namespace ge
#endif // INC_EXTERNAL_GE_IR_BUILD_H_

+1 -1 inc/external/runtime/rt_error_codes.h

@@ -62,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
@@ -113,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconn
#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__

+8 -8 inc/framework/common/debug/ge_log.h

@@ -40,7 +40,7 @@ enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP };


class GE_FUNC_VISIBILITY GeLog {
public:
static const uint64_t GetTid() {
static uint64_t GetTid() {
#ifdef __GNUC__
const uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid));
#else
@@ -56,11 +56,11 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {
return (enable == 1); return (enable == 1);
} }


#define GELOGE(ERROR_CODE, fmt, ...) \
do { \
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
##__VA_ARGS__); \
#define GELOGE(ERROR_CODE, fmt, ...) \
do { \
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
##__VA_ARGS__); \
} while (false) } while (false)


#define GELOGW(fmt, ...) \ #define GELOGW(fmt, ...) \
@@ -91,7 +91,7 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {


#define GELOGT(VALUE, fmt, ...) \ #define GELOGT(VALUE, fmt, ...) \
do { \ do { \
TraceStatus stat = VALUE; \
TraceStatus stat = (VALUE); \
const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
const int32_t idx = static_cast<int32_t>(stat); \ const int32_t idx = static_cast<int32_t>(stat); \
char_t *k = const_cast<char_t *>("status"); \ char_t *k = const_cast<char_t *>("status"); \
@@ -102,7 +102,7 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) {


#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ #define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
do { \ do { \
dlog_error(MOD_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \
dlog_error((MOD_NAME), "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
##__VA_ARGS__); \ ##__VA_ARGS__); \
} while (false) } while (false)
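A minimal usage sketch for the logging macros above. GE_MODULE_NAME is assumed to be defined by the including module; the function and the chosen error code are illustrative.

// Hedged sketch: error and trace logging with GELOGE / GELOGT.
ge::Status CheckDims(const std::vector<int64_t> &dims) {
  if (dims.empty()) {
    GELOGE(ge::PARAM_INVALID, "Dim list is empty, expect at least %d dim(s).", 1);
    return ge::PARAM_INVALID;
  }
  GELOGT(TRACE_RUNNING, "Checked %zu dims.", dims.size());
  return ge::SUCCESS;
}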


+ 6
- 6
inc/framework/common/debug/log.h View File

@@ -69,7 +69,7 @@
do { \ do { \
const ge::Status _chk_status = (expr); \ const ge::Status _chk_status = (expr); \
if (_chk_status != ge::SUCCESS) { \ if (_chk_status != ge::SUCCESS) { \
GELOGE((ge::FAILED), __VA_ARGS__); \
GELOGE(_chk_status, __VA_ARGS__); \
} \ } \
} while (false) } while (false)


@@ -213,9 +213,9 @@
// If expr is not RT_ERROR_NONE, print the log // If expr is not RT_ERROR_NONE, print the log
#define GE_CHK_RT(expr) \ #define GE_CHK_RT(expr) \
do { \ do { \
const rtError_t _rt_ret = (expr); \
if (_rt_ret != RT_ERROR_NONE) { \
GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \
const rtError_t _rt_err = (expr); \
if (_rt_err != RT_ERROR_NONE) { \
GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_err); \
} \ } \
} while (false) } while (false)


@@ -278,7 +278,7 @@
return (_status); \ return (_status); \
} \ } \
} while (false) } while (false)
namespace ge {
template <typename T> template <typename T>
GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) {
std::string fmt; std::string fmt;
@@ -287,5 +287,5 @@ GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) {
fmt = st.str(); fmt = st.str();
return fmt; return fmt;
} }
} // namespace ge
#endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ #endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_
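A usage sketch for the check macros and FmtToStr above. rtStreamSynchronize stands in for any runtime call returning rtError_t; the exact text produced by FmtToStr depends on its internal stream formatting.

// Hedged sketch: check a runtime call and format a value for a message.
ge::Status RunStep(rtStream_t stream) {
  GE_CHK_RT(rtStreamSynchronize(stream));  // on failure, logs "Call rt api failed, ret: 0x%X"
  const std::string step_str = ge::FmtToStr(42);  // formatting is whatever FmtToStr's stream produces
  (void)step_str;
  return ge::SUCCESS;
}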

+ 1
- 9
inc/framework/common/file_constant_util.h View File

@@ -27,11 +27,6 @@
#include "graph/ge_tensor.h" #include "graph/ge_tensor.h"


namespace ge { namespace ge {
extern const int64_t kBlockSize;
extern const std::string kBinFileValues;
extern const std::string kBinIdValue;
extern const std::string kBinFilePathValue;

struct FileConstantInfo { struct FileConstantInfo {
std::string value_bin_file_id; std::string value_bin_file_id;
std::string value_bin_file_path; std::string value_bin_file_path;
@@ -47,14 +42,11 @@ void from_json(const nlohmann::json &j, OptionInfo &option_info);


Status GetFilePathFromOption(std::map<std::string, std::string> &file_id_and_path_map); Status GetFilePathFromOption(std::map<std::string, std::string> &file_id_and_path_map);


Status CopyOneWeightFromFile(const void *curr_dev_ptr, const std::string &value, const size_t file_constant_size,
Status CopyOneWeightFromFile(const void *const curr_dev_ptr, const std::string &value, const size_t file_constant_size,
size_t &left_size); size_t &left_size);


Status GetFilePath(const OpDescPtr &op_desc, const std::map<std::string, std::string> &file_id_and_path_map, Status GetFilePath(const OpDescPtr &op_desc, const std::map<std::string, std::string> &file_id_and_path_map,
std::string &file_path); std::string &file_path);

Status GetFileConstantElementTotalSize(const GeShape &shape, const DataType data_type, int64_t &mem_size,
const Format format = FORMAT_ND);
} // namespace ge } // namespace ge


#endif // INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H #endif // INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H

+ 2
- 2
inc/framework/common/fmk_error_codes.h View File

@@ -44,7 +44,7 @@
// Each module uses the following four macros to define error codes: // Each module uses the following four macros to define error codes:
#define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, (name), (value)) #define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, (name), (value))
#define DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, (name), (value)) #define DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, (name), (value))
#define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, name, value)
#define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, (name), (value))


#define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno((name), (desc)); #define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno((name), (desc));


@@ -74,7 +74,7 @@ class GE_FUNC_VISIBILITY StatusFactory {


class GE_FUNC_VISIBILITY ErrorNoRegisterar { class GE_FUNC_VISIBILITY ErrorNoRegisterar {
public: public:
ErrorNoRegisterar(uint32_t err, const std::string &desc) {
ErrorNoRegisterar(const uint32_t err, const std::string &desc) {
StatusFactory::Instance()->RegisterErrorNo(err, desc); StatusFactory::Instance()->RegisterErrorNo(err, desc);
} }
~ErrorNoRegisterar() {} ~ErrorNoRegisterar() {}
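A sketch of how the macros above are typically paired. MY_PARSE_FAILED is an illustrative name, and DECLARE_ERRORNO (defined earlier in this header, not shown in this hunk) is assumed to declare an integer status constant.

// Hedged sketch: declare an OMG error number, then register its description.
DECLARE_ERRORNO_OMG(MY_PARSE_FAILED, 10);
DEF_ERRORNO(MY_PARSE_FAILED, "Parsing the original model failed.");  // registered via ErrorNoRegisterar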


+ 45
- 36
inc/framework/common/ge_inner_error_codes.h View File

@@ -22,17 +22,57 @@
#include <string> #include <string>
#include "ge/ge_api_error_codes.h" #include "ge/ge_api_error_codes.h"


// Each module defines error codes using the following macros, name can not be modified to (name)
#define GE_ERRORNO_COMMON(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::COMMON_MODULE, name, (value), (desc))
#define GE_ERRORNO_CLIENT(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::CLIENT_MODULE, name, (value), (desc))
#define GE_ERRORNO_INIT(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::INIT_MODULE, name, (value), (desc))
#define GE_ERRORNO_SESSION(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::SESSION_MODULE, name, (value), (desc))
#define GE_ERRORNO_GRAPH(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::GRAPH_MODULE, name, (value), (desc))
#define GE_ERRORNO_ENGINE(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::ENGINE_MODULE, name, (value), (desc))
#define GE_ERRORNO_OPS(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::OPS_MODULE, name, (value), (desc))
#define GE_ERRORNO_PLUGIN(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::PLUGIN_MODULE, name, (value), (desc))
#define GE_ERRORNO_RUNTIME(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::RUNTIME_MODULE, name, (value), (desc))
#define GE_ERRORNO_EXECUTOR(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_DEVICE, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::EXECUTOR_MODULE, name, (value), (desc))
#define GE_ERRORNO_GENERATOR(name, value, desc) \
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::GENERATOR_MODULE, name, (value), (desc))

// Get error code description
#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value)

#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast<Status>(RT_ERROR)

namespace ge { namespace ge {
// System ID // System ID
enum SystemIdType { SYSID_GE = 8 };
enum class InnSystemIdType { SYSID_GE = 8 };
// Runtime location // Runtime location
enum LogRuntime {
enum class InnLogRuntime {
RT_HOST = 0b01, RT_HOST = 0b01,
RT_DEVICE = 0b10, RT_DEVICE = 0b10,
}; };


// Sub model // Sub model
enum SubModuleId {
enum class InnSubModuleId {
COMMON_MODULE = 0, COMMON_MODULE = 0,
CLIENT_MODULE = 1, CLIENT_MODULE = 1,
INIT_MODULE = 2, INIT_MODULE = 2,
@@ -47,13 +87,13 @@ enum SubModuleId {
}; };


// Error code type // Error code type
enum ErrorCodeType {
enum class InnErrorCodeType {
ERROR_CODE = 0b01, ERROR_CODE = 0b01,
EXCEPTION_CODE = 0b10, EXCEPTION_CODE = 0b10,
}; };


// Error level // Error level
enum ErrorLevel {
enum class InnErrorLevel {
COMMON_LEVEL = 0b000, COMMON_LEVEL = 0b000,
SUGGESTION_LEVEL = 0b001, SUGGESTION_LEVEL = 0b001,
MINOR_LEVEL = 0b010, MINOR_LEVEL = 0b010,
@@ -61,33 +101,6 @@ enum ErrorLevel {
CRITICAL_LEVEL = 0b100, CRITICAL_LEVEL = 0b100,
}; };


// Each module defines error codes using the following macros, name can not be modified to (name)
#define GE_ERRORNO_COMMON(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, (value), (desc))
#define GE_ERRORNO_CLIENT(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, (value), (desc))
#define GE_ERRORNO_INIT(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, (value), (desc))
#define GE_ERRORNO_SESSION(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, (value), (desc))
#define GE_ERRORNO_GRAPH(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, (value), (desc))
#define GE_ERRORNO_ENGINE(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, (value), (desc))
#define GE_ERRORNO_OPS(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, (value), (desc))
#define GE_ERRORNO_PLUGIN(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, (value), (desc))
#define GE_ERRORNO_RUNTIME(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, (value), (desc))
#define GE_ERRORNO_EXECUTOR(name, value, desc) \
GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, (value), (desc))
#define GE_ERRORNO_GENERATOR(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, (value), (desc))

// Get error code description
#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value)

// Common module error code definition // Common module error code definition
GE_ERRORNO_COMMON(MEMALLOC_FAILED, 0, "Failed to allocate memory!"); // 1343225856 GE_ERRORNO_COMMON(MEMALLOC_FAILED, 0, "Failed to allocate memory!"); // 1343225856
GE_ERRORNO_COMMON(PARAM_INVALID, 1, "Parameter's invalid!"); // 1343225857 GE_ERRORNO_COMMON(PARAM_INVALID, 1, "Parameter's invalid!"); // 1343225857
@@ -313,10 +326,6 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph ma
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed.");
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed.");


static inline Status TransRtErrorCode(const int32_t error_code) {
return static_cast<Status>(error_code);
}
#define RT_ERROR_TO_GE_STATUS(RT_ERROR) TransRtErrorCode(RT_ERROR)
} // namespace ge } // namespace ge


#endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ #endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_
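A usage sketch for RT_ERROR_TO_GE_STATUS and GE_GET_ERRORNO_STR defined above. rt_call() is a placeholder for any runtime API returning rtError_t.

// Hedged sketch: convert a runtime code and look up a registered description.
ge::Status CallRuntime() {
  const rtError_t rt_ret = rt_call();  // hypothetical runtime call
  if (rt_ret != RT_ERROR_NONE) {
    return RT_ERROR_TO_GE_STATUS(rt_ret);  // same numeric value, typed as ge::Status
  }
  // Descriptions registered by GE_ERRORNO_COMMON above can be looked up later:
  const std::string desc = GE_GET_ERRORNO_STR(ge::MEMALLOC_FAILED);  // "Failed to allocate memory!"
  (void)desc;
  return ge::SUCCESS;
}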

+ 34
- 8
inc/framework/common/ge_types.h View File

@@ -40,13 +40,13 @@ enum FrameworkType {
CAFFE = 0, CAFFE = 0,
MINDSPORE = 1, MINDSPORE = 1,
TENSORFLOW = 3, TENSORFLOW = 3,
ANDROID_NN,
ONNX,
ANDROID_NN = 4,
ONNX = 5,
}; };


enum class GraphStage : int64_t { GRAPH_STAGE_FUZZ = 0, GRAPH_STAGE_RESERVED }; enum class GraphStage : int64_t { GRAPH_STAGE_FUZZ = 0, GRAPH_STAGE_RESERVED };


const char *const kGraphDumpStage = "DumpStage";
const char_t *const kGraphDumpStage = "DumpStage";


const std::map<std::string, std::string> kFwkTypeToStr = { const std::map<std::string, std::string> kFwkTypeToStr = {
{"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; {"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}};
@@ -70,21 +70,42 @@ const std::string kTaskTypeAicore = "AI_CORE";
const std::string kTaskTypeAicpu = "AI_CPU"; const std::string kTaskTypeAicpu = "AI_CPU";
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";
const std::string kTaskTypeFftsPlus = "FFTS_PLUS"; const std::string kTaskTypeFftsPlus = "FFTS_PLUS";
const std::string kEngineNameVectorCore = "VectorEngine";

const std::string kEngineNameHccl = "ops_kernel_info_hccl";
const std::string kEngineNameRts = "DNN_VM_RTS_OP_STORE";
const std::string kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE";
const std::string kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE";
const std::string kEngineNameAiCpu = "aicpu_ascend_kernel";
const std::string kEngineNameAiCpuTf = "aicpu_tf_kernel";
const std::string kEngineNameAiCore = "AIcoreEngine";
const std::string kAtomicOpType = "DynamicAtomicAddrClean";

const std::string kShapeTypeStatic = "static";
const std::string kShapeTypeDynamic = "dynamic";

constexpr uint64_t kInferSessionId = 0U;
constexpr uint64_t kReleaseFlag = 1U;
constexpr uint32_t kInvalidModelId = 0xFFFFFFFFU;
constexpr size_t kNumTaskWithAtomicAddrCleanTask = 2U;


// dynamic execute mode // dynamic execute mode
const char_t *const kLazyRecompile = "lazy_recompile"; const char_t *const kLazyRecompile = "lazy_recompile";


constexpr size_t kMaxHostMemInputLen = 64U;

// Data cache, including data address and length // Data cache, including data address and length
struct DataBuffer { struct DataBuffer {
public:
void *data; // Data address void *data; // Data address
uint64_t length; // Data length uint64_t length; // Data length
bool isDataSupportMemShare = false; bool isDataSupportMemShare = false;
uint32_t placement = 0U; uint32_t placement = 0U;
DataBuffer(void *data_in, uint64_t data_len, bool is_support_mem_share, uint32_t placement = 0U)
: data(data_in), length(data_len), isDataSupportMemShare(is_support_mem_share), placement(placement) {}


DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false) {}
DataBuffer(void *const data_in, const uint64_t data_len, const bool is_support_mem_share = false,
const uint32_t data_placement = 0U)
: data(data_in), length(data_len), isDataSupportMemShare(is_support_mem_share), placement(data_placement) {}

DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false), placement(0U) {}
}; };
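A short sketch of constructing the DataBuffer above with the new defaulted arguments; the helper function is illustrative.

// Hedged sketch: wrap host memory; mem-share defaults to false and placement to 0.
ge::DataBuffer MakeInput(std::vector<float> &host_data) {
  return ge::DataBuffer(host_data.data(), host_data.size() * sizeof(float));
}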


/// ///
@@ -232,6 +253,9 @@ struct ModelInfo {
class GE_FUNC_VISIBILITY ModelListener { class GE_FUNC_VISIBILITY ModelListener {
public: public:
virtual ~ModelListener() {} virtual ~ModelListener() {}
ModelListener() = default;
ModelListener(const ModelListener &) = delete;
ModelListener &operator=(const ModelListener &) = delete;
/// ///
/// @brief Asynchronous callback interface /// @brief Asynchronous callback interface
/// @param [in] model_id Model ID of the callback /// @param [in] model_id Model ID of the callback
@@ -241,7 +265,9 @@ class GE_FUNC_VISIBILITY ModelListener {
virtual Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result_code, virtual Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result_code,
std::vector<ge::Tensor> &outputs) = 0; std::vector<ge::Tensor> &outputs) = 0;


virtual void SetCallback(const RunAsyncCallback &callback){};
virtual void SetCallback(const RunAsyncCallback &callback) {
(void)callback;
}


virtual uint32_t GetResultCode() { virtual uint32_t GetResultCode() {
return 0U; return 0U;


+ 2
- 2
inc/framework/common/helper/model_helper.h View File

@@ -34,12 +34,13 @@ class GE_FUNC_VISIBILITY ModelHelper {
~ModelHelper(); ~ModelHelper();


Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file,
ge::ModelBufferData &model);
ge::ModelBufferData &model) const;
Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param,
const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape); const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape);
Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file); Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file);
Status LoadModel(const ge::ModelData &model_data); Status LoadModel(const ge::ModelData &model_data);
Status LoadRootModel(const ge::ModelData &model_data); Status LoadRootModel(const ge::ModelData &model_data);
static void SetModelToGeModel(GeModelPtr &ge_model, Model &model);


GeModelPtr GetGeModel(); GeModelPtr GetGeModel();
GeRootModelPtr GetGeRootModel(); GeRootModelPtr GetGeRootModel();
@@ -67,7 +68,6 @@ class GE_FUNC_VISIBILITY ModelHelper {
Status GenerateGeModel(OmFileLoadHelper &om_load_helper); Status GenerateGeModel(OmFileLoadHelper &om_load_helper);
Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper); Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper);
Status LoadModelData(OmFileLoadHelper &om_load_helper); Status LoadModelData(OmFileLoadHelper &om_load_helper);
void SetModelToGeModel(GeModelPtr &ge_model, Model &model) const;
Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const;
Status LoadWeights(OmFileLoadHelper &om_load_helper); Status LoadWeights(OmFileLoadHelper &om_load_helper);
Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const;


+ 18
- 24
inc/framework/common/helper/om_file_helper.h View File

@@ -21,25 +21,20 @@
#include <vector> #include <vector>


#include "external/ge/ge_ir_build.h" #include "external/ge/ge_ir_build.h"
#include "framework/common/fmk_types.h"
#include "framework/common/types.h" #include "framework/common/types.h"
#include "framework/common/ge_types.h" #include "framework/common/ge_types.h"


using ProcParam = struct PROC_PARAM;
using std::string;
using std::vector;

namespace ge { namespace ge {
struct ModelPartition { struct ModelPartition {
ModelPartitionType type; ModelPartitionType type;
uint8_t *data = 0;
uint32_t size = 0;
const uint8_t *data = nullptr;
uint32_t size = 0U;
}; };


struct OmFileContext { struct OmFileContext {
std::vector<ModelPartition> partition_datas_; std::vector<ModelPartition> partition_datas_;
std::vector<char> partition_table_;
uint32_t model_data_len_ = 0;
std::vector<char_t> partition_table_;
uint32_t model_data_len_ = 0U;
}; };


struct SaveParam { struct SaveParam {
@@ -55,13 +50,13 @@ class GE_FUNC_VISIBILITY OmFileLoadHelper {
public: public:
Status Init(const ge::ModelData &model); Status Init(const ge::ModelData &model);


Status Init(uint8_t *model_data, const uint32_t model_data_size);
Status Init(uint8_t *const model_data, const uint32_t model_data_size);


Status Init(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num);
Status Init(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num);


Status GetModelPartition(ModelPartitionType type, ModelPartition &partition);
Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition);


Status GetModelPartition(ModelPartitionType type, ModelPartition &partition, size_t model_index);
Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition, const size_t model_index);


OmFileContext context_; OmFileContext context_;


@@ -70,9 +65,9 @@ class GE_FUNC_VISIBILITY OmFileLoadHelper {
private: private:
Status CheckModelValid(const ge::ModelData &model) const; Status CheckModelValid(const ge::ModelData &model) const;


Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size);
Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size);


Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num);
Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num);


bool is_inited_{false}; bool is_inited_{false};
}; };
@@ -89,25 +84,24 @@ class GE_FUNC_VISIBILITY OmFileSaveHelper {


ModelPartitionTable *GetPartitionTable(); ModelPartitionTable *GetPartitionTable();


Status AddPartition(ModelPartition &partition);

Status AddPartition(ModelPartition &partition, size_t cur_index);
Status AddPartition(const ModelPartition &partition);


const std::vector<ModelPartition> &GetModelPartitions() const;
Status AddPartition(const ModelPartition &partition, const size_t cur_index);


Status SaveModel(const SaveParam &save_param, const char *target_file, ge::ModelBufferData &model,
bool is_offline = true);
Status SaveModel(const SaveParam &save_param, const char_t *const output_file, ge::ModelBufferData &model,
const bool is_offline = true);


Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true);
Status SaveModelToFile(const char_t *const output_file, ge::ModelBufferData &model, const bool is_offline = true);


std::vector<OmFileContext> model_contexts_; std::vector<OmFileContext> model_contexts_;


ModelFileHeader model_header_; ModelFileHeader model_header_;
OmFileContext context_; OmFileContext context_;


ModelPartitionTable *GetPartitionTable(size_t cur_ctx_index);
ModelPartitionTable *GetPartitionTable(const size_t cur_ctx_index);


Status SaveRootModel(const SaveParam &save_param, const char *output_file, ModelBufferData &model, bool is_offline);
Status SaveRootModel(const SaveParam &save_param, const char_t *const output_file, ModelBufferData &model,
const bool is_offline);
}; };
} // namespace ge } // namespace ge
#endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ #endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_
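A usage sketch for OmFileLoadHelper above. MODEL_DEF is an assumed ModelPartitionType enumerator from types.h; error handling is reduced to a single FAILED return.

// Hedged sketch: load an OM model buffer and fetch its model-definition partition.
ge::Status ReadModelDef(const ge::ModelData &model_data, ge::ModelPartition &partition) {
  ge::OmFileLoadHelper load_helper;
  if (load_helper.Init(model_data) != ge::SUCCESS) {
    return ge::FAILED;
  }
  // partition.data / partition.size then describe the read-only model-definition blob
  return load_helper.GetModelPartition(ge::MODEL_DEF, partition);
}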

+ 3
- 87
inc/framework/common/l2_cache_optimize.h View File

@@ -28,97 +28,13 @@
#include "framework/common/util.h" #include "framework/common/util.h"
#include "graph/compute_graph.h" #include "graph/compute_graph.h"


using std::vector;

namespace ge { namespace ge {
// Size of RC memory alignment, 2M // Size of RC memory alignment, 2M
constexpr size_t ALIGN_SIZE = 2097152;

constexpr uint32_t RC_VALUE_DEFAULT = 1;
constexpr uint32_t RC_VALUE_MAX = 32;

// RC data type classification
enum RCType {
RC_DEFAULT, // Such as temporary workspace memory of operator, variable (including global and local variable)
RC_HCOM, // Output of gradient aggregation, RC value should be set to 0
RC_L2LOSS, // Parameter of L2 loss operator, RC value should be set to 0
RC_INPUTOUTPUT, // Input and output tensor of operator, RC value is returned by FE calculation
RC_WEIGHTS, // The weight, fp16, RC value used by FP/BP operator should be set to 1 or the actual access numbers
RC_DW, // The gradient data DW and RC value output by BP operator
// should be set to 1 or the actual access numbers
RC_ARGS // Args of FlowTable, actual access numbers
};

enum MemType { INPUT_TENSOR, OUTPUT_TENSOR, WEIGHT, WORKSPACE };

// Memory usage information < node, type, number >
struct NodeInfo {
std::string nodeName;
MemType memType;
size_t index;
};

// Memory block RC value
struct RCMemoryBlock {
RCType type; // RC type
size_t blockSize; // memory block size
size_t headOffset; // Start offset from base address
size_t tailOffset; // End offset from base address
uint32_t rcCount; // RC value
NodeInfo nodeInfo; // Input and output indexes of node objects to which RC belongs
};

// L2Cache optimizer
class GE_FUNC_VISIBILITY L2CacheOptimize {
public:
explicit L2CacheOptimize(ge::ComputeGraphPtr &graph);
~L2CacheOptimize();

// Collect the information L2Cache Memory optimization
Status Gath();

private:
ge::ComputeGraphPtr graph_;

// Save RC block information list
std::vector<RCMemoryBlock> weightRCs;
std::vector<RCMemoryBlock> opRCs;

// Extract RC information generated by FE from compiled graph
void RetirveRCinfo();

// Take the maximum common divisor of RC values for the duplicate address
void Merge(std::vector<RCMemoryBlock> &blocks);

// The RC information is aligned with the 2m address
void Align(std::vector<RCMemoryBlock> &blocks);

// Weight of l2loss operator, output of gradient aggregation output, RC value set to 0
void HandleOutputZeroRC(RCType type, ge::NodePtr node, std::vector<int64_t> &outputList,
std::vector<RCMemoryBlock> &blocks);

// Processing operator input Tensor's RC
void HandOPInput(ge::NodePtr node, std::vector<int64_t> &inputList, std::vector<RCMemoryBlock> &blocks);

// Processing operator output Tensor's RC
void HandOPoutput(ge::NodePtr node, std::vector<int64_t> &outputList, std::vector<RCMemoryBlock> &blocks);
constexpr size_t ALIGN_SIZE = 2097152U;


// maximum common divisor
uint32_t Measure(uint32_t x, uint32_t y) {
if ((x == 0) || (y == 0)) return RC_VALUE_DEFAULT;
uint32_t z = y;
while (x % y != 0) {
z = x % y;
x = y;
y = z;
}
return z;
}
constexpr uint32_t RC_VALUE_DEFAULT = 1U;
constexpr uint32_t RC_VALUE_MAX = 32U;


bool Contain(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block);
bool Cross(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block);
bool Connect(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block);
};
} // namespace ge } // namespace ge


#endif // INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_ #endif // INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_

+ 4
- 136
inc/framework/common/op/attr_value_util.h View File

@@ -34,143 +34,11 @@
#include <google/protobuf/map.h> #include <google/protobuf/map.h>
#include <unordered_map> #include <unordered_map>
#include <string> #include <string>
#include "external/graph/types.h"
#include "graph/debug/ge_attr_define.h" #include "graph/debug/ge_attr_define.h"
#include "proto/om.pb.h" #include "proto/om.pb.h"


using domi::AttrDef;
using domi::AttrDef_ListValue;
using domi::ModelDef;
using domi::NamedAttrs;
using domi::OpDef;

namespace ge { namespace ge {
using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>;
using AttrDefPair = ::google::protobuf::MapPair<std::string, domi::AttrDef>;

GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, AttrDef &attr, OpDef *opdef);
// DEFINE_ADD_ATTR_VALUE
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs);

GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs);

// DEFINE_ADD_ATTR_VALUE
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, OpDef *opdef);

GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef);

GE_FUNC_VISIBILITY void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef);

// DEFINE_ADD_ATTR_VALUE_LIST
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef);
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef);

GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef);

GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef);

GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef);
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef);

GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef);
GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def);

GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const char *key, const char *value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const float value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const double value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def);

GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def);
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def);

GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int64_t *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def);
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def);

GE_FUNC_VISIBILITY bool HasOpAttr(const OpDef *opdef, const std::string &attr_name);

GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const char *value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const uint32_t value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const int32_t value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const float value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const double value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrDef(const bool value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrList(const std::string &value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrList(const bool value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrList(const float value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrList(const double value, AttrDef *out);
GE_FUNC_VISIBILITY void SetAttrList(const uint32_t value, AttrDef *out);

GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr);
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr);

GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, int32_t *value,
const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, uint32_t *value,
const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, float *value, const AttrDefMap &attr);
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, double *value, const AttrDefMap &attr);
} // namespace ge

#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_
GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, domi::AttrDef *const out);
}
#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_

+ 17
- 70
inc/framework/common/op/ge_op_utils.h View File

@@ -31,18 +31,16 @@
#include "proto/insert_op.pb.h" #include "proto/insert_op.pb.h"


namespace ge { namespace ge {
using domi::Status;


// Add Sub Mul // Add Sub Mul
GE_FUNC_VISIBILITY extern const uint32_t ADD_INPUT_NUM; GE_FUNC_VISIBILITY extern const uint32_t ADD_INPUT_NUM;
GE_FUNC_VISIBILITY extern const uint32_t SUB_INPUT_NUM;
GE_FUNC_VISIBILITY extern const uint32_t MUL_INPUT_NUM; GE_FUNC_VISIBILITY extern const uint32_t MUL_INPUT_NUM;


// Permute // Permute
GE_FUNC_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM; GE_FUNC_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM;


// Ssd PriorBox // Ssd PriorBox
GE_FUNC_VISIBILITY extern const double SSD_PRIORBOX_ASPECT_RATIO_VALUE;
GE_FUNC_VISIBILITY extern const float64_t SSD_PRIORBOX_ASPECT_RATIO_VALUE;


GE_FUNC_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM; GE_FUNC_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM;


@@ -55,8 +53,8 @@ GE_FUNC_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT;
GE_FUNC_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT; GE_FUNC_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT;


// Merge // Merge
GE_FUNC_VISIBILITY extern const uint32_t MERGE_DATA_OUTPUT;
GE_FUNC_VISIBILITY extern const uint32_t MERGE_INDEX_OUTPUT;
GE_FUNC_VISIBILITY extern const int32_t MERGE_DATA_OUTPUT;
GE_FUNC_VISIBILITY extern const int32_t MERGE_INDEX_OUTPUT;


// FunctionOp // FunctionOp
GE_FUNC_VISIBILITY extern const uint32_t IF_COND_INPUT; GE_FUNC_VISIBILITY extern const uint32_t IF_COND_INPUT;
@@ -66,86 +64,35 @@ GE_FUNC_VISIBILITY extern const uint32_t FOR_DELTA_INPUT;
GE_FUNC_VISIBILITY extern const uint32_t FOR_DATA_INPUT; GE_FUNC_VISIBILITY extern const uint32_t FOR_DATA_INPUT;


GE_FUNC_VISIBILITY extern const int32_t NORMAL_TENSOR_SIZE; GE_FUNC_VISIBILITY extern const int32_t NORMAL_TENSOR_SIZE;
/*lint -e148*/
class GE_FUNC_VISIBILITY OpUtils { class GE_FUNC_VISIBILITY OpUtils {
public: public:
///
/// @ingroup domi_ome
/// @brief Check whether check_value is in [min_enum_value, max_enum_value]
/// @return true Within
/// @return false out of range
//
static inline bool CheckEnumValid(int32_t check_value, int32_t min_enum_value, int32_t max_enum_value) {
return check_value < min_enum_value ? false : (check_value >= max_enum_value ? false : true);
}

///
/// @ingroup domi_omg
/// @brief Determine whether to manually calculate the tensor size based on the values of format and dim
/// @param [in] format, Format information of the tensor
/// @param [in] real_dim_cnt, Tensor dim
/// @return true Manually calculate the size based on dim and datatype
/// @return false skip
///
static bool IsComputDimsSize(const int32_t format, const uint32_t real_dim_cnt);

/// ///
/// @brief Extract AIPP parameters from AttrDefMap and splice them /// @brief Extract AIPP parameters from AttrDefMap and splice them
/// @param [in] aipp_attr attr of operator /// @param [in] aipp_attr attr of operator
/// @param [out] aipp_params aipp parameters /// @param [out] aipp_params aipp parameters
/// @return enum of tagCCAippInputFormat /// @return enum of tagCCAippInputFormat
/// ///
static Status ConvertAippParams(const NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params);
static Status TransferDim(const std::vector<int64_t> &dim, std::vector<int64_t> &dim_vector);

static Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams &aipp_params);
template <typename T> template <typename T>
static void SliceData(const std::vector<char *> &input, int64_t chunk_size, std::vector<char *> &output,
int64_t begin, int64_t out_dim, int64_t stride);
static void SliceData(const std::vector<char_t *> &input, const int64_t chunk_size, std::vector<char_t *> &output,
const int64_t begin, const int64_t out_dim, const int64_t stride);
template <typename T> template <typename T>
static Status SetDataByDataType(size_t out_size, const std::vector<char *> &chunk_input,
const std::vector<char *> &chunk_output, GeTensor *output);
static Status SetDataByDataType(const size_t out_size, const std::vector<char_t *> &chunk_input,
const std::vector<char_t *> &chunk_output, GeTensor *const output);
template <typename T> template <typename T>
static Status SetOutputSliceDataByDataType(void *data, int64_t data_size, const std::vector<int64_t> &input_dims,
const std::vector<int64_t> &begin, const std::vector<int64_t> &output_dims,
ge::GeTensor *output, const std::vector<int64_t> &stride);
static Status SetOutputSliceData(void *data, int64_t data_size, int32_t data_type,
static Status SetOutputSliceDataByDataType(void *const data, const int64_t data_size,
const std::vector<int64_t> &input_dims, const std::vector<int64_t> &begin,
const std::vector<int64_t> &output_dims, ge::GeTensor *const output,
const std::vector<int64_t> &stride);
static Status SetOutputSliceData(void *const data, const int64_t data_size, const int32_t data_type,
const std::vector<int64_t> &input_dims, const std::vector<int64_t> &begin, const std::vector<int64_t> &input_dims, const std::vector<int64_t> &begin,
const std::vector<int64_t> &output_dims, ge::GeTensor *const output,
const std::vector<int64_t> &output_dims, GeTensor *const output,
const std::vector<int64_t> &stride); const std::vector<int64_t> &stride);

///
/// @ingroup domi_omg
/// @brief Convert the convolutional weight data from [h, w, c, k] to [k, c, h, w]
/// @param [in] input Weight data in HWCK format
/// @param [in] H value of H dimension
/// @param [in] W value of W dimension
/// @param [in] C value of C dimension
/// @param [in] K value of K dimension
/// @param [out] output Data pointer after conversion. The format is KCHW.
///
static void TransDataHWCK2KCHW(const void *input, int64_t h, int64_t w, int64_t c, int64_t k, void **output);
///
/// @ingroup domi_omg
/// @brief Converts the convolutional weight data from [k, c, h, w] to [h, w, c, k].
/// @param [in] input Weight data in HWCK format
/// @param [in] K value of K dimension
/// @param [in] C value of C dimension
/// @param [in] H value of H dimension
/// @param [in] W value of W dimension
/// @param [out] output Data pointer after conversion. The format is HWCK
///
static void TransDataKCHW2HWCK(const void *input, int64_t k, int64_t c, int64_t h, int64_t w, void *output);

static std::vector<ConstGeTensorPtr> GetWeights(const ge::Node &node);
static std::vector<ConstGeTensorPtr> GetWeights(ge::ConstNodePtr node);
static std::vector<GeTensorPtr> MutableWeights(const ge::Node &node);
static std::vector<GeTensorPtr> MutableWeights(const ge::NodePtr node);
static Status SetWeights(ge::Node &node, const std::vector<ge::GeTensorPtr> &weights);
static Status SetWeights(const ge::NodePtr node, const std::vector<ge::GeTensorPtr> &weights);
static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, const DataType type, static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, const DataType type,
std::vector<int64_t> &dims); std::vector<int64_t> &dims);

private:
static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc);
}; };
/*lint +e148*/
} // namespace ge } // namespace ge
#endif // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ #endif // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_
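A sketch of the weight accessors declared above; the node is assumed to be a valid ge::NodePtr owned by the caller.

// Hedged sketch: rewrite the weights attached to a node via OpUtils.
ge::Status RefreshWeights(const ge::NodePtr &node) {
  std::vector<ge::GeTensorPtr> weights = ge::OpUtils::MutableWeights(*node);  // GetWeights(*node) is the const view
  // ... adjust weights in place as needed ...
  return ge::OpUtils::SetWeights(*node, weights);
}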

+ 10
- 10
inc/framework/common/op_types.h View File

@@ -20,6 +20,8 @@
#include <set> #include <set>
#include <string> #include <string>


#include "graph/types.h"

namespace ge { namespace ge {
class GE_FUNC_VISIBILITY OpTypeContainer { class GE_FUNC_VISIBILITY OpTypeContainer {
public: public:
@@ -30,12 +32,11 @@ class GE_FUNC_VISIBILITY OpTypeContainer {
~OpTypeContainer() = default; ~OpTypeContainer() = default;


void Register(const std::string &op_type) { void Register(const std::string &op_type) {
op_type_list_.insert(op_type);
static_cast<void>(op_type_list_.insert(op_type));
} }


bool IsExisting(const std::string &op_type) { bool IsExisting(const std::string &op_type) {
auto iter_find = op_type_list_.find(op_type);
return iter_find != op_type_list_.end();
return op_type_list_.find(op_type) != op_type_list_.end();
} }


protected: protected:
@@ -47,20 +48,19 @@ class GE_FUNC_VISIBILITY OpTypeContainer {


class GE_FUNC_VISIBILITY OpTypeRegistrar { class GE_FUNC_VISIBILITY OpTypeRegistrar {
public: public:
explicit OpTypeRegistrar(const std::string &op_type) {
explicit OpTypeRegistrar(const std::string &op_type) noexcept {
OpTypeContainer::Instance()->Register(op_type); OpTypeContainer::Instance()->Register(op_type);
} }
~OpTypeRegistrar() {} ~OpTypeRegistrar() {}
}; };
} // namespace ge


#define REGISTER_OPTYPE_DECLARE(var_name, str_name) \ #define REGISTER_OPTYPE_DECLARE(var_name, str_name) \
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *var_name;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char_t *var_name;


#define REGISTER_OPTYPE_DEFINE(var_name, str_name) \ #define REGISTER_OPTYPE_DEFINE(var_name, str_name) \
const char *var_name = str_name; \
const OpTypeRegistrar g_##var_name##_reg(str_name);

#define IS_OPTYPE_EXISTING(str_name) (OpTypeContainer::Instance()->IsExisting(str_name))
} // namespace ge
const char_t *var_name = str_name; \
const ge::OpTypeRegistrar g_##var_name##_reg(str_name);


#define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name))
#endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_ #endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_
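A sketch of the registration macros above; HYPOTHETICAL_OP and its string are illustrative names only.

// Hedged sketch: declare the op type in a header, define it in one .cc file.
REGISTER_OPTYPE_DECLARE(HYPOTHETICAL_OP, "HypotheticalOp");  // header side: extern const char_t *HYPOTHETICAL_OP
REGISTER_OPTYPE_DEFINE(HYPOTHETICAL_OP, "HypotheticalOp");   // source side: also registers into OpTypeContainer
const bool known = IS_OPTYPE_EXISTING("HypotheticalOp");     // true once the registrar above has run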

+ 2
- 4
inc/framework/common/profiling/ge_profiling.h View File

@@ -24,10 +24,8 @@
/// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading /// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading
/// @return Status result /// @return Status result
/// ///
GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream);
GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(const uint64_t index_id, const uint16_t tag_id, rtStream_t const stream);


GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id);

GE_FUNC_VISIBILITY void ProfSetGraphIdToDeviceMap(uint32_t graph_id, uint32_t &device_id);
GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(const uint32_t graph_id, uint32_t &device_id);


#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ #endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_

+ 173
- 0
inc/framework/common/profiling_definitions.h View File

@@ -0,0 +1,173 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AIR_CXX_PROFILING_DEFINITIONS_H
#define AIR_CXX_PROFILING_DEFINITIONS_H
#include <string>
#include <iostream>
#include <mutex>
#include <unordered_map>
#include "graph/profiler.h"
#include "external/ge/ge_api_types.h"
#include "toolchain/prof_callback.h"
namespace ge {
namespace profiling {
enum {
kAclCompileAndExecute,
kAclMatchOpModel,
kAclMatchStaticOpModel,
kAclMatchDynamicOpModel,
kAclExecuteAsync,
kAclLoadSingleOp,
kAclBuildOpModel,
kInferShape,
kTiling,
kUpdateShape,
kConstPrepare,
kInitHybridExecuteArgs,
kInitInferShapeContext,
kDestroyInferShapeContext,
kResetSubgraphExecutor,
kCommitInferShapeTask,
kDeviceToHost,
kPrepareTask,
kLaunchTask,
kCommitTilingTask,
kAtomic,
kKernelLaunchPrepare,
kRtKernelLaunch,
kOpExecute,
kAllocMem,
kCopyH2D,

// Add new definitions here
kProfilingIndexEnd
};
constexpr uint64_t kInvalidHashId = 0UL;

class ProfilingContext {
public:
static bool IsDumpToStdEnabled();
static ProfilingContext &GetInstance();
ProfilingContext();
~ProfilingContext();

/*
   * An alternative design is for `IsEnabled` to only check whether profiler_ is a null pointer, without a separate
   * enabled flag; that saves one flag, but it also means profiler_ must stay null whenever profiling is disabled.
   * For performance, the profiling mechanism calls `RegisterString` at compile and load time to register strings
   * with profiler_, and later execution only uses the registered indices. This leads to a scenario where profiling
   * is not enabled at compile time (compilation takes long, and enabling profiling then would not reflect the real
   * execution cost), so the compile-time string registration never takes effect; if profiling is then switched on
   * dynamically at execution time, the registered strings cannot be retrieved during execution.
*/
bool IsEnabled() const noexcept {
return enabled_ && profiler_ != nullptr;
}
void SetEnable() noexcept {
enabled_ = true;
}
void SetDisable() noexcept {
enabled_ = false;
}

void RecordCurrentThread(const int64_t element, const int64_t event, const EventType et,
const std::chrono::time_point<std::chrono::system_clock> time_point) {
if (IsEnabled()) {
profiler_->RecordCurrentThread(element, event, et, time_point);
}
}

void RecordCurrentThread(const int64_t element, const int64_t event, const EventType et) {
RecordCurrentThread(element, event, et, std::chrono::system_clock::now());
}

const Profiler *GetProfiler() const {
return profiler_.get();
}

void Dump(std::ostream &out_stream) const {
if (IsEnabled()) {
profiler_->Dump(out_stream);
} else {
out_stream << "Profiling not enable, skip to dump" << std::endl;
}
}

void DumpToStdOut() const {
Dump(std::cout);
}

void Reset() {
if (IsEnabled()) {
profiler_->Reset();
}
}

int64_t RegisterString(const std::string &str);
int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str);
void UpdateElementHashId(const MsprofReporterCallback reporter_callback);
static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str,
uint64_t &hash_id);
size_t GetRegisterStringNum() const {
return strings_to_index_.size();
}

void Init();

private:
void UpdateHashByStr(const std::string &str, const uint64_t hash);

private:
bool inited_;
bool enabled_;
int64_t str_index_;
std::unordered_map<std::string, int64_t> strings_to_index_;
std::mutex strings_to_index_mutex_;
std::unique_ptr<Profiler> profiler_;
};

class ScopeProfiler {
public:
ScopeProfiler(const int64_t element, const int64_t event) : element_(element), event_(event) {
if (ProfilingContext::GetInstance().IsEnabled()) {
start_trace_ = std::chrono::system_clock::now();
}
}
~ScopeProfiler() {
if (ProfilingContext::GetInstance().IsEnabled()) {
ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventStart, start_trace_);
ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventEnd);
}
}
void SetElement(const int64_t element) {
element_ = element;
}

private:
std::chrono::time_point<std::chrono::system_clock> start_trace_;
int64_t element_;
int64_t event_;
};
} // namespace profiling
} // namespace ge
#define PROFILING_START(element, event) \
ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event), \
ge::profiling::EventType::kEventStart)
#define PROFILING_END(element, event) \
ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event), \
ge::profiling::EventType::kEventEnd)
#define PROFILING_SCOPE(element, event) ge::profiling::ScopeProfiler profiler((element), (event))
#define PROFILING_SCOPE_ELEMENT(element) profiler.SetElement((element))
#endif // AIR_CXX_PROFILING_DEFINITIONS_H
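A usage sketch for the macros and ProfilingContext defined above. kInferShape and kTiling come from the enum in this header; InferAndTile and node_id are illustrative.

// Hedged sketch: scoped and explicit profiling records around two phases.
void InferAndTile(const int64_t node_id) {
  ge::profiling::ProfilingContext::GetInstance().SetEnable();
  {
    PROFILING_SCOPE(node_id, ge::profiling::kInferShape);  // records kEventStart / kEventEnd around this block
    // ... run shape inference ...
    PROFILING_SCOPE_ELEMENT(node_id);  // the element can be corrected once it is known
  }
  PROFILING_START(node_id, ge::profiling::kTiling);
  // ... run tiling ...
  PROFILING_END(node_id, ge::profiling::kTiling);
  ge::profiling::ProfilingContext::GetInstance().DumpToStdOut();
}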

+ 3
- 3
inc/framework/common/scope_guard.h View File

@@ -25,9 +25,9 @@
/// MAKE_GUARD([&] { Release Resource 1 }) /// MAKE_GUARD([&] { Release Resource 1 })
/// Acquire Resource 2 /// Acquire Resource 2
// MAKE_GUARD([&] { Release Resource 2 }) // MAKE_GUARD([&] { Release Resource 2 })
#define GE_MAKE_GUARD(var, callback) const ScopeGuard const_guard_##var(callback)
#define GE_MAKE_GUARD(var, callback) const ::ge::ScopeGuard const_guard_##var(callback)


#define GE_DISMISSABLE_GUARD(var, callback) ScopeGuard make_guard_##var(callback)
#define GE_DISMISSABLE_GUARD(var, callback) ::ge::ScopeGuard make_guard_##var(callback)
#define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss() #define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss()


namespace ge { namespace ge {
@@ -44,7 +44,7 @@ class GE_FUNC_VISIBILITY ScopeGuard {
if (on_exit_scope_ != nullptr) { if (on_exit_scope_ != nullptr) {
try { try {
on_exit_scope_(); on_exit_scope_();
} catch (std::bad_function_call &e) {
} catch (std::bad_function_call &) {
} catch (...) { } catch (...) {
} }
} }
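A sketch of the guard macros above, following the acquire/release pattern described in the header comment. AcquireBuffer, ReleaseBuffer, and DoWork are hypothetical stand-ins.

// Hedged sketch: unconditional release plus a dismissable rollback guard.
ge::Status GuardedWork() {
  void *buffer = AcquireBuffer();
  GE_MAKE_GUARD(release, [&buffer]() { ReleaseBuffer(buffer); });  // always runs at scope exit
  GE_DISMISSABLE_GUARD(rollback, []() { /* undo partial work */ });
  if (DoWork(buffer) != ge::SUCCESS) {
    return ge::FAILED;  // the rollback guard fires here
  }
  GE_DISMISS_GUARD(rollback);  // success path: skip the rollback callback
  return ge::SUCCESS;
}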


+ 21
- 18
inc/framework/common/string_util.h View File

@@ -39,13 +39,14 @@
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector> #include <vector>
#include "graph/types.h"


namespace ge { namespace ge {
class GE_FUNC_VISIBILITY StringUtils { class GE_FUNC_VISIBILITY StringUtils {
public: public:
static std::string &Ltrim(std::string &s) { static std::string &Ltrim(std::string &s) {
#if __cplusplus >= 201103L #if __cplusplus >= 201103L
(void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int32_t c) { return std::isspace(c) == 0; }));
(void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](const int32_t c) { return std::isspace(c) == 0; }));
#else #else
(void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int32_t, int32_t>(std::isspace)))); (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int32_t, int32_t>(std::isspace))));
#endif #endif
@@ -54,7 +55,8 @@ class GE_FUNC_VISIBILITY StringUtils {
// lint -esym(551,*) // lint -esym(551,*)
static std::string &Rtrim(std::string &s) { /*lint !e618*/ static std::string &Rtrim(std::string &s) { /*lint !e618*/
#if __cplusplus >= 201103L #if __cplusplus >= 201103L
(void)s.erase(std::find_if(s.rbegin(), s.rend(), [](int32_t c) { return std::isspace(c) == 0; }).base(), s.end());
(void)s.erase(std::find_if(s.rbegin(), s.rend(), [](const int32_t c) { return std::isspace(c) == 0; }).base(),
s.end());
#else #else
(void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int32_t, int32_t>(std::isspace))).base(), (void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int32_t, int32_t>(std::isspace))).base(),
s.end()); s.end());
@@ -79,7 +81,7 @@ class GE_FUNC_VISIBILITY StringUtils {
/// @param [in] delim separator /// @param [in] delim separator
/// @return string array after segmentation /// @return string array after segmentation
/// ///
static std::vector<std::string, std::allocator<std::string>> Split(const std::string &str, char delim) {
static std::vector<std::string, std::allocator<std::string>> Split(const std::string &str, const char_t delim) {
std::vector<std::string, std::allocator<std::string>> elems; std::vector<std::string, std::allocator<std::string>> elems;


if (str.empty()) { if (str.empty()) {
@@ -94,8 +96,8 @@ class GE_FUNC_VISIBILITY StringUtils {
elems.push_back(item); elems.push_back(item);
} }


auto str_size = str.size();
if ((str_size > 0) && (str[str_size - 1] == delim)) {
const auto str_size = str.size();
if ((str_size > 0U) && (str[str_size - 1U] == delim)) {
elems.emplace_back(""); elems.emplace_back("");
} }


@@ -107,13 +109,13 @@ class GE_FUNC_VISIBILITY StringUtils {
/// @param [in] s path name /// @param [in] s path name
/// @return file name /// @return file name
/// ///
static std::string GetFileName(std::string &s) {
static std::string GetFileName(const std::string &s) {
if (s.empty()) { if (s.empty()) {
return ""; return "";
} }
std::vector<std::string> files = StringUtils::Split(s, '/');
const std::vector<std::string> files = StringUtils::Split(s, '/');


return files.empty() ? "" : files[files.size() - 1];
return files.empty() ? "" : files[files.size() - 1U];
} }
/// ///
/// @ingroup domi_common /// @ingroup domi_common
@@ -125,12 +127,13 @@ class GE_FUNC_VISIBILITY StringUtils {
/// @return string after replacement /// @return string after replacement
/// ///
static std::string ReplaceAll(std::string str, const std::string &old_value, const std::string &new_value) { static std::string ReplaceAll(std::string str, const std::string &old_value, const std::string &new_value) {
std::string::size_type cur_pos = 0;
std::string::size_type old_length = old_value.length();
std::string::size_type new_length = new_value.length();
std::string::size_type cur_pos = 0U;
const std::string::size_type old_length = old_value.length();
const std::string::size_type new_length = new_value.length();
// cycle replace // cycle replace
for (; cur_pos != std::string::npos; cur_pos += new_length) { for (; cur_pos != std::string::npos; cur_pos += new_length) {
if ((cur_pos = str.find(old_value, cur_pos)) != std::string::npos) {
cur_pos = str.find(old_value, cur_pos);
if (cur_pos != std::string::npos) {
(void)str.replace(cur_pos, old_length, new_value); (void)str.replace(cur_pos, old_length, new_value);
} else { } else {
break; break;
@@ -148,7 +151,7 @@ class GE_FUNC_VISIBILITY StringUtils {
/// @return if the value is a prefix, true is returned. Otherwise, false is returned /// @return if the value is a prefix, true is returned. Otherwise, false is returned
/// ///
static bool StartWith(const std::string &str, const std::string str_x) { static bool StartWith(const std::string &str, const std::string str_x) {
return ((str.size() >= str_x.size()) && (str.compare(0, str_x.size(), str_x) == 0));
return ((str.size() >= str_x.size()) && (str.compare(0U, str_x.size(), str_x) == 0));
} }


/// ///
@@ -159,14 +162,14 @@ class GE_FUNC_VISIBILITY StringUtils {
/// @param [in] ... format Filling Content /// @param [in] ... format Filling Content
/// @return formatted string /// @return formatted string
/// ///
static std::string FormatString(const char *format, ...) {
const uint32_t MAX_BUFFER_LEN = 1024; // the stack memory plint check result must be less than 1024
static std::string FormatString(const char_t *const format, ...) {
const uint32_t MAX_BUFFER_LEN = 1024U; // the stack memory plint check result must be less than 1024
va_list args; va_list args;
va_start(args, format); va_start(args, format);
char buffer[MAX_BUFFER_LEN] = {0};
int32_t ret = vsnprintf_s(buffer, MAX_BUFFER_LEN, MAX_BUFFER_LEN - 1, format, args);
char_t buffer[MAX_BUFFER_LEN] = {};
const int32_t ret = vsnprintf_s(&buffer[0], MAX_BUFFER_LEN, MAX_BUFFER_LEN - 1U, format, args);
va_end(args); va_end(args);
return ret > 0 ? buffer : "";
return (ret > 0) ? buffer : "";
} }
}; };
} // namespace ge } // namespace ge
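
Illustrative calls against the updated StringUtils interface, assuming this header is included; expected results are shown in the trailing comments.

void StringUtilsSketch() {
  const std::string path = "/usr/local/model.om";
  const std::vector<std::string> parts = ge::StringUtils::Split(path, '/');      // {"", "usr", "local", "model.om"}
  const std::string file = ge::StringUtils::GetFileName(path);                   // "model.om"; now takes a const reference
  const std::string renamed = ge::StringUtils::ReplaceAll(path, ".om", ".txt");  // "/usr/local/model.txt"
  const bool is_absolute = ge::StringUtils::StartWith(path, "/");                // true
  const std::string msg = ge::StringUtils::FormatString("load %s (%d parts)", file.c_str(),
                                                        static_cast<int32_t>(parts.size()));
}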


+ 29
- 8
inc/framework/common/taskdown_common.h

@@ -23,7 +23,7 @@ namespace ge {


const int32_t CC_FUSION_OP_MAX = 32; const int32_t CC_FUSION_OP_MAX = 32;


typedef enum tagCcStatus {
enum class ccStatus_t {
CC_STATUS_SUCCESS = 0, /**< succ */ CC_STATUS_SUCCESS = 0, /**< succ */
CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ CC_STATUS_NOT_INITIALIZED = 1, /**< not init */
CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */
@@ -33,10 +33,10 @@ typedef enum tagCcStatus {
CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */ CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */
CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */
CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/
CC_STATUS_RESERVED /**< just for check */
} ccStatus_t;
CC_STATUS_RESERVED = 8, /**< just for check */
};


typedef enum tagccKernelType {
enum class ccKernelType {
CCE_AI_CORE = 0, /* cce aicore */ CCE_AI_CORE = 0, /* cce aicore */
CCE_AI_CPU = 1, /* cce aicpu */ CCE_AI_CPU = 1, /* cce aicpu */
TE = 2, /* te operator*/ TE = 2, /* te operator*/
@@ -47,9 +47,9 @@ typedef enum tagccKernelType {
CUST_AI_CPU = 7, /* custom aicpu*/ CUST_AI_CPU = 7, /* custom aicpu*/
HOST_CPU = 8, /* host cpu */ HOST_CPU = 8, /* host cpu */
INVALID = 10000 /* unknown kernel type */ INVALID = 10000 /* unknown kernel type */
} ccKernelType;
};


typedef struct tagOpContext {
using ccOpContext = struct tagOpContext {
ccKernelType kernelType; ccKernelType kernelType;
uint32_t opId; uint32_t opId;
uint32_t kernelFuncId; uint32_t kernelFuncId;
@@ -66,7 +66,28 @@ typedef struct tagOpContext {
uint64_t genVariableBaseAddr; uint64_t genVariableBaseAddr;
uint64_t genVariableBaseSize; uint64_t genVariableBaseSize;
uint64_t l2ctrlSize; uint64_t l2ctrlSize;
} ccOpContext;
} // namespace ge
};

enum class tagOpTensorFormat { OP_TENSOR_FORMAT_NC1HWC0 = 0, OP_TENSOR_FORMAT_ND, OP_TENSOR_FORMAT_RESERVED };


enum class tagOpDataType {
OP_DATA_FLOAT = 0, /**< float type */
OP_DATA_HALF, /**< fp16 type */
OP_DATA_INT8, /**< int8 type */
OP_DATA_INT32, /**< int32 type */
OP_DATA_UINT8, /**< uint8 type */
OP_DATA_HALF_UINT16_PROPOSAL, /**< mixed type for proposal */
OP_DATA_RESERVED
};

// AICPU Tensor
using ccAICPUTensor = struct tagOpTensor {
// real dim info
tagOpTensorFormat format;
tagOpDataType data_type;
int32_t dim_cnt;
int32_t mm;
int32_t dim[8];
};
} // namespace ge
#endif // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ #endif // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_
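
Because these are now scoped enums, call sites must qualify the enumerators and cast explicitly wherever an integer is required. A hedged sketch with illustrative field values:

void FillTaskContextSketch() {
  ge::ccOpContext ctx{};
  ctx.kernelType = ge::ccKernelType::TE;          // enumerators must be qualified with the enum name
  ctx.opId = 0U;
  if (ctx.kernelType == ge::ccKernelType::CUST_AI_CPU) {
    // ... route to the custom aicpu path ...
  }
  // enum class values no longer convert implicitly, so an explicit cast is needed for logging/serialization:
  const uint32_t status_code = static_cast<uint32_t>(ge::ccStatus_t::CC_STATUS_SUCCESS);
  (void)status_code;

  ge::ccAICPUTensor tensor{};
  tensor.format = ge::tagOpTensorFormat::OP_TENSOR_FORMAT_ND;
  tensor.data_type = ge::tagOpDataType::OP_DATA_FLOAT;
  tensor.dim_cnt = 2;
  tensor.dim[0] = 16;
  tensor.dim[1] = 16;
}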

+ 39
- 268
inc/framework/common/types.h

@@ -19,7 +19,6 @@


#include <climits> #include <climits>
#include <cstdint> #include <cstdint>
#include <algorithm>
#include <map> #include <map>
#include <memory> #include <memory>
#include <string> #include <string>
@@ -44,32 +43,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEB
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ALL; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ALL;


// Profile-related constants // Profile-related constants
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OME_PROFILE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string CCE_PROFILE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string RTS_PROFILE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID;


FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF;

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB. FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB.


template <typename K, typename V>
static std::pair<V, K> flip_pair(const std::pair<K, V> &p) {
return std::pair<V, K>(p.second, p.first);
}

template <typename K, typename V>
static std::map<V, K> flip_map(std::map<K, V> src) {
std::map<V, K> dst;
std::transform(src.begin(), src.end(), std::inserter(dst, dst.begin()), flip_pair<K, V>);
return dst;
}

REGISTER_OPTYPE_DECLARE(DATA, "Data"); REGISTER_OPTYPE_DECLARE(DATA, "Data");
REGISTER_OPTYPE_DECLARE(AIPPDATA, "AippData"); REGISTER_OPTYPE_DECLARE(AIPPDATA, "AippData");
REGISTER_OPTYPE_DECLARE(QUEUE_DATA, "QueueData");
REGISTER_OPTYPE_DECLARE(CONVOLUTION, "Convolution"); REGISTER_OPTYPE_DECLARE(CONVOLUTION, "Convolution");
REGISTER_OPTYPE_DECLARE(CORRELATION, "Correlation"); REGISTER_OPTYPE_DECLARE(CORRELATION, "Correlation");
REGISTER_OPTYPE_DECLARE(CORRELATIONV2, "Correlation_V2"); REGISTER_OPTYPE_DECLARE(CORRELATIONV2, "Correlation_V2");
@@ -140,6 +124,8 @@ REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze");
REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze"); REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze");
REGISTER_OPTYPE_DECLARE(SQUEEZEV2, "SqueezeV2"); REGISTER_OPTYPE_DECLARE(SQUEEZEV2, "SqueezeV2");
REGISTER_OPTYPE_DECLARE(UNSQUEEZEV2, "UnsqueezeV2"); REGISTER_OPTYPE_DECLARE(UNSQUEEZEV2, "UnsqueezeV2");
REGISTER_OPTYPE_DECLARE(SQUEEZEV3, "SqueezeV3");
REGISTER_OPTYPE_DECLARE(UNSQUEEZEV3, "UnsqueezeV3");
REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice"); REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice");
REGISTER_OPTYPE_DECLARE(RANGE, "Range"); REGISTER_OPTYPE_DECLARE(RANGE, "Range");
REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals"); REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals");
@@ -438,6 +424,7 @@ REGISTER_OPTYPE_DECLARE(MODELEXIT, "ModelExit");
REGISTER_OPTYPE_DECLARE(SEND, "Send"); REGISTER_OPTYPE_DECLARE(SEND, "Send");
REGISTER_OPTYPE_DECLARE(RECV, "Recv"); REGISTER_OPTYPE_DECLARE(RECV, "Recv");
REGISTER_OPTYPE_DECLARE(ENDOFSEQUENCE, "EndOfSequence"); REGISTER_OPTYPE_DECLARE(ENDOFSEQUENCE, "EndOfSequence");
REGISTER_OPTYPE_DECLARE(STARTOFSEQUENCE, "StartOfSequence");


REGISTER_OPTYPE_DECLARE(LABELSET, "LabelSet"); REGISTER_OPTYPE_DECLARE(LABELSET, "LabelSet");
REGISTER_OPTYPE_DECLARE(LABELGOTO, "LabelGoto"); REGISTER_OPTYPE_DECLARE(LABELGOTO, "LabelGoto");
@@ -461,8 +448,6 @@ REGISTER_OPTYPE_DECLARE(ELU_GRAD, "EluGrad");
REGISTER_OPTYPE_DECLARE(ADD_V2, "AddV2"); REGISTER_OPTYPE_DECLARE(ADD_V2, "AddV2");
REGISTER_OPTYPE_DECLARE(DATAFORMATDIMMAP, "DataFormatDimMap"); REGISTER_OPTYPE_DECLARE(DATAFORMATDIMMAP, "DataFormatDimMap");
REGISTER_OPTYPE_DECLARE(DATAFORMATVECPERMUTE, "DataFormatVecPermute"); REGISTER_OPTYPE_DECLARE(DATAFORMATVECPERMUTE, "DataFormatVecPermute");
REGISTER_OPTYPE_DECLARE(BESSELI0e, "BesselI0e");
REGISTER_OPTYPE_DECLARE(BESSELI1e, "BesselI1e");
REGISTER_OPTYPE_DECLARE(DEQUANTIZE, "Dequantize"); REGISTER_OPTYPE_DECLARE(DEQUANTIZE, "Dequantize");
REGISTER_OPTYPE_DECLARE(APPLYADADELTA, "ApplyAdadelta"); REGISTER_OPTYPE_DECLARE(APPLYADADELTA, "ApplyAdadelta");
REGISTER_OPTYPE_DECLARE(APPLYADAGRAD, "ApplyAdagrad"); REGISTER_OPTYPE_DECLARE(APPLYADAGRAD, "ApplyAdagrad");
@@ -516,29 +501,11 @@ REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims");
// profiling training trace node // profiling training trace node
REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace");


enum InputMode { INPUT = 0, CONST_INPUT };

// Definition of the processing status enum of the process module
enum ModelProcessState {
INIT_STATE = 0, // init status
WAIT_EVENT_STATE, // Wait for the event status
IND_RSLT_STATE, // The model execution result is being output to the high level
STOPPED_STATE, // Model execution completed. The model enters this state after Model Manager::Stop
RESERVED_STATE, // reserved
};

// Indicates the enun definition of the execution mode of the access module
enum SysMode {
INFERENCE = 0, // Normal, that is, Inference mode
DEBUG, // Debug mode
TIME, // Model execution time mode, including the execution time of each OP
STOP, // STOP mode
RESET, // RESET mode
PERFORMANCE, // Impact of enabling the performance model: 1. The input data of the model is considered ready and does
// not need to be converted
ANDROID_DEBUG, // Exports Android platform computing data
RESERVED, // reserved
};
// Stack series
REGISTER_OPTYPE_DECLARE(STACK, "Stack");
REGISTER_OPTYPE_DECLARE(STACKPUSH, "StackPush");
REGISTER_OPTYPE_DECLARE(STACKPOP, "StackPop");
REGISTER_OPTYPE_DECLARE(STACKCLOSE, "StackClose");


// @brief encryption type of the model file // @brief encryption type of the model file
enum ModelEncryptType { enum ModelEncryptType {
@@ -577,22 +544,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FIL
/// ///
/// @brief model name length /// @brief model name length
/// ///
static constexpr uint32_t MODEL_NAME_LENGTH = 32;
constexpr uint32_t MODEL_NAME_LENGTH = 32U;


/// ///
/// @brief length of user-defined information /// @brief length of user-defined information
/// ///
static constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32;
constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32U;


/// ///
/// @brief length of the model file signature /// @brief length of the model file signature
/// ///
static constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64;
constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64U;


/// ///
/// @brief length of the reserved field in the model file header /// @brief length of the reserved field in the model file header
/// ///
static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75;
constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75U;


// DATA node type // DATA node type
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DATA_TYPE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DATA_TYPE;
@@ -617,7 +584,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYP
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_CONF_DELIMITER; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_CONF_DELIMITER;


// dim default size value // dim default size value
static const int32_t DIM_DEFAULT_SIZE = 4;
constexpr int32_t DIM_DEFAULT_SIZE = 4;


// dim extension default value // dim extension default value
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t DIM_DEFAULT_VALUE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t DIM_DEFAULT_VALUE;
@@ -650,34 +617,35 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STREAM_SW


FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_GLOBAL_STEP; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_GLOBAL_STEP;


static const uint32_t PLATFORM_VERSION_LEN = 20;
constexpr uint32_t PLATFORM_VERSION_LEN = 20U;


// Definition of the file header of the model file // Definition of the file header of the model file
struct ModelFileHeader { struct ModelFileHeader {
uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI
uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256
uint32_t version = MODEL_VERSION; // version 1.0
uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0}; // signature
uint32_t length = 0; // Ciphertext length. In the non-encryption model, the length is the plaintext length.
uint8_t is_encrypt = ModelEncryptType::UNENCRYPTED; // whether encrypted 0:not encrypt, 1:encrypt
uint8_t is_checksum = ModelCheckType::CHECK; // whether to check the checksum
uint8_t modeltype = 0; // 0:IR model 1:standard model 2: OM Tiny model
uint8_t genmode = 0; // 0:offline generate 1:online generate
uint8_t name[MODEL_NAME_LENGTH] = {0}; // Model name, which contains 32 characters
uint32_t ops = 0; // Computing power (Kops)
uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0}; // User-defined information. The value contains 32 characters
uint32_t om_ir_version = 0;
uint32_t model_num = 0;
uint8_t platform_version[PLATFORM_VERSION_LEN] = {0};
uint8_t platform_type = {0};
uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0}; // Reserved field 75
uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI
uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256
uint32_t version = MODEL_VERSION; // version 1.0
uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0U}; // signature
uint32_t length = 0U; // Ciphertext length. In the non-encryption model, the length is the plaintext length.
uint8_t is_encrypt =
static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED); // whether encrypted 0:not encrypt, 1:encrypt
uint8_t is_checksum = static_cast<uint8_t>(ModelCheckType::CHECK); // whether to check the checksum
uint8_t modeltype = 0U; // 0:IR model 1:standard model 2: OM Tiny model
uint8_t genmode = 0U; // 0:offline generate 1:online generate
uint8_t name[MODEL_NAME_LENGTH] = {0U}; // Model name, which contains 32 characters
uint32_t ops = 0U; // Computing power (Kops)
uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0U}; // User-defined information. The value contains 32 characters
uint32_t om_ir_version = 0U;
uint32_t model_num = 0U;
uint8_t platform_version[PLATFORM_VERSION_LEN] = {0U};
uint8_t platform_type = {0U};
uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0U}; // Reserved field 75
}; };


static constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0;
static constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1;
constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0U;
constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1U;


// number of partitions in the current model // number of partitions in the current model
static constexpr uint32_t PARTITION_SIZE = 5;
constexpr uint32_t PARTITION_SIZE = 5U;


enum ModelPartitionType { MODEL_DEF = 0, WEIGHTS_DATA, TASK_INFO, TBE_KERNELS, CUST_AICPU_KERNELS }; enum ModelPartitionType { MODEL_DEF = 0, WEIGHTS_DATA, TASK_INFO, TBE_KERNELS, CUST_AICPU_KERNELS };


@@ -692,22 +660,9 @@ struct ModelPartitionTable {
ModelPartitionMemInfo partition[0]; ModelPartitionMemInfo partition[0];
}; };


#define SIZE_OF_MODEL_PARTITION_TABLE(table) (sizeof(ModelPartitionTable) + sizeof(ModelPartitionMemInfo) * (table).num)

// Filter format
typedef enum tagDomiFilterFormat {
DOMI_FILTER_KCHW, // KCHW
DOMI_FILTER_HWCK, // HWCK
DOMI_FILTER_RESERVED
} domiFilterFormat_t;

// Const data trans type
typedef enum tagDomiConstDataTransType {
DOMI_CONST_DATA_NOT_CHANGE = 0, // No action is required
DOMI_CONST_DATA_TRANS_MATMUL, // The const input to MatMul and needs to be transposed
DOMI_CONST_DATA_RESERVED
} domiConstDataTransType_t;

inline uint64_t SizeOfModelPartitionTable(const ModelPartitionTable &table) {
return sizeof(ModelPartitionTable) + (sizeof(ModelPartitionMemInfo) * static_cast<uint64_t>(table.num));
}
// mode of activation // mode of activation
typedef enum tagDomiActivationMode { typedef enum tagDomiActivationMode {
DOMI_ACTIVATION_SIGMOID = 0, // sigmoid DOMI_ACTIVATION_SIGMOID = 0, // sigmoid
@@ -727,190 +682,6 @@ typedef enum tagDomiActivationMode {
DOMI_ACTIVATION_RESERVED DOMI_ACTIVATION_RESERVED
} domiActivationMode_t; } domiActivationMode_t;


// mode of batchnorm
typedef enum tagDomiBatchNormMode {
DOMI_BATCHNORM_PER_ACTIVATION = 0, // bnScale, bnBias tensor dims are 1xCxHxW
DOMI_BATCHNORM_SPATIAL, // bnScale, bnBias tensor dims are 1xCx1x1
DOMI_BATCHNORM_RESERVED
} domiBatchNormMode_t;

// eltwise mode
typedef enum tagDomiEltwiseMode {
DOMI_ELTWISE_PROD = 0, // prod
DOMI_ELTWISE_SUM, // sum
DOMI_ELTWISE_MAX, // max
DOMI_ELTWISE_RESERVED
} domiEltwiseMode_t;

// mode of padding
typedef enum tagDomiPaddingMode {
DOMI_PADDING_CEIL = 0, // Default padding mode
DOMI_PADDING_DIRECTASSIGN, // Default padding mode: NOTSET
DOMI_PADDING_VALID, // VALID padding mode
DOMI_PADDING_SAME, // Padding values of 0 are always used
DOMI_PADDING_CEIL_NEW, // Padding values of 0 are always used
DOMI_PADDING_VALID_NEW, // Padding values of 0 are always used
DOMI_PADDING_SAME_NEW, // Padding values of 0 are always used
DOMI_PADDING_RESERVED
} domiPaddingMode_t;

// algorithm of convolution forward
typedef enum tagDomiConvolutionFwdAlgo {
DOMI_CONVOLUTION_FWD_ALGO_GEMM = 0, // matrix gemm algo
DOMI_CONVOLUTION_FWD_ALGO_WINOGRAD, // Winograd Transform algo
DOMI_CONVOLUTION_FWD_ALGO_GEMM_ACCU_FLOAT32, // accumulate in L0c with FP32
DOMI_CONVOLUTION_FWD_ALGO_RESERVED
} domiConvolutionFwdAlgo_t;

typedef enum tagDomiFullConnectFwdAlgo {
DOMI_FULLCONNECT_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16
DOMI_FULLCONNECT_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32
} domiFullConnectFwdAlgo_t;

typedef enum tagDomiPooingFwdAlgo {
DOMI_POOLING_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16
DOMI_POOLING_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32
} domiPooingFwdAlgo_t;

// mode of convolution
typedef enum tagDomiConvolutionMode {
DOMI_CONV_CONVOLUTION = 0, // math convolution
DOMI_CONV_CROSS_CORRELATION, // cross-correlation convolution
DOMI_CONV_DECONVOLUTION, // deconvolution, also named transposed convolution
DOMI_CONV_MODE_DEPTHWISE, // depthwise convolution
DOMI_CONV_MODE_RESERVED
} domiConvolutionMode_t;

// softmax mode
typedef enum tagDomiSoftmaxMode {
DOMI_SOFTMAX_MODE_INSTANCE = 0, // compute the softmax over all C, H, W for each N
DOMI_SOFTMAX_MODE_CHANNEL, // compute the softmax over all C for each H, W, N
DOMI_SOFTMAX_MODE_HEIGHT, // compute the softmax over all H for each N, C, W
DOMI_SOFTMAX_MODE_WIDTH, // compute the softmax over all W for each N, C, H
DOMI_SOFTMAX_MODE_RESERVED
} domiSoftmaxMode_t;

// softmax algorithm
typedef enum tagDomiSoftmaxAlgo {
DOMI_SOFTMAX_FAST = 0, // straightforward implementation
DOMI_SOFTMAX_ACCURATE, // subtract max from every point to avoid overflow
DOMI_SOFTMAX_LOG, // perform the Log softmax operation to avoid overflow
DOMI_SOFTMAX_ACCURATE_FP32,
DOMI_SOFTMAX_RESERVED
} domiSoftmaxAlgo_t;

// algorithm of convolution backward
typedef enum tagDomiConvolutionBwdAlgo {
DOMI_CONVOLUTION_BWD_ALGO_GEMM = 0, // matrix gemm algo
DOMI_CONVOLUTION_BWD_ALGO_WINOGRAD, // Winograd Transform algo
DOMI_CONVOLUTION_BWD_ALGO_RESERVED
} domiConvolutionBwdAlgo_t;

// mode of pooling
typedef enum tagDomiPoolingMode {
DOMI_POOLING_MAX = 0, // max pooling
DOMI_POOLING_AVG, // average pooling
DOMI_POOLING_L2, // L2 pooling
DOMI_POOLING_RESERVED
} domiPoolingMode_t;

// propagate Nan
typedef enum tagDomiNanPropagation {
DOMI_NAN_NOT_PROPAGATE = 0, // Nan numbers are not propagated
DOMI_NAN_PROPAGATE, // Nan numbers are propagated
DOMI_NAN_PROPAGATE_RESERVED
} domiNanPropagation_t;

// mode of cropandresize
typedef enum tagDomiCropAndResizeMode {
DOMI_RESIZE_METHOD_BILINEAR = 0, // resize bilinear
DOMI_RESIZE_METHOD_NEAREST, // resize nearest
DOMI_RESIZE_RESERVED
} domiCropAndResizeMode_t;

// yolo version
typedef enum tagDomiYoloVersion { DOMI_YOLO_V2 = 1, DOMI_YOLO_V3, DOMI_YOLO_TRSERVED } domiYoloVersion_t;

typedef enum tagDomiRNNScopePassType {
DOMI_STATIC_BIDIRECTIONAL_RNN_GENERAL_PASS = 0,
DOMI_DYNAMIC_BIDIRECTIONAL_RNN_GENERAL_PASS,
DOMI_DYNAMIC_BIDIRECTIONAL_RNN_BIDAF_PASS
} domiRNNScopePassType;

// RNNDataLayout
typedef enum tagDomiRNNDataLayout {
DOMI_RNN_ND_TBX = 0, // data[max_time,batch_size,Xt]
DOMI_RNN_ND_BTX, // data[batch_size,max_time,Xt]
DOMI_RNN_5D_TX1BX, // data[max_time,Xt,1,batch_size,Xt]
DOMI_RNN_5D_BX1TX, // dataa[batch_size,Xt,1,max_time,Xt]
DOMI_RNN_4DTBX1,
DOMI_ENN_DL_RESERVED
} domiRNNDataLayout_t;

// RNNInputMode
typedef enum tagDomiRNNInputMode { DOMI_RNN_LINEAR_INPUT = 0, DOMI_RNN_SKIP_INPUT } domiRNNInputMode_t;

// RNNDirectionMode
typedef enum tagDomiRNNDirectionMode { DOMI_RNN_UNIDIRECTIONAL = 0, DOMI_RNN_BIDIRECTIONAL } domiDirectionMode_t;

typedef enum tagDomiPoolingCeilMode { DOMI_POOLING_FLOOR = 0, DOMI_POOLING_CEIL } domiPoolingCeilMode_t;

// RNNMode
typedef enum tagDomiRNNActivationMode {
DOMI_RNN_ACTIVATION_SIGMOID = 0, // sigmoid
DOMI_RNN_ACTIVATION_TANH, // tanh
DOMI_RNN_ACTIVATION_RELU, // ReLU
DOMI_RNN_ACTIVATION_RELU1, // ReLU1
DOMI_RNN_ACTIVATION_RELU6, // ReLU6
DOMI_RNN_ACTIVATION_RESERVED
} domiRNNActivationMode_t;

typedef enum tagDomiRNNLSTMOutMode {
DOMI_RNN_LSTM_OUT_SEPARATE = 0,
DOMI_RNN_LSTM_OUT_CONCAT,
DOMI_RNN_LSTM_OUT_RESERVED
} domiRNNLSTMOutPutMode_t;
typedef enum tagDomiRNNLSTMStateOutMode {
DOMI_RNN_LSTM_STATE_OUT_SEPARATE = 0,
DOMI_RNN_LSTM_STATE_OUT_CONCAT_ALL,
DOMI_RNN_LSTM_STATE_OUT_RESERVED
} domiRNNLSTMStateOutMode_t;

typedef enum tagDomiRNNMode {
DOMI_RNN_RELU = 0,
DOMI_RNN_TANH,
DOMI_LSTM,
DOMI_GRU,
DOMI_RNN_MODE_RESERVED
} domiRNNMode_t;

typedef enum tagDomiResizeBilinearMode {
DOMI_RESIZE_OUTPUT_DIM_BY_ZOOM_FACTOR = 0, // Output dimension specified by zoom factor
DOMI_RESIZE_OUTPUT_DIM_BY_SHRINK_FACTOR, // specified by shrink factor
DOMI_RESIZE_OUTPUT_DIM_EXPLICIT, // specified explicitly
DOMI_RESIZE_OUTPUT_DIM_RESERVED
} domiResizeOutputDimMode_t;

#pragma pack(1) // single-byte alignment
// DUMP file struct
struct FileHeader {
int32_t Version; // version
int32_t Output_Offset; // output offset address
char Reserved[24] = {0}; // 24 bytes reserved
};

struct BasicInfo {
struct FileHeader header; // file header
int32_t stream_id; // stread id
uint64_t start_time; // start time
uint64_t end_time; // end time
uint32_t input_size; // input memory size
uint32_t output_size; // output memory size
uint32_t weight_size; // weight Memory Size
uint32_t workspace_size; // workspace
uint32_t total_size; // total memory size
};
#pragma pack() // Cancels single-byte alignment
enum class MemorySizeCalcType { NORMAL = 0, ALWAYS_EMPTY }; enum class MemorySizeCalcType { NORMAL = 0, ALWAYS_EMPTY };
} // namespace ge } // namespace ge
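
A hedged sketch of consuming the reworked ModelFileHeader together with SizeOfModelPartitionTable(), which replaces the removed SIZE_OF_MODEL_PARTITION_TABLE macro. Treating the partition table as sitting immediately after the header is an assumption made only for this sketch.

bool HasValidHeaderSketch(const uint8_t *const buf, const size_t len) {
  if ((buf == nullptr) || (len < sizeof(ge::ModelFileHeader))) {
    return false;
  }
  const auto *const header = reinterpret_cast<const ge::ModelFileHeader *>(buf);
  if (header->magic != ge::MODEL_FILE_MAGIC_NUM) {
    return false;
  }
  const auto *const table = reinterpret_cast<const ge::ModelPartitionTable *>(buf + sizeof(ge::ModelFileHeader));
  const uint64_t table_size = ge::SizeOfModelPartitionTable(*table);
  return (static_cast<uint64_t>(sizeof(ge::ModelFileHeader)) + table_size) <= static_cast<uint64_t>(len);
}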




+ 100
- 109
inc/framework/common/util.h

@@ -14,8 +14,8 @@
* limitations under the License. * limitations under the License.
*/ */


#ifndef INC_FRAMEWORK_COMMON_UTIL_H_
#define INC_FRAMEWORK_COMMON_UTIL_H_
#ifndef AIR_INC_FRAMEWORK_COMMON_UTIL_H_
#define AIR_INC_FRAMEWORK_COMMON_UTIL_H_


#include <climits> #include <climits>
#include <cmath> #include <cmath>
@@ -24,13 +24,16 @@
#include <vector> #include <vector>


#include <google/protobuf/text_format.h> #include <google/protobuf/text_format.h>
#include "external/graph/types.h"
#include "external/register/register.h"
#include "framework/common/debug/log.h" #include "framework/common/debug/log.h"
#include "framework/common/scope_guard.h" #include "framework/common/scope_guard.h"
#include "framework/common/ge_inner_error_codes.h" #include "framework/common/ge_inner_error_codes.h"
#include "graph/detail/attributes_holder.h"


#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ #define GE_CHECK_POSITIVE_SIZE_RANGE(size) \
do { \ do { \
if (size <= 0) { \
if ((size) <= 0) { \
GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \ GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \
return PARAM_INVALID; \ return PARAM_INVALID; \
} \ } \
@@ -46,15 +49,19 @@


// new ge marco // new ge marco
// Encapsulate common resource releases // Encapsulate common resource releases
#define GE_MAKE_GUARD_RTMEM(var) \
GE_MAKE_GUARD(var, [&] { \
if (var) GE_CHK_RT(rtFreeHost(var)); \
});
#define GE_MAKE_GUARD_RTMEM(var) \
GE_MAKE_GUARD(var, [&] { \
if ((var) != nullptr) { \
GE_CHK_RT(rtFreeHost(var)); \
} \
})


#define GE_MAKE_GUARD_RTSTREAM(var) \
GE_MAKE_GUARD(var, [&] { \
if (var) GE_CHK_RT(rtStreamDestroy(var)); \
});
#define GE_MAKE_GUARD_RTSTREAM(var) \
GE_MAKE_GUARD(var, [&] { \
if ((var) != nullptr) { \
GE_CHK_RT(rtStreamDestroy(var)); \
} \
})
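
A hedged pairing of the reworked guard with a host allocation. rtMallocHost, rtFreeHost and GE_CHK_RT are assumed to come from the runtime and logging headers; the size is illustrative.

void HostBufferSketch() {
  void *host_mem = nullptr;
  GE_CHK_RT(rtMallocHost(&host_mem, 1024U));
  GE_MAKE_GUARD_RTMEM(host_mem);  // rtFreeHost(host_mem) runs when this scope exits, and only if host_mem != nullptr
  // ... use host_mem ...
}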


// For propagating errors when calling a function. // For propagating errors when calling a function.
#define GE_RETURN_IF_ERROR(expr) \ #define GE_RETURN_IF_ERROR(expr) \
@@ -115,7 +122,7 @@
// Check if the parameter is null. If yes, return PARAM_INVALID and record the error // Check if the parameter is null. If yes, return PARAM_INVALID and record the error
#define GE_CHECK_NOTNULL(val) \ #define GE_CHECK_NOTNULL(val) \
do { \ do { \
if (val == nullptr) { \
if ((val) == nullptr) { \
REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \ REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \
GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \ GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \
return ge::PARAM_INVALID; \ return ge::PARAM_INVALID; \
@@ -125,7 +132,7 @@
// Check if the parameter is null. If yes, just return and record the error // Check if the parameter is null. If yes, just return and record the error
#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ #define GE_CHECK_NOTNULL_JUST_RETURN(val) \
do { \ do { \
if (val == nullptr) { \
if ((val) == nullptr) { \
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \
return; \ return; \
} \ } \
@@ -134,7 +141,7 @@
// Check whether the parameter is null. If so, execute the exec_expr expression and record the error log // Check whether the parameter is null. If so, execute the exec_expr expression and record the error log
#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ #define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \
do { \ do { \
if (val == nullptr) { \
if ((val) == nullptr) { \
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \
exec_expr; \ exec_expr; \
} \ } \
@@ -143,7 +150,7 @@
// Check whether the parameter is null. If yes, return directly and record the error log // Check whether the parameter is null. If yes, return directly and record the error log
#define GE_RT_VOID_CHECK_NOTNULL(val) \ #define GE_RT_VOID_CHECK_NOTNULL(val) \
do { \ do { \
if (val == nullptr) { \
if ((val) == nullptr) { \
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \
return; \ return; \
} \ } \
@@ -152,7 +159,7 @@
// Check if the parameter is null. If yes, return false and record the error log // Check if the parameter is null. If yes, return false and record the error log
#define GE_RT_FALSE_CHECK_NOTNULL(val) \ #define GE_RT_FALSE_CHECK_NOTNULL(val) \
do { \ do { \
if (val == nullptr) { \
if ((val) == nullptr) { \
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \
return false; \ return false; \
} \ } \
@@ -161,7 +168,7 @@
// Check if the parameter is out of bounds // Check if the parameter is out of bounds
#define GE_CHECK_SIZE(size) \ #define GE_CHECK_SIZE(size) \
do { \ do { \
if (size == 0) { \
if ((size) == 0U) { \
GELOGE(ge::FAILED, "param[%s] is out of range", #size); \ GELOGE(ge::FAILED, "param[%s] is out of range", #size); \
return ge::PARAM_INVALID; \ return ge::PARAM_INVALID; \
} \ } \
@@ -170,7 +177,7 @@
// Check if the value on the left is greater than or equal to the value on the right // Check if the value on the left is greater than or equal to the value on the right
#define GE_CHECK_GE(lhs, rhs) \ #define GE_CHECK_GE(lhs, rhs) \
do { \ do { \
if (lhs < rhs) { \
if ((lhs) < (rhs)) { \
GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \ GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \
return ge::PARAM_INVALID; \ return ge::PARAM_INVALID; \
} \ } \
@@ -179,7 +186,7 @@
// Check if the value on the left is less than or equal to the value on the right // Check if the value on the left is less than or equal to the value on the right
#define GE_CHECK_LE(lhs, rhs) \ #define GE_CHECK_LE(lhs, rhs) \
do { \ do { \
if (lhs > rhs) { \
if ((lhs) > (rhs)) { \
GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \ GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \
return ge::PARAM_INVALID; \ return ge::PARAM_INVALID; \
} \ } \
@@ -187,102 +194,37 @@


#define GE_DELETE_NEW_SINGLE(var) \ #define GE_DELETE_NEW_SINGLE(var) \
do { \ do { \
if (var != nullptr) { \
delete var; \
var = nullptr; \
if ((var) != nullptr) { \
delete (var); \
(var) = nullptr; \
} \ } \
} while (false) } while (false)


#define GE_DELETE_NEW_ARRAY(var) \ #define GE_DELETE_NEW_ARRAY(var) \
do { \ do { \
if (var != nullptr) { \
delete[] var; \
var = nullptr; \
if ((var) != nullptr) { \
delete[](var); \
(var) = nullptr; \
} \ } \
} while (false) } while (false)


#define GE_FREE_RT_LOG(addr) \ #define GE_FREE_RT_LOG(addr) \
do { \ do { \
if (addr != nullptr) { \
if ((addr) != nullptr) { \
const rtError_t error = rtFree(addr); \ const rtError_t error = rtFree(addr); \
if (error != RT_ERROR_NONE) { \ if (error != RT_ERROR_NONE) { \
GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \ GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \
} \ } \
addr = nullptr; \
(addr) = nullptr; \
} \ } \
} while (false) } while (false)
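
A sketch of the hardened check macros above in a typical validation helper, assuming framework/common/util.h is included; the upper bound is illustrative.

ge::Status CopyOutputSketch(void *const dst, const void *const src, const size_t size) {
  GE_CHECK_NOTNULL(dst);     // reports and returns ge::PARAM_INVALID when dst is nullptr
  GE_CHECK_NOTNULL(src);
  GE_CHECK_SIZE(size);       // rejects size == 0
  GE_CHECK_LE(size, 1024U);  // illustrative upper bound
  // ... perform the copy ...
  return ge::SUCCESS;
}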


namespace ge {
/** /**
* @ingroup domi_common * @ingroup domi_common
* @brief version of om.proto file * @brief version of om.proto file
*/ */
static constexpr int32_t OM_PROTO_VERSION = 2;

/**
* Finding an Integer Ceiling Value Without Precision Loss
*/
#define CEIL(N, n) (((N) + (n)-1) / (n))

namespace ge {
using google::protobuf::Message;

///
/// @ingroup domi_common
/// @brief Reads the proto structure from an array.
/// @param [in] data proto data to be read
/// @param [in] size proto data size
/// @param [out] proto Memory for storing the proto file
/// @return true success
/// @return false fail
///
GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int32_t size, Message *proto);

///
/// @ingroup domi_proto
/// @brief Reads the proto file in the text format.
/// @param [in] file path of proto file
/// @param [out] message Memory for storing the proto file
/// @return true success
/// @return false fail
///
GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message);

///
/// @ingroup: domi_common
/// @brief: get length of file
/// @param [in] input_file: path of file
/// @return long: File length. If the file length fails to be obtained, the value -1 is returned.
///
GE_FUNC_VISIBILITY extern long GetFileLength(const std::string &input_file);

///
/// @ingroup domi_common
/// @brief Reads all data from a binary file.
/// @param [in] file_name path of file
/// @param [out] buffer Output memory address, which needs to be released by the caller.
/// @param [out] length Output memory size
/// @return false fail
/// @return true success
///
GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *const file_name, char **buffer, int32_t &length);

GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vector<char> &buffer);

///
/// @ingroup domi_common
/// @brief Recursively Creating a Directory
/// @param [in] directory_path Path, which can be a multi-level directory.
/// @return 0 success
/// @return -1 fail
///
GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path);

///
/// @ingroup domi_common
/// @brief Obtains the current time string.
/// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555
///
GE_FUNC_VISIBILITY std::string CurrentTimeInStr();
constexpr int32_t OM_PROTO_VERSION = 2;


/// ///
/// @ingroup domi_common /// @ingroup domi_common
@@ -294,7 +236,7 @@ template <typename T>
GE_FUNC_VISIBILITY std::string ToString(std::vector<T> &v) { GE_FUNC_VISIBILITY std::string ToString(std::vector<T> &v) {
std::stringstream ss; std::stringstream ss;
ss << "["; ss << "[";
for (T x : v) {
for (const T x : v) {
ss << x; ss << x;
ss << ", "; ss << ", ";
} }
@@ -314,7 +256,7 @@ template <typename T>
GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) { GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) {
std::stringstream ss; std::stringstream ss;
ss << "["; ss << "[";
for (T x : rpd_field) {
for (const T x : rpd_field) {
ss << x; ss << x;
ss << ", "; ss << ", ";
} }
@@ -343,6 +285,63 @@ GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedPtrField
return str_ret; return str_ret;
} }


///
/// @ingroup domi_common
/// @brief Reads the proto structure from an array.
/// @param [in] data proto data to be read
/// @param [in] size proto data size
/// @param [out] proto Memory for storing the proto file
/// @return true success
/// @return false fail
///
GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *const data, const int32_t size,
google::protobuf::Message *const proto);

///
/// @ingroup domi_proto
/// @brief Reads the proto file in the text format.
/// @param [in] file path of proto file
/// @param [out] message Memory for storing the proto file
/// @return true success
/// @return false fail
///
GE_FUNC_VISIBILITY bool ReadProtoFromText(const char_t *const file, google::protobuf::Message *const message);

///
/// @ingroup: domi_common
/// @brief: get length of file
/// @param [in] input_file: path of file
/// @return int64_t: File length. If the file length fails to be obtained, the value -1 is returned.
///
GE_FUNC_VISIBILITY extern int64_t GetFileLength(const std::string &input_file);

///
/// @ingroup domi_common
/// @brief Reads all data from a binary file.
/// @param [in] file_name path of file
/// @param [out] buffer Output memory address, which needs to be released by the caller.
/// @param [out] length Output memory size
/// @return false fail
/// @return true success
///
GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char_t *const file_name, char_t **const buffer, int32_t &length);

///
/// @ingroup domi_common
/// @brief Recursively Creating a Directory
/// @param [in] directory_path Path, which can be a multi-level directory.
/// @return 0 success
/// @return -1 fail
///
GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path);

///
/// @ingroup domi_common
/// @brief Obtains the current time string.
/// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555
///
GE_FUNC_VISIBILITY std::string CurrentTimeInStr();

/// ///
/// @ingroup domi_common /// @ingroup domi_common
/// @brief Obtains the absolute time (timestamp) of the current system. /// @brief Obtains the absolute time (timestamp) of the current system.
@@ -366,7 +365,7 @@ GE_FUNC_VISIBILITY uint32_t GetCurrentSecondTimestap();
/// @param [in] b /// @param [in] b
/// @return false: true: The result is within the normal int64 range. /// @return false: true: The result is within the normal int64 range.
/// ///
GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b);
GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(const int64_t a, const int64_t b);


/// ///
/// @ingroup domi_common /// @ingroup domi_common
@@ -374,7 +373,7 @@ GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b);
/// @param [in] path of input file /// @param [in] path of input file
/// @param [out] Absolute path of a file. If the absolute path cannot be obtained, an empty string is returned /// @param [out] Absolute path of a file. If the absolute path cannot be obtained, an empty string is returned
/// ///
GE_FUNC_VISIBILITY std::string RealPath(const char *path);
GE_FUNC_VISIBILITY std::string RealPath(const char_t *path);


/// ///
/// @ingroup domi_common /// @ingroup domi_common
@@ -401,17 +400,9 @@ GE_FUNC_VISIBILITY bool CheckOutputPathValid(const std::string &file_path, const
/// @param [in] str file path /// @param [in] str file path
/// @param [out] result /// @param [out] result
/// ///
GE_FUNC_VISIBILITY bool ValidateStr(const std::string &str, const std::string &mode);
GE_FUNC_VISIBILITY bool ValidateStr(const std::string &file_path, const std::string &mode);


///
/// @ingroup domi_common
/// @brief Check path invalid
/// @param [in] path, path to be checked
/// @param [in] length, length of path
/// @return 0 success
/// @return -1 fail
///
GE_FUNC_VISIBILITY Status CheckPath(const char *path, size_t length);
GE_FUNC_VISIBILITY Status ConvertToInt32(const std::string &str, int32_t &val);
} // namespace ge } // namespace ge


#endif // INC_FRAMEWORK_COMMON_UTIL_H_
#endif // AIR_INC_FRAMEWORK_COMMON_UTIL_H_
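
Illustrative use of the relocated file helpers. Error handling is trimmed, and releasing the buffer with delete[] is an assumption consistent with GE_DELETE_NEW_ARRAY rather than a documented contract.

void LoadOmFileSketch() {
  const std::string abs_path = ge::RealPath("./model.om");  // empty string if the path cannot be resolved
  if (abs_path.empty()) {
    return;
  }
  const int64_t file_len = ge::GetFileLength(abs_path);     // now returns int64_t, -1 on failure
  if (file_len <= 0) {
    return;
  }
  char *buffer = nullptr;  // char_t is a typedef of char in graph/types.h
  int32_t read_len = 0;
  if (ge::ReadBytesFromBinaryFile(abs_path.c_str(), &buffer, read_len)) {
    // ... consume buffer[0 .. read_len) ...
    delete[] buffer;  // assumption: the helper allocates with new[]
  }
}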

+ 8
- 7
inc/framework/engine/dnnengine.h

@@ -26,11 +26,11 @@
#include "graph/types.h" #include "graph/types.h"


namespace ge { namespace ge {
enum PriorityEnum {
enum class PriorityEnum {
COST_0 = 0, COST_0 = 0,
COST_1,
COST_2,
COST_3,
COST_1 = 1,
COST_2 = 2,
COST_3 = 3,
COST_9 = 9, COST_9 = 9,
COST_10 = 10, COST_10 = 10,
}; };
@@ -38,7 +38,7 @@ enum PriorityEnum {
struct DNNEngineAttribute { struct DNNEngineAttribute {
std::string engine_name; std::string engine_name;
std::vector<std::string> mem_type; std::vector<std::string> mem_type;
uint32_t compute_cost;
PriorityEnum compute_cost;
enum RuntimeType runtime_type; // HOST, DEVICE enum RuntimeType runtime_type; // HOST, DEVICE
// If engine input format must be specific, set this attribute, else set FORMAT_RESERVED // If engine input format must be specific, set this attribute, else set FORMAT_RESERVED
Format engine_input_format; Format engine_input_format;
@@ -53,10 +53,11 @@ class GE_FUNC_VISIBILITY DNNEngine {
engine_attribute_ = attrs; engine_attribute_ = attrs;
} }
virtual ~DNNEngine() = default; virtual ~DNNEngine() = default;
Status Initialize(const std::map<std::string, std::string> &options) {
Status Initialize(const std::map<std::string, std::string> &options) const {
(void)options;
return SUCCESS; return SUCCESS;
} }
Status Finalize() {
Status Finalize() const {
return SUCCESS; return SUCCESS;
} }
void GetAttributes(DNNEngineAttribute &attr) const { void GetAttributes(DNNEngineAttribute &attr) const {
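
A hedged sketch of filling the updated attribute struct. The engine name and memory type are placeholders, and the HOST/DEVICE enumerators are taken from the comment on runtime_type rather than from a definition visible in this diff.

ge::DNNEngineAttribute MakeEngineAttrSketch() {
  ge::DNNEngineAttribute attr;
  attr.engine_name = "MyCustomEngine";
  attr.mem_type = {"HBM"};
  attr.compute_cost = ge::PriorityEnum::COST_2;    // previously a raw uint32_t, now the scoped PriorityEnum
  attr.runtime_type = ge::DEVICE;                  // assumed enumerator, per the "// HOST, DEVICE" comment
  attr.engine_input_format = ge::FORMAT_RESERVED;  // no specific input format required
  return attr;
}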


+ 76
- 50
inc/framework/executor/ge_executor.h

@@ -32,18 +32,19 @@
namespace ge { namespace ge {
class SingleOp; class SingleOp;
class DynamicSingleOp; class DynamicSingleOp;
class GeRootModel;


struct RunModelData { struct RunModelData {
uint32_t index; // Data index uint32_t index; // Data index
uint32_t modelId; uint32_t modelId;
std::vector<DataBuffer> blobs; // All input/output data buffer
uint32_t timestamp; // Data creation time
uint32_t timeout; // Processing timeout
uint64_t request_id = 0; // Request ID
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty
std::vector<DataBuffer> blobs; // All input/output data buffer
uint32_t timestamp; // Data creation time
uint32_t timeout; // Processing timeout
uint64_t request_id = 0UL; // Request ID
uint64_t dynamic_batch_size = 0UL; // Dynamic batch size scene, set dynamic size, not supported by default:0
uint64_t dynamic_image_height = 0UL; // Dynamic image size scene, set image height, not supported by default:0
uint64_t dynamic_image_width = 0UL; // Dynamic image size scene, set image width, not supported by default:0
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty
}; };


class GE_FUNC_VISIBILITY GeExecutor { class GE_FUNC_VISIBILITY GeExecutor {
@@ -69,11 +70,11 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// ///
static Status FinalizeEx(); static Status FinalizeEx();


Status UnloadModel(uint32_t modelId);
Status UnloadModel(const uint32_t model_id);


// Get input and output descriptor // Get input and output descriptor
Status GetModelDescInfo(uint32_t model_id, std::vector<TensorDesc> &input_desc, std::vector<TensorDesc> &output_desc,
bool new_model_desc = false);
Status GetModelDescInfo(const uint32_t model_id, std::vector<TensorDesc> &input_desc,
std::vector<TensorDesc> &output_desc, const bool new_model_desc = false);


/// ///
/// @ingroup ge /// @ingroup ge
@@ -84,7 +85,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario /// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario
/// @return execute result /// @return execute result
/// ///
Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size);
Status SetDynamicBatchSize(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length,
const uint64_t batch_size);


/// ///
/// @ingroup ge /// @ingroup ge
@@ -96,8 +98,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario /// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario
/// @return execute result /// @return execute result
/// ///
Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height,
uint64_t image_width);
Status SetDynamicImageSize(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length,
const uint64_t image_height, const uint64_t image_width);


/// ///
/// @ingroup ge /// @ingroup ge
@@ -109,7 +111,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [in] dynamic_dims: array of dynamic dimensions /// @param [in] dynamic_dims: array of dynamic dimensions
/// @return execute result /// @return execute result
/// ///
Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
Status SetDynamicDims(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length,
const std::vector<uint64_t> &dynamic_dims); const std::vector<uint64_t> &dynamic_dims);


/// ///
@@ -120,7 +122,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [out] cur_dynamic_dims: current dynamic dims /// @param [out] cur_dynamic_dims: current dynamic dims
/// @return execute result /// @return execute result
/// ///
Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims,
Status GetCurDynamicDims(const uint32_t model_id, const std::vector<uint64_t> &dynamic_dims,
std::vector<uint64_t> &cur_dynamic_dims); std::vector<uint64_t> &cur_dynamic_dims);


/// ///
@@ -131,7 +133,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [out] dynamic_type /// @param [out] dynamic_type
/// @return execute result /// @return execute result
/// ///
Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);
Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
int32_t &dynamic_type);


/// ///
/// @ingroup ge /// @ingroup ge
@@ -140,7 +143,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [out] batch_info /// @param [out] batch_info
/// @return execute result /// @return execute result
/// ///
Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
Status GetCombinedDynamicDims(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);


/// ///
/// @ingroup ge /// @ingroup ge
@@ -149,7 +152,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [out] user_designate_shape_order /// @param [out] user_designate_shape_order
/// @return execute result /// @return execute result
/// ///
Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order);
Status GetUserDesignateShapeOrder(const uint32_t model_id, std::vector<std::string> &user_designate_shape_order);


Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);


@@ -163,18 +166,18 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [in] aippParms: kAippDynamicPara by user in dynamic aipp /// @param [in] aippParms: kAippDynamicPara by user in dynamic aipp
/// @return execute result /// @return execute result
/// ///
Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
Status SetDynamicAippData(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length,
const std::vector<kAippDynamicBatchPara> &aipp_batch_para, const std::vector<kAippDynamicBatchPara> &aipp_batch_para,
const kAippDynamicPara &aippParms);
const kAippDynamicPara &aipp_parms);


Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
Status GetAIPPInfo(const uint32_t model_id, const uint32_t index, AippConfigInfo &aipp_info);


Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name,
Status GetOpAttr(const uint32_t model_id, const std::string &op_name, const std::string &attr_name,
std::string &attr_value); std::string &attr_value);


Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);
Status GetModelAttr(const uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);


Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
Status GetAippType(const uint32_t model_id, const uint32_t index, InputAippType &type, size_t &aipp_index);


Status CommandHandle(const Command &command); Status CommandHandle(const Command &command);


@@ -188,7 +191,7 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @return SUCCESS /// @return SUCCESS
/// @return FAILED /// @return FAILED
/// ///
Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size);
Status GetMaxUsedMemory(const uint32_t model_id, uint32_t &max_size);


/// ///
/// @ingroup ge /// @ingroup ge
@@ -210,8 +213,8 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [out] uint32_t &model_id: Corresponding identification after model loading /// @param [out] uint32_t &model_id: Corresponding identification after model loading
/// @return SUCCESS handle successfully / others handle failed /// @return SUCCESS handle successfully / others handle failed
/// ///
Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size,
void *weight_ptr, size_t weight_size);
Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *const dev_ptr, const size_t mem_size,
void *const weight_ptr, const size_t weight_size);


/// ///
/// @ingroup ge /// @ingroup ge
@@ -225,6 +228,18 @@ class GE_FUNC_VISIBILITY GeExecutor {
Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids, Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids,
const std::vector<uint32_t> &output_queue_ids); const std::vector<uint32_t> &output_queue_ids);


///
/// @ingroup ge
/// @brief Load task list from ModelData with queue.
/// @param [out] model_id: model id allocate from manager.
/// @param [in] root_model: Instance of GeRootModel.
/// @param [in] input_queue_ids: input queue ids create from user.
/// @param [in] output_queue_ids: input queue ids create from user.
/// @return: 0 for success / others for fail
///
Status LoadModelWithQ(uint32_t &model_id, const std::shared_ptr<GeRootModel> &root_model,
const std::vector<uint32_t> &input_queue_ids, const std::vector<uint32_t> &output_queue_ids);

/// ///
/// @ingroup ge /// @ingroup ge
/// @brief Synchronous execution of offline model(Do not create thread) /// @brief Synchronous execution of offline model(Do not create thread)
@@ -235,8 +250,17 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [out] domi::OutputData *output_data: Model output data /// @param [out] domi::OutputData *output_data: Model output data
/// @return SUCCESS handle successfully / others handle failed /// @return SUCCESS handle successfully / others handle failed
/// ///
Status ExecModel(uint32_t model_id, void *stream, const RunModelData &input_data, RunModelData &output_data,
bool async_mode = false);
Status ExecModel(const uint32_t model_id, void *const stream, const RunModelData &input_data,
RunModelData &output_data, const bool async_mode = false);

///
/// @ingroup ge
/// @brief Load task list from root_model without input queue or output queue.
/// @param [out] model_id: model id allocate from manager.
/// @param [in] root_model: Instance of GeRootModel.
/// @return: 0 for success / others for fail
///
Status LoadModelWithoutQ(uint32_t &model_id, const std::shared_ptr<GeRootModel> &root_model) const;
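
A hedged sketch of the two new loading paths. The queue ids are placeholders created elsewhere, and root_model is assumed to be a valid GeRootModel produced by the build flow.

void LoadWithQueuesSketch(const std::shared_ptr<ge::GeRootModel> &root_model) {
  ge::GeExecutor executor;
  uint32_t model_id = 0U;
  const std::vector<uint32_t> input_queue_ids{0U};
  const std::vector<uint32_t> output_queue_ids{1U};
  (void)executor.LoadModelWithQ(model_id, root_model, input_queue_ids, output_queue_ids);

  uint32_t plain_model_id = 0U;
  (void)executor.LoadModelWithoutQ(plain_model_id, root_model);  // queue-less variant added in the same change
}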


/// ///
/// @ingroup ge /// @ingroup ge
@@ -250,9 +274,9 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data /// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data
/// @return SUCCESS handle successfully / others handle failed /// @return SUCCESS handle successfully / others handle failed
/// ///
Status ExecModel(uint32_t model_id, void *stream, const RunModelData &run_input_data,
Status ExecModel(const uint32_t model_id, void *const stream, const RunModelData &run_input_data,
const std::vector<GeTensorDesc> &input_desc, RunModelData &run_output_data, const std::vector<GeTensorDesc> &input_desc, RunModelData &run_output_data,
std::vector<GeTensorDesc> &output_desc, bool async_mode = false);
std::vector<GeTensorDesc> &output_desc, const bool async_mode = false);


/// ///
/// @ingroup ge /// @ingroup ge
@@ -273,36 +297,38 @@ class GE_FUNC_VISIBILITY GeExecutor {
/// @param [out] size_t &weight_size Weight memory space size /// @param [out] size_t &weight_size Weight memory space size
/// @return SUCCESS handle successfully / others handle failed /// @return SUCCESS handle successfully / others handle failed
/// ///
Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size);
Status GetMemAndWeightSize(const void *const model_data, const size_t model_size, size_t &mem_size,
size_t &weight_size);


static Status LoadSingleOp(const std::string &modelName, const ModelData &modelData, void *stream,
SingleOp **single_op);
static Status LoadSingleOp(const std::string &model_name, const ModelData &model_data, void *const stream,
SingleOp **const single_op);


static Status LoadSingleOpV2(const std::string &modelName, const ModelData &modelData, void *stream,
SingleOp **single_op, const uint64_t model_id);
static Status LoadSingleOpV2(const std::string &model_name, const ModelData &model_data, void *const stream,
SingleOp **const single_op, const uint64_t model_id);


static Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
static Status ExecuteAsync(SingleOp *const executor, const std::vector<DataBuffer> &inputs,
std::vector<DataBuffer> &outputs); std::vector<DataBuffer> &outputs);


static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &modelData, void *stream,
DynamicSingleOp **single_op);
static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &model_data, void *const stream,
DynamicSingleOp **const single_op);


static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &modelData, void *stream,
DynamicSingleOp **single_op, const uint64_t model_id);
static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &model_data, void *const stream,
DynamicSingleOp **const single_op, const uint64_t model_id);


static Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc,
static Status ExecuteAsync(DynamicSingleOp *const executor, const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &outputs); std::vector<DataBuffer> &outputs);


static Status ReleaseSingleOpResource(void *stream);
static Status ReleaseSingleOpResource(void *const stream);
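Editor's note — taken together, the single-op interfaces above form a load / execute / release flow. A hedged sketch (variable names are illustrative; model_data, stream and the DataBuffer vectors are assumed to be prepared by the caller):
ge::SingleOp *single_op = nullptr;
if (ge::GeExecutor::LoadSingleOp("my_single_op", model_data, stream, &single_op) == ge::SUCCESS) {
  // inputs / outputs: std::vector<ge::DataBuffer> describing the device buffers
  (void)ge::GeExecutor::ExecuteAsync(single_op, inputs, outputs);
}
// Release everything bound to the stream once all single ops on it have finished.
(void)ge::GeExecutor::ReleaseSingleOpResource(stream);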


static Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id);
static Status GetDeviceIdByModelId(const uint32_t model_id, uint32_t &device_id);


Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count);
Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);
Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);
Status GetBatchInfoSize(const uint32_t model_id, size_t &shape_count);
Status GetOrigInputInfo(const uint32_t model_id, const uint32_t index, OriginInputInfo &orig_input_info);
Status GetAllAippInputOutputDims(const uint32_t model_id, const uint32_t index,
std::vector<InputOutputDims> &input_dims, std::vector<InputOutputDims> &output_dims);
Status GetOpDescInfo(const uint32_t device_id, const uint32_t stream_id, const uint32_t task_id,
OpDescInfo &op_desc_info);


private: private:
static std::atomic_bool is_inited_; static std::atomic_bool is_inited_;


+ 10
- 8
inc/framework/generator/ge_generator.h View File

@@ -31,6 +31,8 @@
#include "framework/omg/omg_inner_types.h" #include "framework/omg/omg_inner_types.h"


namespace ge { namespace ge {
const std::string kAttrSupportDynamicShape = "support_dynamicshape";

class GeRootModel; class GeRootModel;
class GE_FUNC_VISIBILITY GeGenerator { class GE_FUNC_VISIBILITY GeGenerator {
public: public:
@@ -103,8 +105,8 @@ class GE_FUNC_VISIBILITY GeGenerator {
/// @param [in] graph_name: graph name. /// @param [in] graph_name: graph name.
/// @param [out] graph: graph of single op. /// @param [out] graph: graph of single op.
/// @return SUCCESS or FAILED /// @return SUCCESS or FAILED
Status BuildSingleOpGraph(OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name,
Graph &graph, std::vector<std::pair<std::string, std::string>> &inputs_name_type);
Status BuildSingleOpGraph(const OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name,
Graph &graph, std::vector<std::pair<std::string, std::string>> &inputs_name_type) const;
Status BuildOriginalGraphInfo(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, Status BuildOriginalGraphInfo(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs,
const std::vector<GeTensor> &outputs, const std::string &model_file_name, const std::vector<GeTensor> &outputs, const std::string &model_file_name,
bool is_offline, int32_t compile_flag, GraphStage graph_stage, Graph &graph, bool is_offline, int32_t compile_flag, GraphStage graph_stage, Graph &graph,
@@ -116,20 +118,20 @@ class GE_FUNC_VISIBILITY GeGenerator {
ge::ModelBufferData &model, bool is_offline = true); ge::ModelBufferData &model, bool is_offline = true);
Status BuildSingleOp(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, const std::vector<GeTensor> &outputs, Status BuildSingleOp(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, const std::vector<GeTensor> &outputs,
const std::string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, const std::string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
ComputeGraphPtr &compute_graph, bool is_offline = true, int32_t compile_flag = 0,
ComputeGraphPtr &comp_graph, bool is_offline = true, int32_t compile_flag = 0,
GraphStage graph_stage = GraphStage::GRAPH_STAGE_RESERVED); GraphStage graph_stage = GraphStage::GRAPH_STAGE_RESERVED);
bool CheckNoAicore(const ComputeGraphPtr &graph); bool CheckNoAicore(const ComputeGraphPtr &graph);
void RemoveConst(const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs);
Status CheckForSingleOp(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs,
const std::vector<GeTensor> &outputs);
Status InferFormatForSingleOp(OpDescPtr &op_desc, Graph &graph);
void RemoveConst(const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs) const;
Status CheckForSingleOp(const OpDescPtr &op_desc, const std::vector<GeTensor> &inputs,
const std::vector<GeTensor> &outputs) const;
Status InferFormatForSingleOp(const OpDescPtr &op_desc, const Graph &graph) const;


using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>;
Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); Status SetModelNameForDump(const GeRootModelPtr &ge_root_model);
Status CreateGeneralizedBuildAttrs(const GeRootModelPtr &ge_root_model, const std::vector<GeTensor> &inputs, Status CreateGeneralizedBuildAttrs(const GeRootModelPtr &ge_root_model, const std::vector<GeTensor> &inputs,
const std::vector<GeTensor> &outputs, const std::vector<GeTensor> &outputs,
const std::vector<std::pair<std::string, std::string>> &inputs_name_type, const std::vector<std::pair<std::string, std::string>> &inputs_name_type,
std::vector<ge::NamedAttrs> &generalized_build_attrs);
std::vector<ge::NamedAttrs> &generalized_build_attrs) const;


class Impl; class Impl;




+ 0
- 4
inc/framework/memory/memory_api.h View File

@@ -17,11 +17,7 @@
#ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_ #ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_
#define INC_FRAMEWORK_MEMORY_MEMORY_API_H_ #define INC_FRAMEWORK_MEMORY_MEMORY_API_H_


#include <string>
#include <vector>

#include "external/ge/ge_api_error_codes.h" #include "external/ge/ge_api_error_codes.h"
#include "graph/types.h"
#include "runtime/mem.h" #include "runtime/mem.h"


namespace ge { namespace ge {


+ 1
- 1
inc/framework/memory/memory_assigner.h View File

@@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner {


MemoryAssigner &operator=(const MemoryAssigner &) = delete; MemoryAssigner &operator=(const MemoryAssigner &) = delete;


Status AssignMemory(bool is_loop_graph, std::map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size);
Status AssignMemory(std::map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size);


private: private:
ge::ComputeGraphPtr compute_graph_; ge::ComputeGraphPtr compute_graph_;


+ 4
- 4
inc/framework/omg/omg.h View File

@@ -64,7 +64,7 @@ GE_FUNC_VISIBILITY Status InitDomiOmgContext(const std::string &input_shape, con
GE_FUNC_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<std::string, std::string> &atc_params, GE_FUNC_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<std::string, std::string> &atc_params,
const char *model_file, const char *weights_file, domi::FrameworkType type, const char *model_file, const char *weights_file, domi::FrameworkType type,
const char *op_conf = nullptr, const char *target = nullptr, const char *op_conf = nullptr, const char *target = nullptr,
RunMode run_mode = GEN_OM_MODEL, bool is_dynamic_input = false);
RunMode run_mode = RunMode::GEN_OM_MODEL, bool is_dynamic_input = false);


/** /**
* @ingroup domi_omg * @ingroup domi_omg
@@ -89,15 +89,15 @@ GE_FUNC_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const char
GE_FUNC_VISIBILITY Status ConvertFwkModelToJson(domi::FrameworkType framework, const char *model_file, GE_FUNC_VISIBILITY Status ConvertFwkModelToJson(domi::FrameworkType framework, const char *model_file,
const char *json_file); const char *json_file);


GE_FUNC_VISIBILITY void GetGroupName(ge::proto::ModelDef &model);
GE_FUNC_VISIBILITY void GetGroupName(ge::proto::ModelDef &model_def);


GE_FUNC_VISIBILITY void FindParserSo(const std::string &path, std::vector<std::string> &fileList,
GE_FUNC_VISIBILITY void FindParserSo(const std::string &path, std::vector<std::string> &file_list,
std::string &caffe_parser_path); std::string &caffe_parser_path);


GE_FUNC_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); GE_FUNC_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file);


GE_FUNC_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, GE_FUNC_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type,
const std::string &output_format);
const std::string &output);


GE_FUNC_VISIBILITY Status GetOutputLeaf(ge::NodePtr node, GE_FUNC_VISIBILITY Status GetOutputLeaf(ge::NodePtr node,
std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info); std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info);


+ 6
- 34
inc/framework/omg/omg_inner_types.h View File

@@ -31,12 +31,7 @@
using domi::DOMI_TENSOR_ND; using domi::DOMI_TENSOR_ND;
using domi::DOMI_TENSOR_RESERVED; using domi::DOMI_TENSOR_RESERVED;
using domi::domiTensorFormat_t; using domi::domiTensorFormat_t;
using domi::FRAMEWORK_RESERVED;
using domi::FrameworkType;
using std::map;
using std::string;
using std::unordered_map; using std::unordered_map;
using std::vector;


namespace ge { namespace ge {
/** /**
@@ -51,36 +46,13 @@ enum RunMode {
DISPLAY_OM_INFO = 6 // display model info DISPLAY_OM_INFO = 6 // display model info
}; };


///
/// @ingroup domi_omg
/// @brief high-precision mode
///
enum HighPrecisionMode {
// the FP16 high-precision function is disabled in common mode
HIGH_PRECISION_DEFAULT = 0,

// high-precision mode, enabling FP16 high-precision mode (Convolution/FullConnect/AvgPooling are involved)
HIGH_PRECISION_FP16 = 1
};

///
/// @ingroup domi_omg
/// @brief description buffer data
///
struct OMGBufferData {
void *data;
uint32_t length;
};

struct OmgContext { struct OmgContext {
OmgContext() {
format = DOMI_TENSOR_ND;
}
domiTensorFormat_t format;
OmgContext() : format(domi::DOMI_TENSOR_ND) {}
domi::domiTensorFormat_t format;


// format of the input specified by the command line // format of the input specified by the command line
std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map;
std::vector<domiTensorFormat_t> output_formats;
std::unordered_map<std::string, domi::domiTensorFormat_t> input_nodes_format_map;
std::vector<domi::domiTensorFormat_t> output_formats;


// user-designate input dims // user-designate input dims
std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims; std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims;
@@ -107,9 +79,9 @@ struct OmgContext {
// net data nodes tensor names(caffe or onnx) // net data nodes tensor names(caffe or onnx)
std::vector<std::string> data_tensor_names; std::vector<std::string> data_tensor_names;
// preferential format used by the entire network // preferential format used by the entire network
domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED;
domi::domiTensorFormat_t net_format = domi::DOMI_TENSOR_RESERVED;
domi::FrameworkType type = domi::FRAMEWORK_RESERVED; domi::FrameworkType type = domi::FRAMEWORK_RESERVED;
RunMode run_mode = ONLY_PRE_CHECK;
RunMode run_mode = RunMode::ONLY_PRE_CHECK;
bool train_flag = false; bool train_flag = false;


std::string output_type; std::string output_type;


+ 7
- 0
inc/framework/omg/parser/model_parser.h View File

@@ -108,6 +108,8 @@ class GE_FUNC_VISIBILITY ModelParser {
* @return Others failed * @return Others failed
*/ */
virtual domi::Status ToJson(const char *model_file, const char *json_file) { virtual domi::Status ToJson(const char *model_file, const char *json_file) {
(void)model_file;
(void)json_file;
return domi::SUCCESS; return domi::SUCCESS;
} }


@@ -130,6 +132,8 @@ class GE_FUNC_VISIBILITY ModelParser {
* @return Others failed * @return Others failed
*/ */
virtual domi::Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) { virtual domi::Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) {
(void)serialized_proto;
(void)graph;
return UNSUPPORTED; return UNSUPPORTED;
} }


@@ -144,6 +148,9 @@ class GE_FUNC_VISIBILITY ModelParser {
*/ */
virtual domi::Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback, virtual domi::Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback,
ge::ComputeGraphPtr &graph) { ge::ComputeGraphPtr &graph) {
(void)serialized_proto;
(void)callback;
(void)graph;
return UNSUPPORTED; return UNSUPPORTED;
} }
}; };


+ 4
- 4
inc/framework/omg/parser/op_parser.h View File

@@ -50,7 +50,7 @@ class GE_FUNC_VISIBILITY OpParser {
* @return SUCCESS * @return SUCCESS
* @return FAILED * @return FAILED
*/ */
virtual domi::Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0;
virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::OpDescPtr &op_desc) = 0;


/** /**
* @ingroup domi_omg * @ingroup domi_omg
@@ -60,7 +60,7 @@ class GE_FUNC_VISIBILITY OpParser {
* @return SUCCESS * @return SUCCESS
* @return FAILED * @return FAILED
*/ */
virtual domi::Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0;
virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::Operator &op_dest) = 0;


/** /**
* @ingroup domi_omg * @ingroup domi_omg
@@ -70,7 +70,7 @@ class GE_FUNC_VISIBILITY OpParser {
* @return SUCCESS * @return SUCCESS
* @return FAILED * @return FAILED
*/ */
virtual domi::Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0;
virtual domi::Status ParseWeights(const google::protobuf::Message *op_src, ge::NodePtr &node) = 0;


/** /**
* @ingroup domi_omg * @ingroup domi_omg
@@ -80,7 +80,7 @@ class GE_FUNC_VISIBILITY OpParser {
* @return SUCCESS * @return SUCCESS
* @return FAILED * @return FAILED
*/ */
virtual domi::Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) {
virtual domi::Status GetFormat(const google::protobuf::Message *op_src, domi::domiTensorFormat_t &format) {
(void)op_src; (void)op_src;
// Indicates that the op does not provide a value for format // Indicates that the op does not provide a value for format
format = domi::DOMI_TENSOR_RESERVED; format = domi::DOMI_TENSOR_RESERVED;


+ 2
- 4
inc/framework/omg/parser/parser_factory.h View File

@@ -24,13 +24,11 @@
#include "framework/omg/omg_inner_types.h" #include "framework/omg/omg_inner_types.h"
#include "framework/omg/parser/parser_types.h" #include "framework/omg/parser/parser_types.h"


using Status = domi::Status;

namespace domi { namespace domi {
class WeightsParser; class WeightsParser;
class ModelParser; class ModelParser;


typedef std::shared_ptr<ModelParser> (*MODEL_PARSER_CREATOR_FUN)(void);
using MODEL_PARSER_CREATOR_FUN = std::shared_ptr<ModelParser> (*)(void);


// Create modelparser for different frameworks // Create modelparser for different frameworks
class GE_FUNC_VISIBILITY ModelParserFactory { class GE_FUNC_VISIBILITY ModelParserFactory {
@@ -82,7 +80,7 @@ class GE_FUNC_VISIBILITY ModelParserRegisterar {
} \ } \
ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser) ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser)


typedef std::shared_ptr<WeightsParser> (*WEIGHTS_PARSER_CREATOR_FUN)(void);
using WEIGHTS_PARSER_CREATOR_FUN = std::shared_ptr<WeightsParser> (*)(void);
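Editor's note — both registration aliases are modernized from typedef function-pointer declarations to using aliases; the declared type is identical, only the spelling changes. An illustrative sketch (FakeWeightsParser is a hypothetical concrete subclass, not part of the codebase):
// old spelling:  typedef std::shared_ptr<WeightsParser> (*WEIGHTS_PARSER_CREATOR_FUN)(void);
// new spelling:  using WEIGHTS_PARSER_CREATOR_FUN = std::shared_ptr<WeightsParser> (*)(void);
std::shared_ptr<WeightsParser> CreateFakeWeightsParser() {
  return std::shared_ptr<WeightsParser>(new FakeWeightsParser());
}
WEIGHTS_PARSER_CREATOR_FUN creator = &CreateFakeWeightsParser;  // the same assignment works with either form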


// Create weightsparser for different frameworks // Create weightsparser for different frameworks
class GE_FUNC_VISIBILITY WeightsParserFactory { class GE_FUNC_VISIBILITY WeightsParserFactory {


+ 3
- 3
inc/framework/omg/parser/parser_inner_ctx.h View File

@@ -29,8 +29,8 @@
namespace ge { namespace ge {
struct ParserContext { struct ParserContext {
// format of the input specified by the command line // format of the input specified by the command line
std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map;
std::vector<domiTensorFormat_t> output_formats;
std::unordered_map<std::string, domi::domiTensorFormat_t> input_nodes_format_map;
std::vector<domi::domiTensorFormat_t> output_formats;
// user-designate input dims // user-designate input dims
std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims; std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims;
std::map<std::string, std::vector<int64_t>> input_dims; std::map<std::string, std::vector<int64_t>> input_dims;
@@ -58,7 +58,7 @@ struct ParserContext {
bool train_flag = false; bool train_flag = false;
domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND;
domi::FrameworkType type = domi::FRAMEWORK_RESERVED; domi::FrameworkType type = domi::FRAMEWORK_RESERVED;
RunMode run_mode = GEN_OM_MODEL;
RunMode run_mode = RunMode::GEN_OM_MODEL;
// save caffe custom proto path, used by caffe parse // save caffe custom proto path, used by caffe parse
std::string custom_proto_path; std::string custom_proto_path;
// save caffe proto path, used by caffe parse // save caffe proto path, used by caffe parse


+ 1
- 3
inc/framework/omg/version.h View File

@@ -19,8 +19,6 @@


#include <memory> #include <memory>
#include <set> #include <set>
#include <string>
#include <vector>


#include "framework/common/debug/log.h" #include "framework/common/debug/log.h"
#include "framework/common/string_util.h" #include "framework/common/string_util.h"
@@ -34,7 +32,7 @@ class GE_FUNC_VISIBILITY PlatformVersionManager {
static Status GetPlatformVersion(std::string &ver) { static Status GetPlatformVersion(std::string &ver) {
ver = "1.11.z"; ver = "1.11.z";
const std::vector<std::string> version_splits = StringUtils::Split(ver, '.'); const std::vector<std::string> version_splits = StringUtils::Split(ver, '.');
GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;);
GE_IF_BOOL_EXEC(version_splits.size() < 3U, GELOGW("Read platform version error!"); return FAILED;);


GELOGI("Read current platform version: %s.", ver.c_str()); GELOGI("Read current platform version: %s.", ver.c_str());
return SUCCESS; return SUCCESS;


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit 1d99928bfcb02e45acc7db73e3ee57304ff1131a
Subproject commit 0a2335712484f85cd44a0f2402eac6932b22b40a

+ 57
- 57
third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h View File

@@ -1,57 +1,57 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OP_TYPE_LIST_H_
#define AICPU_OP_TYPE_LIST_H_
extern "C" {
enum OpKernelType {
TF_KERNEL,
CPU_KERNEL
};
enum ReturnCode {
OP_TYPE_NOT_SUPPORT,
FORMAT_NOT_SUPPORT,
DTYPE_NOT_SUPPORT
};
#pragma pack(push, 1)
//One byte alignment
struct SysOpInfo {
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
};
struct SysOpCheckInfo {
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
struct SysOpCheckResp {
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
#pragma pack(pop)
}
#endif // AICPU_OP_TYPE_LIST_H_
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OP_TYPE_LIST_H_
#define AICPU_OP_TYPE_LIST_H_
extern "C" {
enum OpKernelType {
TF_KERNEL,
CPU_KERNEL
};
enum ReturnCode {
OP_TYPE_NOT_SUPPORT,
FORMAT_NOT_SUPPORT,
DTYPE_NOT_SUPPORT
};
#pragma pack(push, 1)
// One byte alignment
struct SysOpInfo {
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
};
struct SysOpCheckInfo {
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
struct SysOpCheckResp {
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
#pragma pack(pop)
}
#endif // AICPU_OP_TYPE_LIST_H_

+ 47
- 0
third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h View File

@@ -29,6 +29,53 @@ struct AicpuParamHead
uint32_t extInfoLength; // extInfo struct Length uint32_t extInfoLength; // extInfo struct Length
uint64_t extInfoAddr; // extInfo address uint64_t extInfoAddr; // extInfo address
}; };

enum class AicpuConfigMsgType {
AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */
AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */
AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */
};

enum class AicpuErrMsgType {
ERR_MSG_TYPE_NULL = 0,
ERR_MSG_TYPE_AICORE = 1,
ERR_MSG_TYPE_AICPU = 2,
};

typedef struct tagAicpuConfigMsg {
uint8_t msgType;
uint8_t reserved1;
uint16_t bufLen;
uint32_t offset;
uint64_t bufAddr;
uint32_t tsId;
uint32_t reserved2;
} AicpuConfigMsg;

typedef struct tagAicoreErrMsgInfo {
uint8_t errType;
uint8_t version;
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
uint32_t errorCode;
uint32_t modelId;
uint32_t taskId;
uint32_t streamId;
uint64_t transactionId;
uint8_t reserved2[228]; /* total struct size is 256 bytes; reserved2 length = 256 minus the other field lengths */
} AicoreErrMsgInfo;

typedef struct tagAicpuErrMsgInfo {
uint8_t errType;
uint8_t version;
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */
uint32_t errorCode;
uint32_t modelId;
uint32_t streamId;
uint64_t transactionId;
char opName[64]; /* op name str */
char errDesc[128]; /* err msg desc info */
uint8_t reserved2[40]; /* total struct size is 256 bytes; reserved2 length = 256 minus the other field lengths */
} AicpuErrMsgInfo;
#pragma pack(pop) #pragma pack(pop)


} // namespace aicpu } // namespace aicpu
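Editor's note — the new message and error-info structures sit inside the existing #pragma pack(push, 1) region, so they carry no padding; AicpuConfigMsg, for instance, is exactly 24 bytes. A hedged sketch of filling a buffer-set-address message (field values are illustrative, not a documented protocol):
aicpu::AicpuConfigMsg msg = {};
msg.msgType = static_cast<uint8_t>(aicpu::AicpuConfigMsgType::AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR);
msg.bufLen = 1024U;              // buffer length in bytes
msg.offset = 0U;
msg.bufAddr = buf_device_addr;   // hypothetical device address obtained elsewhere
msg.tsId = 0U;
static_assert(sizeof(aicpu::AicpuConfigMsg) == 24U, "1-byte packing leaves no padding");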


+ 0
- 1
third_party/fwkacllib/inc/cce/aicpu_engine.h View File

@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */

#ifndef AICPU_ENGINE_H__ #ifndef AICPU_ENGINE_H__
#define AICPU_ENGINE_H__ #define AICPU_ENGINE_H__




+ 9
- 7
third_party/fwkacllib/inc/cce/fwk_adpt_struct.h View File

@@ -21,7 +21,7 @@


namespace aicpu { namespace aicpu {
namespace FWKAdapter { namespace FWKAdapter {
using char_t = char;
// API RETURN CODE // API RETURN CODE
enum FWKAdptAPIRetCode { enum FWKAdptAPIRetCode {
FWK_ADPT_SUCCESS = 0, // success FWK_ADPT_SUCCESS = 0, // success
@@ -63,6 +63,8 @@ enum FWKTaskExtInfoType {
FWK_ADPT_EXT_BITMAP, FWK_ADPT_EXT_BITMAP,
FWK_ADPT_EXT_TOPIC_TYPE, FWK_ADPT_EXT_TOPIC_TYPE,
FWK_ADPT_EXT_ASYNCWAIT, FWK_ADPT_EXT_ASYNCWAIT,
FWK_ADPT_EXT_UNKNOWN_SHAPE_INPUT_INDEX,
FWK_ADPT_EXT_UNKNOWN_SHAPE_OUTPUT_INDEX,
FWK_ADPT_EXT_INVALID FWK_ADPT_EXT_INVALID
}; };


@@ -113,7 +115,7 @@ struct StrFWKKernel {
typedef StrFWKKernel FWKOperateParam; typedef StrFWKKernel FWKOperateParam;


// Extent info ShapeAndType // Extent info ShapeAndType
const uint32_t kMaxShapeDims = 8;
const uint32_t kMaxShapeDims = 8U;
#pragma pack(push, 1) #pragma pack(push, 1)
struct ShapeAndType { struct ShapeAndType {
int32_t type; int32_t type;
@@ -122,13 +124,13 @@ struct ShapeAndType {
#pragma pack(pop) #pragma pack(pop)


// Extend info structure for extInfoAddr // Extend info structure for extInfoAddr
const uint32_t kExtInfoHeadSize = 8;
const uint32_t kExtInfoHeadSize = 8U;


#pragma pack(push, 1) #pragma pack(push, 1)
struct ExtInfo { struct ExtInfo {
int32_t infoType; // extend type int32_t infoType; // extend type
uint32_t infoLen; // length for infoMsg uint32_t infoLen; // length for infoMsg
char infoMsg[0]; // extend value
char_t infoMsg[0]; // extend value
}; };
#pragma pack(pop) #pragma pack(pop)


@@ -143,9 +145,9 @@ struct ResultSummary {


#pragma pack(push, 1) #pragma pack(push, 1)
struct AsyncWait { struct AsyncWait {
uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait
uint32_t waitId; // wait id, GE refresh
uint32_t timeOut; // reserved
uint8_t waitType;   // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait
uint32_t waitId; // wait id, GE refresh
uint32_t timeOut; // reserved
uint64_t reserved; uint64_t reserved;
}; };
#pragma pack(pop) #pragma pack(pop)


+ 14
- 5
third_party/fwkacllib/inc/hccl/base.h View File

@@ -94,13 +94,13 @@ enum HcclEventType {
HCCL_EVENT_RESERVED /**< reserved */ HCCL_EVENT_RESERVED /**< reserved */
}; };


const u32 TAG_MAX_LEN = 127; // 脳卯麓贸碌脛tag 鲁陇露脠
const u32 TAG_MAX_LEN = 127; // maximum tag length
using TagAttr = struct TagAttrDef { using TagAttr = struct TagAttrDef {
char name[TAG_MAX_LEN + 1]; // tag卤锚脢露
// tag卤锚脢露碌脛陆脫脢脮脢媒戮脻拢卢碌梅脫脙脮脽脢脟路帽禄谩脰梅露炉碌梅脫脙陆脫脢脮陆脫驴脷拢卢0 = 路帽, 1 = 禄谩(脭陇脕么拢卢脭脻虏禄脰搂鲁脰)隆拢
// 露脭脫脷activeRecv = 0拢卢碌卤陆脫脢脮虏脿脢脮碌陆脢媒戮脻禄貌脮脽路垄脣脥脟毛脟贸脢卤拢卢脰梅露炉脥篓脰陋碌梅脫脙脮脽隆拢
char name[TAG_MAX_LEN + 1]; // tag identifier
// For data received under this tag, whether the caller will actively call the receive interface: 0 = no, 1 = yes (reserved, not yet supported).
// When activeRecv = 0, the receiver proactively notifies the caller when data arrives or a send request is received.
uint32_t activeRecv; uint32_t activeRecv;
uint32_t sendCredit; // 脜盲脰脙赂脙tag脭脢脨铆inflight碌脛send赂枚脢媒
uint32_t sendCredit; // number of in-flight sends allowed for this tag
uint32_t eventId; uint32_t eventId;
}; };


@@ -188,6 +188,15 @@ struct HcomGatherAllToAllVParams {
const char *group; // not used now const char *group; // not used now
}; };


typedef enum workMode {
HCCL_MODE_NORMAL = 0, // no any-source / any-tag probe; only exact probe is supported
HCCL_MODE_ANY = 1 // only probe with ANY_SOURCE + ANY_TAG is supported
} WorkMode;

typedef struct tagCommAttr {
WorkMode mode; // probe working mode within the communication domain
uint32_t deviceId = 0;
} CommAttr;
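Editor's note — for illustration, the new attribute could be default-initialized with the strict probe mode (usage assumed, not taken from HCCL documentation):
CommAttr comm_attr{};
comm_attr.mode = HCCL_MODE_NORMAL;  // exact probe only; ANY_SOURCE / ANY_TAG probing not used
comm_attr.deviceId = 0U;            // logical device the communicator is bound to (assumption)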
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif // __cplusplus #endif // __cplusplus


+ 0
- 66
third_party/fwkacllib/inc/hccl/hcom.h View File

@@ -126,72 +126,6 @@ extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, co
* @return HcclResult * @return HcclResult
*/ */
extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList);

/**
* @brief Initialize hcom executor.
*
* @param void
* @return HcclResult
*/
HcclResult HcomExecInitialize();

/**
* @brief Finalize hcom executor.
*
* @param void
* @return HcclResult
*/
HcclResult HcomExecFinalize();

/**
* @brief Put collective communication operation into hcom executor.
*
* @param opInfo information about collective communication operation.
* @param callback callback after collective communication operation.
* @return HcclResult
*/
HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback);

/**
* @brief Put remote access operation into hcom executor.
*
* @param remoteAccessType operation type (read or write).
* @param addrInfos address information about collective communication operation.
* @param callback callback after collective communication operation.
* @return HcclResult
*/
HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType,
const std::vector<HcomRemoteAccessAddrInfo>& addrInfos,
std::function<void(HcclResult status)> callback);

/**
* @brief Put alltoallv communication operation into hcom executor.
*
* @param params information about alltoallv communication operation.
* @param callback callback after collective communication operation.
* @return HcclResult
*/
HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function<void(HcclResult status)> callback);

/**
* @brief Put agther alltoallv communication operation into hcom executor.
*
* @param params information about agther alltoallv communication operation.
* @param callback callback after collective communication operation.
* @return HcclResult
*/
HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params,
std::function<void(HcclResult status)> callback);

/**
* @brief Register memories and init resources for remote access.
*
* @param addrList memory addresses for remote access.
* @param count number of remote memory addresses.
* @return HcclResult
*/
extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count);

#ifdef __cplusplus #ifdef __cplusplus
} }
#endif // __cplusplus #endif // __cplusplus


+ 9
- 15
third_party/fwkacllib/inc/mmpa/mmpa_api.h View File

@@ -1,18 +1,12 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* @file mmpa_api.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/


#ifndef _MMPA_API_H_ #ifndef _MMPA_API_H_
#define _MMPA_API_H_ #define _MMPA_API_H_


+ 9
- 15
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h View File

@@ -1,18 +1,12 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* @file mmpa_linux.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/


#ifndef MMPA_LINUX_MMPA_LINUX_H #ifndef MMPA_LINUX_MMPA_LINUX_H
#define MMPA_LINUX_MMPA_LINUX_H #define MMPA_LINUX_MMPA_LINUX_H


+ 3
- 0
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h View File

@@ -79,6 +79,9 @@ typedef long LONG;
#define MMPA_THREAD_SCHED_OTHER SCHED_OTHER #define MMPA_THREAD_SCHED_OTHER SCHED_OTHER
#define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN #define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN


#define MMPA_PATH_SEPARATOR_STR "/"
#define MMPA_PATH_SEPARATOR_CHAR '/'
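Editor's note — the new separator macros let shared code build file paths without hard-coding '/' or '\\' (the Windows header further below defines the backslash variants). A small sketch, assuming <stdio.h> is included and base_dir is a caller-supplied directory string:
char model_path[256];
(void)snprintf(model_path, sizeof(model_path), "%s%s%s", base_dir, MMPA_PATH_SEPARATOR_STR, "model.om");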

#define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER #define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER


#define MMPA_MAX_NI 19 #define MMPA_MAX_NI 19


+ 86
- 83
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h View File

@@ -1,83 +1,86 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MMPA_TYPEDEF_WIN_H
#define MMPA_TYPEDEF_WIN_H

#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif // __cpluscplus
#endif // __cpluscplus

#ifndef FALSE
#define FALSE 0
#endif

#ifndef TRUE
#define TRUE 1
#endif

#define EN_OK 0
#define EN_ERR 1
#define EN_ERROR (-1)
#define EN_INVALID_PARAM (-2)
#define EN_TIMEOUT (-3)

#define HANDLE_INVALID_VALUE (-1)
#define INVALID_SOCKET_HANDLE INVALID_SOCKET
#define MMPA_MEM_MAX_LEN (0x7fffffff)
#define MMPA_PROCESS_ERROR (0x7fffffff)

#define MMPA_ONE_THOUSAND 1000
#define MMPA_COMPUTER_BEGIN_YEAR 1900
#define SUMMER_TIME_OR_NOT (-1)
#define MMPA_ZERO 0
#define MMPA_VALUE_ONE 1
#define MMPA_SOCKET_MAIN_EDITION 2
#define MMPA_SOCKET_SECOND_EDITION 0
#define MMPA_PIPE_BUF_SIZE 1024
#define MMPA_MAX_SCANDIR_COUNT 1024
#define MAX_IOVEC_SIZE 32
#define MMPA_PIPE_COUNT 2
#define MMPA_THREADNAME_SIZE 16
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1)
#define MMPA_MIN_OS_VERSION_SIZE 64

#define MMPA_MAX_NI 19
#define MMPA_MIDDLE_NI 5
#define MMPA_LOW_NI (-5)
#define MMPA_MIN_NI (-20)
#define MMPA_MAX_FILE 128

#define MMPA_MAX_THREAD_PIO 99
#define MMPA_MIDDLE_THREAD_PIO 66
#define MMPA_LOW_THREAD_PIO 33
#define MMPA_MIN_THREAD_PIO 1

#define MMPA_THREAD_SCHED_RR 0
#define MMPA_THREAD_SCHED_FIFO 0
#define MMPA_THREAD_SCHED_OTHER 0
#define MMPA_THREAD_MIN_STACK_SIZE 0

#define MM_MUTEX_INITIALIZER NULL

#ifdef __cplusplus
#if __cplusplus
}
#endif // __cpluscplus
#endif // __cpluscplus
#endif // _MMPA_TYPEDEF_WIN_H_
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MMPA_TYPEDEF_WIN_H
#define MMPA_TYPEDEF_WIN_H
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif // __cpluscplus
#endif // __cpluscplus
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#define EN_OK 0
#define EN_ERR 1
#define EN_ERROR (-1)
#define EN_INVALID_PARAM (-2)
#define EN_TIMEOUT (-3)
#define HANDLE_INVALID_VALUE (-1)
#define INVALID_SOCKET_HANDLE INVALID_SOCKET
#define MMPA_MEM_MAX_LEN (0x7fffffff)
#define MMPA_PROCESS_ERROR (0x7fffffff)
#define MMPA_ONE_THOUSAND 1000
#define MMPA_COMPUTER_BEGIN_YEAR 1900
#define SUMMER_TIME_OR_NOT (-1)
#define MMPA_ZERO 0
#define MMPA_VALUE_ONE 1
#define MMPA_SOCKET_MAIN_EDITION 2
#define MMPA_SOCKET_SECOND_EDITION 0
#define MMPA_PIPE_BUF_SIZE 1024
#define MMPA_MAX_SCANDIR_COUNT 1024
#define MAX_IOVEC_SIZE 32
#define MMPA_PIPE_COUNT 2
#define MMPA_THREADNAME_SIZE 16
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1)
#define MMPA_MIN_OS_VERSION_SIZE 64
#define MMPA_MAX_NI 19
#define MMPA_MIDDLE_NI 5
#define MMPA_LOW_NI (-5)
#define MMPA_MIN_NI (-20)
#define MMPA_MAX_FILE 128
#define MMPA_PATH_SEPARATOR_STR "\\"
#define MMPA_PATH_SEPARATOR_CHAR '\\'
#define MMPA_MAX_THREAD_PIO 99
#define MMPA_MIDDLE_THREAD_PIO 66
#define MMPA_LOW_THREAD_PIO 33
#define MMPA_MIN_THREAD_PIO 1
#define MMPA_THREAD_SCHED_RR 0
#define MMPA_THREAD_SCHED_FIFO 0
#define MMPA_THREAD_SCHED_OTHER 0
#define MMPA_THREAD_MIN_STACK_SIZE 0
#define MM_MUTEX_INITIALIZER NULL
#ifdef __cplusplus
#if __cplusplus
}
#endif // __cpluscplus
#endif // __cpluscplus
#endif // _MMPA_TYPEDEF_WIN_H_

+ 9
- 15
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h View File

@@ -1,18 +1,12 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* @file mmpa_win.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/


#ifndef MMPA_WIN_MMPA_WIN_H #ifndef MMPA_WIN_MMPA_WIN_H
#define MMPA_WIN_MMPA_WIN_H #define MMPA_WIN_MMPA_WIN_H


+ 65
- 0
third_party/fwkacllib/inc/ops/OWNERS View File

@@ -0,0 +1,65 @@
approvers:
- gegenhua
- qiaohairong
reviewers:
- chuqingxi
- wang-jintang
- luanma_bl
- chen-kang30
- li-xulong
- Allan_Yu
- minshen
- pan-jixing
- yl_wang
- lijie176
- mabing726
- miao-fangzheng
- huang-qiang002
- su-yueming
- chenpeng-hw
- wang_jianle
- luanma_bl
- LDLD0524
- wywismygod2020
- lipeiyang3699
- koala-zhang
- zhu-jingjing
- zhaozhihui5
- simbaliuxx
- lyxyz
- zhou-qilong
- block0219
- hanfuwei
- xchu42
- sheng-nan
- yangjing88
- alexlak
- xig514
- jellylj
- brightlyking
- liuzhenyuhw
- djh602
- wangjiangben_hw
- li1jie
- clinglai
- liujun2014
- soupkey
- wu-shengji
- cimeng
- ccl_ligang
- xiaozhedeng
- granpad7
- tc1qaz
- Ronnie_zheng
- xiexianhu
- zhouyujoe
- zhaoping12
- tanshengshun
- fanqirui
- xu-binglin
- yangyang016
- zhangzhongzt
- gegenhua
- qiaohairong
options:
no_parent_owners: true

+ 111
- 6
third_party/fwkacllib/inc/ops/array_ops.h View File

@@ -745,6 +745,28 @@ REG_OP(UnsqueezeV2)
.ATTR(axis, ListInt, {}) .ATTR(axis, ListInt, {})
.OP_END_FACTORY_REG(UnsqueezeV2) .OP_END_FACTORY_REG(UnsqueezeV2)



/**
*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape
is changed, but the data is not changed. \n

*@par Inputs:
*x: A tensor.
*axes: A list of int64, which indicates the dimensions to be inserted. \n

*@par Outputs:
*y: Reshaped tensor with the same data as the input. \n

*@par Third-party framework compatibility
*Compatible with the Onnx operator Unsqueeze in V13. \n
*/

REG_OP(UnsqueezeV3)
.INPUT(x, TensorType::ALL())
.INPUT(axes, ListInt)
.OUTPUT(y, TensorType::ALL())
.OP_END_FACTORY_REG(UnsqueezeV3)
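Editor's note — a shape-only illustration (values assumed): an input x of shape (3, 4) with axes = [0, 2] yields y of shape (1, 3, 1, 4); the element data is left untouched, matching the ONNX Unsqueeze-13 behaviour referenced above.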

/** /**
*@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. \n *@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. \n


@@ -821,6 +843,28 @@ REG_OP(SqueezeV2)
.ATTR(axis, ListInt, {}) .ATTR(axis, ListInt, {})
.OP_END_FACTORY_REG(SqueezeV2) .OP_END_FACTORY_REG(SqueezeV2)


/**
*@brief Removes dimensions of size 1 from the shape of a tensor according to axes. \n

*@par Inputs:
*x: A tensor.
*axes: An optional list of int64. If not specified, squeezes all dimensions of
size 1. If specified, only squeezes the dimensions listed. It is an error to
squeeze a dimension that is not 1. \n

*@par Outputs:
*y: Reshaped tensor with the same data as the input. \n

*@par Third-party framework compatibility
*Compatible with the onnx operator Squeeze in V13. \n
*/

REG_OP(SqueezeV3)
.INPUT(x, TensorType::ALL())
.OPTIONAL_INPUT(axes, ListInt)
.OUTPUT(y, TensorType::ALL())
.OP_END_FACTORY_REG(SqueezeV3)

/** /**
*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n *@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n


@@ -1273,7 +1317,7 @@ REG_OP(SortV2)
* @par Inputs: * @par Inputs:
* One inputs, including: * One inputs, including:
* @li x: A Tensor. Must be one of the following types: * @li x: A Tensor. Must be one of the following types:
* float16, float32, int32, int8 ,uint8. \n
* float16, float32, int32, int64, int8, uint8, bool. \n
* @li shape: A Tensor to specify the shape that the input tensor expanded to. \n * @li shape: A Tensor to specify the shape that the input tensor expanded to. \n


* @par Outputs: * @par Outputs:
@@ -1284,9 +1328,9 @@ REG_OP(SortV2)
*/ */


REG_OP(Expand) REG_OP(Expand)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_INT8, DT_UINT8, DT_BOOL}))
.INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64})) .INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_INT8, DT_UINT8, DT_BOOL}))
.OP_END_FACTORY_REG(Expand) .OP_END_FACTORY_REG(Expand)
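Editor's note — a shape-only illustration (values assumed): an input x of shape (3, 1) expanded with shape = (3, 4) produces y of shape (3, 4), broadcasting the single column across the new last dimension.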


/** /**
@@ -1342,13 +1386,37 @@ REG_OP(NonZeroWithValue)
.ATTR(dtype, Type, DT_INT32) .ATTR(dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(NonZeroWithValue) .OP_END_FACTORY_REG(NonZeroWithValue)




/**
*@brief Returns a tensor with an updated shape from NonZeroWithValue. \n

*@par Inputs:
*value: A Tensor. The output of NonZeroWithValue. \n
*index: A Tensor. The output of NonZeroWithValue. \n
*count: A Tensor of type INT32, the count of non-zero elements in the input. \n

*@par Outputs:
* out_value: A Tensor. Has the same type as "value". \n
* out_index: A Tensor. Has the same type as "index". \n
*/
REG_OP(NonZeroWithValueShape)
.INPUT(value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16,
DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
.INPUT(index, TensorType({DT_INT32}))
.INPUT(count, TensorType({DT_INT32}))
.OUTPUT(out_value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16,
DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
.OUTPUT(out_index, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(NonZeroWithValueShape)


/** /**
* @brief Expand the input tensor to a compatible shape. \n * @brief Expand the input tensor to a compatible shape. \n


* @par Inputs: * @par Inputs:
* One inputs, including: * One inputs, including:
* x: A Tensor. Must be one of the following types: * x: A Tensor. Must be one of the following types:
* float16, float32, int32, int8 ,uint8. \n
* float16, float32, int32, int8, uint8, bool. \n


* @par Attributes: * @par Attributes:
* shape: A required listInt to specify the shape that the input tensor expanded to. \n * shape: A required listInt to specify the shape that the input tensor expanded to. \n
@@ -1362,8 +1430,8 @@ REG_OP(NonZeroWithValue)
*/ */


REG_OP(ExpandD) REG_OP(ExpandD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL}))
.REQUIRED_ATTR(shape, ListInt) .REQUIRED_ATTR(shape, ListInt)
.OP_END_FACTORY_REG(ExpandD) .OP_END_FACTORY_REG(ExpandD)


@@ -1404,6 +1472,43 @@ REG_OP(UpdateTensorDesc)
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE}))
.REQUIRED_ATTR(shape, ListInt) .REQUIRED_ATTR(shape, ListInt)
.OP_END_FACTORY_REG(UpdateTensorDesc) .OP_END_FACTORY_REG(UpdateTensorDesc)

/**
*@brief Queue data for other operators. \n
*@par Attributes:
*index: Index of the input tensor. The data type must be int32 or int64.
Assume that the net has three data nodes: the first should be set to 0, the second
should be set to 1, and the third to 2. \n
*queue_name: queue name
*output_types: types of the output data
*output_shapes: shapes of the output data
*@par Outputs:
*y: A DT_UINT8 tensor. \n
*/
REG_OP(QueueData)
.OUTPUT(y, TensorType({DT_UINT8}))
.ATTR(index, Int, 0)
.ATTR(queue_name, String, "")
.ATTR(output_types, ListType, {})
.ATTR(output_shapes, ListListInt, {{}, {}})
.OP_END_FACTORY_REG(QueueData)

/**
* @brief Ensures that the tensor's shape matches the expected shape. \n
* @par Inputs:
* input: A Tensor. \n
* @par Attributes:
* shape: The shape that needs to be checked \n
* @par Outputs:
* output: A tensor. \n
*/
REG_OP(EnsureShape)
.INPUT(input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT16, \
DT_FLOAT, DT_DOUBLE}))
.OUTPUT(output, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT16, \
DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(shape, ListInt)
.OP_END_FACTORY_REG(EnsureShape)
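Editor's note — as an illustration (behaviour inferred from the description above): with the attribute shape = {2, 3}, a (2, 3) input is passed through unchanged, while an input whose runtime shape differs, such as (3, 2), would be expected to fail the shape check.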
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_

+ 8
- 4
third_party/fwkacllib/inc/ops/ctc_ops.h View File

@@ -146,7 +146,7 @@ REG_OP(CTCBeamSearchDecoder)
*@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size, *@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size,
and C = number of classes (including blank). and C = number of classes (including blank).
It represent the logarithmized probabilities of the outputs. It represent the logarithmized probabilities of the outputs.
*@li targets: Tensor of size (N, S), where S= max target length.
*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length.
It represent the target sequences. It represent the target sequences.
*@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs. *@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs.
*@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets. *@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets.
@@ -159,11 +159,12 @@ REG_OP(CTCBeamSearchDecoder)
*@li blank : Blank label. Default 0. *@li blank : Blank label. Default 0.
*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'. *@li reduction: Specifies the reduction to apply to the output. Default: 'mean'.
*@li zero_infinity : Whether to zero infinite losses and the associated gradients. *@li zero_infinity : Whether to zero infinite losses and the associated gradients.
*@li label_max : The max length of targets.


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with Pytorch CTCLoss operator. * Compatible with Pytorch CTCLoss operator.


*@par Restrictions:
*@attention Constraints:
*The limit of Label’s length is 1K. *The limit of Label’s length is 1K.
*/ */
REG_OP(CTCLossV2) REG_OP(CTCLossV2)
@@ -176,6 +177,7 @@ REG_OP(CTCLossV2)
.ATTR(blank, Int, 0) .ATTR(blank, Int, 0)
.ATTR(reduction, String, "mean") .ATTR(reduction, String, "mean")
.ATTR(zero_infinity, Bool, false) .ATTR(zero_infinity, Bool, false)
.ATTR(label_max, Int, 0)
.OP_END_FACTORY_REG(CTCLossV2) .OP_END_FACTORY_REG(CTCLossV2)


/** /**
@@ -186,7 +188,7 @@ REG_OP(CTCLossV2)
*@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size, *@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size,
and C = number of classes (including blank). and C = number of classes (including blank).
It represent the logarithmized probabilities of the outputs. It represent the logarithmized probabilities of the outputs.
*@li targets: Tensor of size (N, S), where S= max target length.
*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length.
It represent the target sequences. It represent the target sequences.
*@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs. *@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs.
*@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets. *@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets.
@@ -200,11 +202,12 @@ REG_OP(CTCLossV2)
*@li blank : Blank label. Default 0. *@li blank : Blank label. Default 0.
*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'. *@li reduction: Specifies the reduction to apply to the output. Default: 'mean'.
*@li zero_infinity : Whether to zero infinite losses and the associated gradients. *@li zero_infinity : Whether to zero infinite losses and the associated gradients.
*@li label_max : The max length of targets.


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with Pytorch CTCLoss operator. * Compatible with Pytorch CTCLoss operator.


*@par Restrictions:
*@attention Constraints:
*The limit of Label’s length is 1K. *The limit of Label’s length is 1K.
*/ */
REG_OP(CTCLossV2Grad) REG_OP(CTCLossV2Grad)
@@ -219,6 +222,7 @@ REG_OP(CTCLossV2Grad)
.ATTR(blank, Int, 0) .ATTR(blank, Int, 0)
.ATTR(reduction, String, "mean") .ATTR(reduction, String, "mean")
.ATTR(zero_infinity, Bool, false) .ATTR(zero_infinity, Bool, false)
.ATTR(label_max, Int, 0)
.OP_END_FACTORY_REG(CTCLossV2Grad) .OP_END_FACTORY_REG(CTCLossV2Grad)
} // namespace ge } // namespace ge



+ 45
- 0
third_party/fwkacllib/inc/ops/data_flow_ops.h View File

@@ -2398,6 +2398,32 @@ REG_OP(DynamicGetNext)
.ATTR(_getnext_inputs_shape_range, String, "") .ATTR(_getnext_inputs_shape_range, String, "")
.OP_END_FACTORY_REG(DynamicGetNext) .OP_END_FACTORY_REG(DynamicGetNext)


/**
* @brief DynamicGetNextV2: dynamically get the next data.
* @par Inputs:
*x: the iterator, all types are available
* @par Outputs:
* y: the data in the iterator, all types are available
* @par Attributes:
* output_types: types of all outputs
* output_shapes: shapes of all outputs
*_dynamic_graph_execute_mode: dynamic graph execution mode,
value is one of lazy_recompile and dynamic_execute
*_getnext_inputs_shape_range: shape ranges of outputs,
it works where _dynamic_graph_execute_mode is dynamic_execute
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(DynamicGetNextV2)
.DYNAMIC_OUTPUT(y, TensorType::ALL())
.ATTR(output_types, ListType, {})
.ATTR(channel_name, String, "")
.ATTR(output_shapes, ListListInt, {{}, {}})
.ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile")
.ATTR(_getnext_inputs_shape_range, String, "")
.OP_END_FACTORY_REG(DynamicGetNextV2)

/** /**
*@brief AdpGetNext *@brief AdpGetNext
*@par Outputs: *@par Outputs:
@@ -2433,5 +2459,24 @@ REG_OP(GetNextV2)
.ATTR(output_shapes, ListListInt, {{}, {}}) .ATTR(output_shapes, ListListInt, {{}, {}})
.ATTR(channel_name, String, "") .ATTR(channel_name, String, "")
.OP_END_FACTORY_REG(GetNextV2) .OP_END_FACTORY_REG(GetNextV2)

/**
*@brief GetNextFromQueue
*@par Inputs:
*x: the data, only uint8 is supported
*@par Outputs:
*y: the data in the iterator, all types are available
*@par Attributes:
*output_types: types of all outputs
*output_shapes: shapes of all outputs
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GetNextFromQueue)
.INPUT(x, TensorType({DT_UINT8}))
.DYNAMIC_OUTPUT(y, TensorType::ALL())
.ATTR(output_types, ListType, {})
.ATTR(output_shapes, ListListInt, {{}, {}})
.OP_END_FACTORY_REG(GetNextFromQueue)
} // namespace ge } // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_

+ 155
- 1
third_party/fwkacllib/inc/ops/deep_md.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2021 Huawei Technologies Co., Ltd
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -24,6 +24,87 @@
#include "graph/operator_reg.h" #include "graph/operator_reg.h"
namespace ge { namespace ge {
/**
* @brief Calculate TabulateFusion. \n
*
* @par Inputs:
* Four inputs, including:
* @li table: A Tensor. Must be one of the following types: float16, float32, float64.
* @li table_info: A Tensor. Must be one of the following types: float16, float32, float64.
* @li em_x: A Tensor. Must be one of the following types: float16, float32, float64.
* @li em: A Tensor. Must be one of the following types: float16, float32, float64. \n
*
* @par Outputs:
* descriptor: A Tensor. Must be one of the following types: float16, float32, float64. \n
*
* @par Attributes:
* Three attributes, including:
* @li last_layer_size: int value.
* @li split_count: int value.
* @li split_index: int value. \n
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(TabulateFusion)
.INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(table_info, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(em_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(em, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(descriptor, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(last_layer_size, Int)
.ATTR(split_count, Int, 1)
.ATTR(split_index, Int, 0)
.OP_END_FACTORY_REG(TabulateFusion)
/**
* @brief Calculate ProdEnvMatA. \n
*
* @par Inputs:
* @li coord: A Tensor. Must be one of the following types: float32, float64.
* @li type: A Tensor. Must be one of the following types: int32.
* @li natoms: A Tensor. Must be one of the following types: int32.
* @li box: A Tensor. Must be one of the following types: float32, float64.
* @li mesh: A Tensor. Must be one of the following types: int32.
* @li davg: A Tensor. Must be one of the following types: float32, float64.
* @li dstd: A Tensor. Must be one of the following types: float32, float64.
*
* @par Outputs:
* descrpt: A Tensor. Must be one of the following types: float32, float64.
* descrpt_deriv: A Tensor. Must be one of the following types: float32, float64.
* rij: A Tensor. Must be one of the following types: float32, float64.
* nlist: A Tensor. Must be one of the following types: int32. \n
*
* @par Attributes:
* @li rcut_a: A Float.
* @li rcut_r: A Float.
* @li rcut_r_smth: A Float.
* @li sel_a: A ListInt.
* @li split_count: A Int.
* @li split_index: A Int.\n
*
*/
REG_OP(ProdEnvMatA)
.INPUT(coord, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(type, TensorType({DT_INT32}))
.INPUT(natoms, TensorType({DT_INT32}))
.INPUT(box, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(mesh, TensorType({DT_INT32}))
.INPUT(davg, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(dstd, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(descrpt, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(descrpt_deriv, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(rij, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(nlist, TensorType({DT_INT32}))
.ATTR(rcut_a, Float, 1.0)
.ATTR(rcut_r, Float, 1.0)
.ATTR(rcut_r_smth, Float, 1.0)
.ATTR(sel_a, ListInt, {})
.ATTR(sel_r, ListInt, {})
.ATTR(split_count, Int, 1)
.ATTR(split_index, Int, 0)
.OP_END_FACTORY_REG(ProdEnvMatA)
/** /**
* @brief Calculate ProdForceSeA. \n * @brief Calculate ProdForceSeA. \n
* *
@@ -53,7 +134,80 @@ REG_OP(ProdForceSeA)
.OUTPUT(atom_force, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .OUTPUT(atom_force, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(n_a_sel, Int) .REQUIRED_ATTR(n_a_sel, Int)
.REQUIRED_ATTR(n_r_sel, Int) .REQUIRED_ATTR(n_r_sel, Int)
.ATTR(split_count, Int, 1)
.ATTR(split_index, Int, 0)
.OP_END_FACTORY_REG(ProdForceSeA) .OP_END_FACTORY_REG(ProdForceSeA)
/**
* @brief Calculate ProdVirialSeA. \n
*
* @par Inputs:
* Five inputs, including:
* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
* @li rij: A Tensor. Must be one of the following types: float16, float32, float64.
* @li nlist: A Tensor. dtype is int32.
* @li natoms: A Tensor. dtype is int32. \n
*
* @par Outputs:
* Two outputs, including:
* @li virial: A Tensor. Must be one of the following types: float16, float32, float64.
* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n
*
* @par Attributes:
* Four attributes, including:
* @li n_a_sel: Int value.
* @li n_r_sel: Int value.
* @li split_count: Int value.
* @li split_index: Int value. \n
*/
REG_OP(ProdVirialSeA)
.INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(nlist, TensorType({DT_INT32}))
.INPUT(natoms, TensorType({DT_INT32}))
.OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(n_a_sel, Int)
.REQUIRED_ATTR(n_r_sel, Int)
.ATTR(split_count, Int, 1)
.ATTR(split_index, Int, 0)
.OP_END_FACTORY_REG(ProdVirialSeA)
/**
* @brief Calculate TabulateFusionGrad. \n
*
* @par Inputs:
* Six inputs, including:
* @li table: A Tensor. Must be one of the following types: float16, float32, float64.
* @li table_info: A Tensor. Must be one of the following types: float16, float32, float64.
* @li em_x: A Tensor. Must be one of the following types: float16, float32, float64.
* @li em: A Tensor. Must be one of the following types: float16, float32, float64.
* @li dy: A Tensor. Must be one of the following types: float16, float32, float64.
* @li descriptor: A Tensor. Must be one of the following types: float16, float32, float64. \n
*
* @par Outputs:
* @li dy_dem_x: A Tensor. Must be one of the following types: float16, float32, float64.
* @li dy_dem: A Tensor. Must be one of the following types: float16, float32, float64. \n
*
* @par Attributes:
* Two attributes, including:
* @li split_count: An Int.
* @li split_index: An Int. \n
*/
REG_OP(TabulateFusionGrad)
.INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(table_info, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(em_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(em, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(descriptor, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(dy_dem_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(dy_dem, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.ATTR(split_count, Int, 1)
.ATTR(split_index, Int, 0)
.OP_END_FACTORY_REG(TabulateFusionGrad)
} // namespace ge } // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_

+ 11
- 3
third_party/fwkacllib/inc/ops/elewise_calculation_ops.h View File

@@ -331,7 +331,7 @@ REG_OP(Sub)


*@par Inputs: *@par Inputs:
*One input, including: \n *One input, including: \n
*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
*x: A Tensor. Must be one of the following types: float16, float32, double, int8, int16, int32, int64. \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as "x". \n *y: A Tensor. Has the same type as "x". \n
@@ -340,8 +340,10 @@ REG_OP(Sub)
*Compatible with the TensorFlow operator Abs. *Compatible with the TensorFlow operator Abs.
*/ */
REG_OP(Abs) REG_OP(Abs)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16,
DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16,
DT_INT32, DT_INT64}))
.OP_END_FACTORY_REG(Abs) .OP_END_FACTORY_REG(Abs)


/** /**
@@ -3821,6 +3823,10 @@ REG_OP(CosineSimilarity)
* @li max_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n * @li max_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
* @li global_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n * @li global_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
* @li weight_decay: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n * @li weight_decay: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n
* @li step_size: An optional Tensor. Datatype is same as exp_avg. Shape (1, ).\n

* @par Attributes:
* @li adam_mode: An optional string. Defaults to "adam". \n


*@par Outputs: *@par Outputs:
*three inputs, including: *three inputs, including:
@@ -3840,9 +3846,11 @@ REG_OP(ApplyAdamV2)
.INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) .INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) .INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 })) .INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.OPTIONAL_INPUT(step_size, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 })) .OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 })) .OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 })) .OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.ATTR(adam_mode, String, "adam")
.OP_END_FACTORY_REG(ApplyAdamV2) .OP_END_FACTORY_REG(ApplyAdamV2)
} // namespace ge } // namespace ge




+ 10
- 10
third_party/fwkacllib/inc/ops/image_ops.h View File

@@ -132,7 +132,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n
*@li x:A Tensor. Must be one of the following types:uint8, uint16, int8, *@li x:A Tensor. Must be one of the following types:uint8, uint16, int8,
int16, int32, int64, float16, float, double. A 4-D tensor of shape int16, int32, int64, float16, float, double. A 4-D tensor of shape
[batch, image_height, image_width, depth]. The format must be NHWC. [batch, image_height, image_width, depth]. The format must be NHWC.
*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4].
*@li boxes: A Tensor. Must be one of the following types: float16, float. A 2-D tensor of shape [num_boxes, 4].
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with
int32 values in [0, batch). int32 values in [0, batch).
*@li crop_size: A Tensor of type int32. A 1-D tensor of 2 elements, crop_size *@li crop_size: A Tensor of type int32. A 1-D tensor of 2 elements, crop_size
@@ -146,7 +146,7 @@ extrapolation, when applicable.
NearestNeighbor . \n NearestNeighbor . \n


*@par Outputs: *@par Outputs:
*y:A Tensor of type float. The format must be NHWC. \n
*y: A Tensor. Must be one of the following types: float16, float. The format must be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input images must be a 4-D tensor . \n *Input images must be a 4-D tensor . \n
@@ -158,10 +158,10 @@ NearestNeighbor . \n
REG_OP(CropAndResize) REG_OP(CropAndResize)
.INPUT(x, TensorType({DT_UINT8, DT_UINT16, DT_INT8, \ .INPUT(x, TensorType({DT_UINT8, DT_UINT16, DT_INT8, \
DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(boxes, TensorType({DT_FLOAT}))
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(box_index, TensorType({DT_INT32})) .INPUT(box_index, TensorType({DT_INT32}))
.INPUT(crop_size, TensorType({DT_INT32})) .INPUT(crop_size, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(extrapolation_value, Float, 0) .ATTR(extrapolation_value, Float, 0)
.ATTR(method, String, "bilinear") .ATTR(method, String, "bilinear")
.OP_END_FACTORY_REG(CropAndResize) .OP_END_FACTORY_REG(CropAndResize)
@@ -175,7 +175,7 @@ REG_OP(CropAndResize)
*Input images must be a 5HD tensor. Inputs include: *Input images must be a 5HD tensor. Inputs include:
*@li x:A Tensor. Must be one of the following types:float16, float. A 5HD tensor of shape *@li x:A Tensor. Must be one of the following types:float16, float. A 5HD tensor of shape
* [batch, C1, image_height, image_width, C0]. * [batch, C1, image_height, image_width, C0].
*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4].
*@li boxes: A Tensor. Must be one of the following types: float16, float. A 2-D tensor of shape [num_boxes, 4].
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n


*@par Attributes: *@par Attributes:
@@ -184,7 +184,7 @@ REG_OP(CropAndResize)
*@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n *@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n


*@par Outputs: *@par Outputs:
*y:A Tensor of type float . \n
*y: A Tensor. Must be one of the following types: float16, float. \n


*@attention Constraints: *@attention Constraints:
*Input images must be a 5HD tensor . \n *Input images must be a 5HD tensor . \n
@@ -197,9 +197,9 @@ REG_OP(CropAndResize)
*/ */
REG_OP(CropAndResizeD) REG_OP(CropAndResizeD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(boxes, TensorType({DT_FLOAT}))
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(box_index, TensorType({DT_INT32})) .INPUT(box_index, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(crop_size, ListInt) .REQUIRED_ATTR(crop_size, ListInt)
.ATTR(extrapolation_value, Float, 0) .ATTR(extrapolation_value, Float, 0)
.ATTR(method, String, "bilinear") .ATTR(method, String, "bilinear")
@@ -888,10 +888,10 @@ Defaults to false . \n
*@li half_pixel_centers: An optional bool. Defaults to False . \n *@li half_pixel_centers: An optional bool. Defaults to False . \n


*@par Outputs: *@par Outputs:
*y: 4-D with shape [batch, new_height, new_width, channels] . \n
*y: A Tensor with the same type and format as input "images" . \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with tensorflow ResizeNearestNeighborV2 operator.
*Compatible with tensorflow ResizeNearestNeighbor operator.
*/ */


REG_OP(ResizeNearestNeighborV2) REG_OP(ResizeNearestNeighborV2)


+ 1
- 1
third_party/fwkacllib/inc/ops/math_ops.h View File

@@ -378,7 +378,7 @@ to each component of an element of this dataset.
REG_OP(GetNext) REG_OP(GetNext)
.DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, .DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64,
DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL})) DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
.ATTR(output_types, ListInt, {})
.ATTR(output_types, ListType, {})
.ATTR(output_shapes, ListListInt, {}) .ATTR(output_shapes, ListListInt, {})
.ATTR(output_num, Int, 1) .ATTR(output_num, Int, 1)
.ATTR(channel_name, String, "") .ATTR(channel_name, String, "")


+ 61
- 22
third_party/fwkacllib/inc/ops/matrix_calculation_ops.h View File

@@ -213,9 +213,9 @@ REG_OP(GEMM)
*/ */


REG_OP(BatchMatMul) REG_OP(BatchMatMul)
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.ATTR(adj_x1, Bool, false) .ATTR(adj_x1, Bool, false)
.ATTR(adj_x2, Bool, false) .ATTR(adj_x2, Bool, false)
.OP_END_FACTORY_REG(BatchMatMul) .OP_END_FACTORY_REG(BatchMatMul)
@@ -246,11 +246,11 @@ REG_OP(BatchMatMul)
*/ */


REG_OP(BatchMatMulV2) REG_OP(BatchMatMulV2)
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.ATTR(adj_x1, Bool, false) .ATTR(adj_x1, Bool, false)
.ATTR(adj_x2, Bool, false) .ATTR(adj_x2, Bool, false)
.ATTR(offset_x, Int, 0) .ATTR(offset_x, Int, 0)
@@ -505,17 +505,17 @@ REG_OP(ScatterElements)
* Three inputs, including: * Three inputs, including:
*@li var: An ND Tensor . *@li var: An ND Tensor .


*Must be one of the following types: float16, float32, int32, int8, uint8
*@li indices: An ND Tensor of type int32 or int64

*Must be one of the following types: float16, float, int32, int8, uint8
*@li indices: An ND Tensor . \n


*@li updates: An Tensor. format:NCHW, NHWC .
*Must be one of the following types: int32 or int64
*@li updates: An ND Tensor .


*Must be one of the following types: float16, float32, int32, int8, uint8
*Must be one of the following types: float16, float, int32, int8, uint8


*@par Attributes: *@par Attributes:
* use_locking: An optional bool. Defaults to "False". If "True", the operation
* will be protected by a lock . \n
*use_locking: An optional bool. Defaults to "False". If "True",
* the operation will be protected by a lock . \n


*@par Outputs: *@par Outputs:
*var: A Tensor. Has the same type and format as input "var" . \n *var: A Tensor. Has the same type and format as input "var" . \n
@@ -792,13 +792,13 @@ REG_OP(DiagPart)
* Four inputs, including: * Four inputs, including:
*@li x: A Tensor of type float16, int8. *@li x: A Tensor of type float16, int8.
*@li w: A weight matrix of type float16, int8. *@li w: A weight matrix of type float16, int8.
*@li b: A Tensor of type float16, int32, float32.
*@li offset_w: A Tensor of type int8 . \n
*@li b: An optional Tensor of type float16, int32, float32.
*@li offset_w: An optional Tensor of type int8. Reserved. Only None Supported. \n


*@par Attributes: *@par Attributes:
*@li num_output: Reserved.
*@li num_output: Required. An int, output neuron number. Reserved.
*@li transpose: A bool, specifying weight whether to transpose input w, either "true" or "false". Defaults to "false". *@li transpose: A bool, specifying weight whether to transpose input w, either "true" or "false". Defaults to "false".
*@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1.
*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1.
* The product of the subsequent dimensions starting form first dimension or the second dimension is "K". * The product of the subsequent dimensions starting form first dimension or the second dimension is "K".
*@li offset_x: An optional integer for quantized FullyConnection. *@li offset_x: An optional integer for quantized FullyConnection.
*The negative offset added to the input image for int8 type. Ensure offset_x within the *The negative offset added to the input image for int8 type. Ensure offset_x within the
@@ -814,11 +814,11 @@ REG_OP(DiagPart)
* Yes * Yes
*/ */
REG_OP(FullyConnection) REG_OP(FullyConnection)
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
.INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32}))
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16}))
.INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16}))
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16}))
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16}))
.REQUIRED_ATTR(num_output, Int) .REQUIRED_ATTR(num_output, Int)
.ATTR(transpose, Bool, false) .ATTR(transpose, Bool, false)
.ATTR(axis, Int, 1) .ATTR(axis, Int, 1)
@@ -1360,6 +1360,45 @@ REG_OP(FillDiagonal)
.ATTR(wrap, Bool, false) .ATTR(wrap, Bool, false)
.OP_END_FACTORY_REG(FillDiagonal) .OP_END_FACTORY_REG(FillDiagonal)


/**
*@brief: Returns the sum of the elements of the diagonal of the input 2-D matrix. \n

*@par Inputs:
*x: A Tensor. Must be one of the following types:
* float16, float. \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
* Compatible with the Pytorch operator Trace.
*/

REG_OP(Trace)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(Trace)
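For reference, the documented semantics reduce to summing the main-diagonal elements of the 2-D input. A minimal host-side sketch (a hypothetical helper over a row-major float buffer, not the device kernel):

#include <algorithm>
#include <cstddef>
#include <vector>

// Reference-only sketch of Trace: sum of the main diagonal of a rows x cols
// matrix stored row-major.
float TraceRef(const std::vector<float> &m, std::size_t rows, std::size_t cols) {
  float sum = 0.0f;
  for (std::size_t i = 0; i < std::min(rows, cols); ++i) {
    sum += m[i * cols + i];
  }
  return sum;
}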

/**
*@brief Computes the generalized inverse of any matrix. \n

*@par Inputs:
* @li x: input matrix. Must be one of the following types:
* double, float. \n

*@par Attributes:
* @li rcond: An optional float >= 0 or inf. Defaults to 1e-15. \n

*@par Outputs:
* y: A Tensor with the same type and shape of x's transpose. \n

*/
REG_OP(Pinverse)
.INPUT(x, TensorType({ DT_FLOAT, DT_DOUBLE }))
.OUTPUT(y, TensorType({ DT_FLOAT, DT_DOUBLE }))
.ATTR(rcond, Float, 1e-15)
.OP_END_FACTORY_REG(Pinverse)

} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_

+ 68
- 0
third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h View File

@@ -142,6 +142,74 @@ REG_OP(BatchNorm)
.ATTR(is_training, Bool, true) .ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(BatchNorm) .OP_END_FACTORY_REG(BatchNorm)


/**
* @brief After the mean and reciprocal of standard deviation (invert_std) are separately calculated on each device,
* the mean and reciprocal of standard deviation (invert_std) data on each device are normalized,
* a total mean and reciprocal of standard deviation (invert_std) are returned, and running_var is updated.

* @par Inputs:
* include:
* @li mean_all: A Tensor. The mean of each device. Must be one of the following types: float16, float32.
* @li invert_std_all: A Tensor. Reciprocal of the variances of each device. Must be one of the following types: float16, float32.
* @li count_all: A Tensor. Number of data for each device. Must be one of the following types: float16, float32.
* @li mean_broadcast: A Tensor. The overall average and broadcast. Must be one of the following types: float16, float32.
* @li count_sum: A Tensor. General statistics. Must be one of the following types: float16, float32.
* @li running_var: A Tensor. Runtime variance. Must be one of the following types: float16, float32. \n

* @par Attributes:
* Two Attributes, including:
* @li momentum: An optional float. Defaults to 0.1. \n
* @li epsilon: An optional float. Defaults to 0.001. \n

* @par Outputs:
* include:
* @li invert_std: A Tensor. The reciprocal of the total standard deviation.
* @li running_var_update: A Tensor. The moving variance of each device after the update. \n

* @par Third-party framework compatibility
* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate
* compatible with the Pytorch operator BatchNormGatherStatsWithCounts.
*/
REG_OP(SyncBatchNormGatherStatsWithCounts)
.INPUT(mean_all, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(invert_std_all, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(count_all, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(mean_broadcast, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(running_var, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(invert_std, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(running_var_update, TensorType({DT_FLOAT, DT_FLOAT16}))
.ATTR(momentum, Float, 0.1)
.ATTR(epsilon, Float, 0.001)
.OP_END_FACTORY_REG(SyncBatchNormGatherStatsWithCounts)

/**
* @brief Update running_mean.

* @par Inputs:
* include:
* @li mean: A Tensor. The mean of each device. Must be one of the following types: float16, float32.
* @li running_mean: A Tensor. Runtime Mean. Must be one of the following types: float16, float32. \n

* @par Attributes:
* One Attribute, including:
* @li momentum: An optional float. Defaults to 0.1. \n

* @par Outputs:
* include:
* @li running_mean_update: A Tensor. The moving mean of each device after the update. \n

* @par Third-party framework compatibility
* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate
* compatible with the Pytorch operator BatchNormGatherStatsWithCounts.
*/
REG_OP(SyncBNTrainingUpdate)
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(running_mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(running_mean_update, TensorType({DT_FLOAT, DT_FLOAT16}))
.ATTR(momentum, Float, 0.1)
.OP_END_FACTORY_REG(SyncBNTrainingUpdate)
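The header does not spell out the update rule; assuming the usual exponential-moving-average form used by sync batch norm (an assumption, not confirmed here), a host-side sketch of the running-mean update would be:

#include <cstddef>
#include <vector>

// Assumed update: running_mean_update = (1 - momentum) * running_mean + momentum * mean.
// Hypothetical reference helper; the device kernel may differ.
std::vector<float> SyncBNTrainingUpdateRef(const std::vector<float> &mean,
                                           const std::vector<float> &running_mean,
                                           float momentum = 0.1f) {
  std::vector<float> out(mean.size());
  for (std::size_t i = 0; i < mean.size(); ++i) {
    out[i] = (1.0f - momentum) * running_mean[i] + momentum * mean[i];
  }
  return out;
}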

/** /**
*@brief part of SyncBatchNormBackward . \n *@brief part of SyncBatchNormBackward . \n




+ 42
- 4
third_party/fwkacllib/inc/ops/nn_calculation_ops.h View File

@@ -134,9 +134,9 @@ REG_OP(DepthwiseConv2DBackpropFilter)
* instead. * instead.
*/ */
REG_OP(DepthwiseConv2DBackpropFilterD) REG_OP(DepthwiseConv2DBackpropFilterD)
.INPUT(input, TensorType({float16}))
.INPUT(out_backprop, TensorType({float16}))
.OUTPUT(filter_grad, TensorType({float32}))
.INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16}))
.INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16}))
.OUTPUT(filter_grad, TensorType({DT_FLOAT32}))
.REQUIRED_ATTR(filter_size, ListInt) .REQUIRED_ATTR(filter_size, ListInt)
.REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(strides, ListInt)
.ATTR(dilations, ListInt, {1, 1, 1, 1}) .ATTR(dilations, ListInt, {1, 1, 1, 1})
@@ -764,7 +764,7 @@ REG_OP(Conv2DBackpropFilterD)
| | float32 | float32 | float32 | float32 |\n | | float32 | float32 | float32 | float32 |\n
| | int8 | int8 | int32 | int32 |\n | | int8 | int8 | int32 | int32 |\n
| Format | NCHW | NCHW | ND | NCHW |\n | Format | NCHW | NCHW | ND | NCHW |\n
| | NHWC | HWCN | | NHWC |\n
| | NHWC | HWCN | ND | NHWC |\n
*\n *\n
* For float32 type, the actual calculation on the chip is based on * For float32 type, the actual calculation on the chip is based on
* float16. * float16.
@@ -1650,5 +1650,43 @@ REG_OP(Dilation)
.ATTR(padding_value, Float, 0.0) .ATTR(padding_value, Float, 0.0)
.OP_END_FACTORY_REG(Dilation) .OP_END_FACTORY_REG(Dilation)


/**
*@brief Computes the post-cube processing output with the expected input
*@par Inputs:
* Ten inputs:
* x1: A Tensor of type float16, bfloat16, float32, int32
* x2: A Tensor of type float16, int8, int4
* quant_scale_0: A Tensor of type uint64
* relu_weight_0: A Tensor of type float32
* clip_value_0: A Tensor of type float16, int8, int4
* quant_scale_1: A Tensor of type uint64
* relu_weight_1: A Tensor of type float32
* clip_value_1: A Tensor of type float16
* anti_quant_scale: A Tensor of type float16
* anti_quant_offset: A Tensor of type int8, int4
*@par Attributes:
* @li fusion_op_list: A list of String.
* @li unit_list: A list of String
* @li eltwise_mode: An optional string from "ADD", "SUB" and "".
*@par Outputs:
* output: A Tensor. A Tensor of type float16, bfloat16, float32, int32, int8, int4.
*/
REG_OP(FixPipe)
.INPUT(x1, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT, DT_INT32}))
.OPTIONAL_INPUT(x2, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
.OPTIONAL_INPUT(quant_scale_0, TensorType({DT_UINT64}))
.OPTIONAL_INPUT(relu_weight_0, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(clip_value_0, TensorType({DT_FLOAT16, DT_INT8, DT_INT4}))
.OPTIONAL_INPUT(quant_scale_1, TensorType({DT_UINT64}))
.OPTIONAL_INPUT(relu_weight_1, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(clip_value_1, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(anti_quant_scale, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(anti_quant_offset, TensorType({DT_INT8, DT_INT4}))
.OUTPUT(output, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT, DT_INT32, DT_INT8, DT_INT4}))
.REQUIRED_ATTR(fusion_op_list, ListString)
.REQUIRED_ATTR(unit_list, ListString)
.ATTR(eltwise_mode, String, "")
.OP_END_FACTORY_REG(FixPipe)

} // namespace ge } // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_

+ 97
- 1
third_party/fwkacllib/inc/ops/nn_detect_ops.h View File

@@ -1179,6 +1179,8 @@ REG_OP(SPP)
* the index of the input feature map, "x1", "y1", "x2", or "y2" must be * the index of the input feature map, "x1", "y1", "x2", or "y2" must be
* greater than or equal to "0.0". * greater than or equal to "0.0".
* roi_max_num must be less than or equal to 6000 and must be divided by 16. * roi_max_num must be less than or equal to 6000 and must be divided by 16.
* The ROI input data cannot exceed the width and height range of "x";
* otherwise, the accuracy of the output result may not be as expected.
*@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying *@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying
* the number of ROIs per batch . \n * the number of ROIs per batch . \n


@@ -2076,7 +2078,7 @@ REG_OP(GIoUGrad)
* trans: An optional attr, true for 'xyxyt', false for 'xywht'. * trans: An optional attr, true for 'xyxyt', false for 'xywht'.


*@par Outputs: *@par Outputs:
* overlaps: A 3D Tensor of type float16 or float32 with shape [B, N, K].
* overlaps: A 3D Tensor of type float32 with shape [B, N, K].


*@attention Constraints: *@attention Constraints:
* In each batch, the invalid box cannot appear before the valid box. * In each batch, the invalid box cannot appear before the valid box.
@@ -2087,6 +2089,100 @@ REG_OP(RotatedOverlaps)
.OUTPUT(overlaps, TensorType({DT_FLOAT})) .OUTPUT(overlaps, TensorType({DT_FLOAT}))
.ATTR(trans, Bool, false) .ATTR(trans, Bool, false)
.OP_END_FACTORY_REG(RotatedOverlaps) .OP_END_FACTORY_REG(RotatedOverlaps)

/**
*@brief RotatedIou . \n

*@par Inputs:
*@li boxes: Bounding boxes, a 3D Tensor of type float32 with
* shape (B, 5, N). "N" indicates the number of boxes, and the value
* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta].
*@li query_boxes: Bounding boxes, a 3D Tensor of type float32 with
* shape (B, 5, K). "K" indicates the number of boxes, and the value
* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta].

*@par Attributes:
*@li trans: An optional attr, true for 'xyxyt', false for 'xywht'.
*@li mode: An optional attr, a character string with the value range of ['iou', 'iof'],
* only 'iou' is supported now.
*@li is_cross: Cross calculation when it is True, and one-to-one calculation when it is False.
*@li v_threshold: An optional attr that provides condition relaxation for the intersection calculation.
*@li e_threshold: An optional attr that provides condition relaxation for the intersection calculation.

*@par Outputs:
* iou: A 3D Tensor of float32 with shape [B, N, K].

*@attention Constraints:
* In each batch, the invalid box cannot appear before the valid box.
*/
REG_OP(RotatedIou)
.INPUT(boxes, TensorType({DT_FLOAT}))
.INPUT(query_boxes, TensorType({DT_FLOAT}))
.OUTPUT(iou, TensorType({DT_FLOAT}))
.ATTR(trans, Bool, false)
.ATTR(mode, String, "iou")
.ATTR(is_cross, Bool, true)
.ATTR(v_threshold, Float, 0)
.ATTR(e_threshold, Float, 0)
.OP_END_FACTORY_REG(RotatedIou)

/**
*@brief RotatedBoxEncode. \n

*@par Inputs:
* Two inputs, including:
*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N).
* "B" indicates the number of batch size
* "N" indicates the number of bounding boxes, and the value "5" refers to
* "x0", "x1", "y0", "y1" and "angle".
*@li gt_box: A 3D Tensor of float32 (float16) with shape (B, 5, N).
* "B" indicates the number of batch size
* "N" indicates the number of bounding boxes, and the value "5" refers to
* "x0", "x1", "y0", "y1" and "angle". \n

*@par Attributes:
*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle",
* defaults to [1.0, 1.0, 1.0, 1.0, 1.0].

*@par Outputs:
*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N),
* specifying the variations between all anchor boxes and ground truth boxes.
*/
REG_OP(RotatedBoxEncode)
.INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(gt_box, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0})
.OP_END_FACTORY_REG(RotatedBoxEncode)

/**
*@brief RotatedBoxDecode. \n

*@par Inputs:
* Two inputs, including:
*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N).
* "B" indicates the number of batch size
* "N" indicates the number of bounding boxes, and the value "5" refers to
* "x0", "x1", "y0", "y1" and "angle".
*@li deltas: A 3D Tensor of float32 (float16) with shape (B, 5, N).
* "B" indicates the number of batch size
* "N" indicates the number of bounding boxes, and the value "5" refers to
* "x0", "x1", "y0", "y1" and "angle". \n

*@par Attributes:
*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle",
* defaults to [1.0, 1.0, 1.0, 1.0, 1.0].

*@par Outputs:
*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N),
* specifying the decoded boxes computed from the anchor boxes and the deltas.
*/
REG_OP(RotatedBoxDecode)
.INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0})
.OP_END_FACTORY_REG(RotatedBoxDecode)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_


+ 76
- 20
third_party/fwkacllib/inc/ops/nn_norm_ops.h View File

@@ -1487,25 +1487,51 @@ REG_OP(Roll)
.OP_END_FACTORY_REG(Roll) .OP_END_FACTORY_REG(Roll)


/** /**
*@brief Calculate the loss. Creates a criterion that optimizes a two-class classification
logistic loss between input_x and input_y (containing 1 or -1). \n
* @brief Roll the tensor along the given dimension(s).


*@par Inputs:
*Tow inputs, including:
* @par Inputs:
* Three inputs, including:
* @li input: A tensor.
* @li shift: The number of places by which the elements of the tensor are shifted. \n
* @li axes: Axis along which to roll. \n

* @par Outputs:
* output: A Tensor with the same type and shape as "input". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Roll. \n
*/
REG_OP(RollV2)
.INPUT(input, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \
DT_FLOAT,DT_DOUBLE}))
.INPUT(shift, TensorType({DT_INT32,DT_INT64}))
.INPUT(axes, TensorType({DT_INT32,DT_INT64}))
.OUTPUT(output, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \
DT_FLOAT,DT_DOUBLE}))
.OP_END_FACTORY_REG(RollV2)
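For intuition, rolling moves each element circularly by "shift" positions along the chosen axis; a single-axis 1-D reference (a simplification of the multi-axis op) could look like this:

#include <cstddef>
#include <vector>

// Reference-only 1-D roll: element i moves to index (i + shift) mod n,
// with negative shifts wrapping the other way (as in torch.roll).
std::vector<int> Roll1DRef(const std::vector<int> &x, long long shift) {
  const long long n = static_cast<long long>(x.size());
  std::vector<int> y(x.size());
  if (n == 0) return y;
  for (long long i = 0; i < n; ++i) {
    const long long j = ((i + shift) % n + n) % n;
    y[static_cast<std::size_t>(j)] = x[static_cast<std::size_t>(i)];
  }
  return y;
}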

/**
* @brief Calculate the loss. Creates a criterion that optimizes a two-class classification
* logistic loss between input_x and input_y (containing 1 or -1). \n

* @par Inputs:
* Two inputs, including:
* @li input_x: A tensor. Must be one of the following types: * @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n * float16, float32. \n
* @li input_y: A tensor. Must be one of the following types: * @li input_y: A tensor. Must be one of the following types:
* float16, float32. \n * float16, float32. \n


*@par Attributes:
*reduction: An optional string.Defaults to "mean". \n
* @par Attributes:
* reduction: An optional string. Defaults to "mean". \n


*@par Outputs:
*output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n
* @par Outputs:
* output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n
* while reduction == "sum" or "mean", A Tensor with the same type of input_x , shape of which is (1,) * while reduction == "sum" or "mean", A Tensor with the same type of input_x , shape of which is (1,)


*@par Third-party framework compatibility
*Compatible with the Pytorch operator SoftMarginLoss. \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator SoftMarginLoss. \n
*/ */
REG_OP(SoftMarginLoss) REG_OP(SoftMarginLoss)
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
@@ -1624,18 +1650,18 @@ REG_OP(MultilabelMarginLoss)
.OP_END_FACTORY_REG(MultilabelMarginLoss) .OP_END_FACTORY_REG(MultilabelMarginLoss)


/** /**
*@brief Performs batch normalization . \n
*@par Inputs:
* @brief Performs batch normalization . \n
* @par Inputs:
* Two inputs * Two inputs
*@li input_x: A Tensor. Support float32. shape (n, c, d).
*@li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n
*@par Attributes:
*@li normalize_type: Str. Support "per_feature" or "all_features".
*@li epsilon: An optional float32, specifying the small value added to
variance to avoid dividing by zero. Defaults to "0.00001" . \n
*@par Outputs:
* @li input_x: A Tensor. Support float32. shape (n, c, d).
* @li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n
* @par Attributes:
* @li normalize_type: Str. Support "per_feature" or "all_features".
* @li epsilon: An optional float32, specifying the small value added to
* variance to avoid dividing by zero. Defaults to "0.00001" . \n
* @par Outputs:
* One outputs * One outputs
*@li output_y: A Tensor for the normalized "x".Support float32. shape (n, c, d).\n
* @li output_y: A Tensor for the normalized "x".Support float32. shape (n, c, d).\n
*/ */
REG_OP(NormalizeBatch) REG_OP(NormalizeBatch)
.INPUT(input_x, TensorType({ DT_FLOAT })) .INPUT(input_x, TensorType({ DT_FLOAT }))
@@ -1644,6 +1670,36 @@ REG_OP(NormalizeBatch)
.REQUIRED_ATTR(normalize_type, String) .REQUIRED_ATTR(normalize_type, String)
.ATTR(epsilon, Float, 0.00001) .ATTR(epsilon, Float, 0.00001)
.OP_END_FACTORY_REG(NormalizeBatch) .OP_END_FACTORY_REG(NormalizeBatch)

/**
*@brief GroupNorm and Relu operator,
* calculated from x, gamma and beta as
* y = relu(gamma * ((x - mean) / sqrt(variance + eps)) + beta)

* @par Inputs:
* Three inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li gamma: A Tensor. Must be one of the following types: float16, float32.
* @li beta: A Tensor. Must be one of the following types: float16, float32 . \n

* @par Attributes:
* @li num_groups: A required attribute of type int32.
* @li eps: An optional attribute of type float32. Defaults to 0.00001. \n

* @par Outputs:
* One output, including:
* @li y: A Tensor. Must be one of the following types: float16, float32.
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(GroupNormRelu)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.REQUIRED_ATTR(num_groups, Int)
.ATTR(eps, Float, 0.00001)
.OP_END_FACTORY_REG(GroupNormRelu)
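A minimal host-side sketch of the formula quoted in the brief, for one sample laid out as [C, H*W] with C divisible by num_groups (a simplification with hypothetical helper names, not the real NCHW/5HD kernel):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Reference-only GroupNorm + ReLU for one sample stored row-major as [C, HW].
// Mean/variance are computed per group of channels, then y = relu(gamma*x_hat + beta).
std::vector<float> GroupNormReluRef(const std::vector<float> &x,
                                    const std::vector<float> &gamma,
                                    const std::vector<float> &beta,
                                    std::size_t channels, std::size_t hw,
                                    std::size_t num_groups, float eps = 1e-5f) {
  std::vector<float> y(x.size());
  const std::size_t group_size = channels / num_groups;
  for (std::size_t g = 0; g < num_groups; ++g) {
    const std::size_t begin = g * group_size * hw;
    const std::size_t end = begin + group_size * hw;
    double mean = 0.0, var = 0.0;
    for (std::size_t i = begin; i < end; ++i) mean += x[i];
    mean /= static_cast<double>(end - begin);
    for (std::size_t i = begin; i < end; ++i) var += (x[i] - mean) * (x[i] - mean);
    var /= static_cast<double>(end - begin);
    for (std::size_t i = begin; i < end; ++i) {
      const std::size_t c = i / hw;  // channel index for gamma/beta
      const float x_hat = static_cast<float>((x[i] - mean) / std::sqrt(var + eps));
      y[i] = std::max(0.0f, gamma[c] * x_hat + beta[c]);
    }
  }
  return y;
}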
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_

+ 4
- 2
third_party/fwkacllib/inc/ops/nn_pooling_ops.h View File

@@ -1747,7 +1747,8 @@ included in the sample.\n


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with the Pytorch operator SubSample. *Compatible with the Pytorch operator SubSample.
*@par Restrictions:

*@attention Constraints:
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly. *Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
*/ */
REG_OP(SubSample) REG_OP(SubSample)
@@ -1776,7 +1777,8 @@ included in the sample.\n


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with the Pytorch operator SubSampleLabels. *Compatible with the Pytorch operator SubSampleLabels.
*@par Restrictions:

*@attention Constraints:
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly. *Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
*/ */
REG_OP(SubSampleLabels) REG_OP(SubSampleLabels)


+ 21
- 8
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -25,7 +25,8 @@


namespace ge { namespace ge {
/** /**
*@brief Computes the for the gelu of "x" . \n
*@brief The GELU activation function is x*Φ(x),
* where Φ(x) is the standard Gaussian cumulative distribution function. \n


*@par Inputs: *@par Inputs:
*One input, including: *One input, including:
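Given the brief above (x·Φ(x), with Φ the standard normal CDF), a scalar reference follows directly from std::erf; this is the exact formulation rather than the tanh approximation, offered only as a sketch:

#include <cmath>

// gelu(x) = x * Phi(x), where Phi(x) = 0.5 * (1 + erf(x / sqrt(2))).
float GeluRef(float x) {
  return x * 0.5f * (1.0f + std::erf(x / std::sqrt(2.0f)));
}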
@@ -144,7 +145,7 @@ REG_OP(GeluGrad)
.OP_END_FACTORY_REG(GeluGrad) .OP_END_FACTORY_REG(GeluGrad)


/** /**
*@brief Computes the for the fast_gelu of "x" . \n
*@brief The FastGelu activation function is x*e^(0.851*x)*(x-|x|)/(1+e^(-1.702|x|)). \n


*@par Inputs: *@par Inputs:
*One input, including: *One input, including:
@@ -159,7 +160,23 @@ REG_OP(FastGelu)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(FastGelu) .OP_END_FACTORY_REG(FastGelu)
/**
*@brief The FastGeluV2 activation function is x*(sgn(x)*[(a/2)*(clip(|x|,max=-b)+b)^2+0.5]+0.5),
* where sgn(x) function is (x+0.000000000001)/|(x+0.000000000001)|. \n

*@par Inputs:
*One input, including:
*x: A Tensor. Must be one of the following types: float16, float32


*@par Outputs:
*y: A Tensor. Has the same type as "x".
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator FastGeluV2
*/
REG_OP(FastGeluV2)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(FastGeluV2)
/** /**
*@brief Computes the gradient for the fast_gelu of "x" . \n *@brief Computes the gradient for the fast_gelu of "x" . \n


@@ -623,9 +640,7 @@ REG_OP(Elu)
*x: A float16, float32, for the input data type . \n *x: A float16, float32, for the input data type . \n


*@par Attributes: *@par Attributes:
*@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
*@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" .
*@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
*@li alpha: A float32. Defines at which negative value the CELU saturates. Defaults to "1.0" . \n


*@par Outputs: *@par Outputs:
*y: A float16, float32, for the normalized result . \n *y: A float16, float32, for the normalized result . \n
@@ -641,9 +656,7 @@ REG_OP(Elu)
REG_OP(Celu) REG_OP(Celu)
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16}))
.ATTR(alpha1, Float, 1.0)
.ATTR(alpha2, Float, 1.0)
.ATTR(alpha3, Float, 1.0)
.ATTR(alpha, Float, 1.0)
.OP_END_FACTORY_REG(Celu) .OP_END_FACTORY_REG(Celu)
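With the attribute list reduced to a single "alpha", a scalar reference assuming the conventional CELU definition (as in PyTorch; the header itself does not state the formula) is:

#include <algorithm>
#include <cmath>

// Assumed CELU semantics: celu(x, alpha) = max(0, x) + min(0, alpha * (exp(x / alpha) - 1)).
// Reference-only sketch; the device kernel is not documented here.
float CeluRef(float x, float alpha = 1.0f) {
  return std::max(0.0f, x) + std::min(0.0f, alpha * std::expm1(x / alpha));
}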


/** /**


+ 27
- 0
third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h View File

@@ -117,6 +117,33 @@ REG_OP(NPUGetFloatStatus)
.INPUT(addr, TensorType{DT_FLOAT}) .INPUT(addr, TensorType{DT_FLOAT})
.OUTPUT(data, TensorType({DT_FLOAT})) .OUTPUT(data, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(NPUGetFloatStatus) .OP_END_FACTORY_REG(NPUGetFloatStatus)


/**
*@brief Set the value of global workspace to 0. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(NPUClearFloatStatusV2)
.OP_END_FACTORY_REG(NPUClearFloatStatusV2)

/**
*@brief Get the float status from the global workspace. \n

*@par Inputs:
*addr: A nested structure of Tensors of type float32 . \n

*@par Outputs:
*data: A Tensor of type float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(NPUGetFloatStatusV2)
.DYNAMIC_INPUT(addr, TensorType{DT_FLOAT})
.OUTPUT(data, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(NPUGetFloatStatusV2)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_

+ 1
- 0
third_party/fwkacllib/inc/ops/ocr_ops.h View File

@@ -81,6 +81,7 @@ REG_OP(OCRRecognitionPreHandle)
.OUTPUT(imgs, TensorType({DT_UINT8})) .OUTPUT(imgs, TensorType({DT_UINT8}))
.OUTPUT(imgs_relation, TensorType({DT_INT32})) .OUTPUT(imgs_relation, TensorType({DT_INT32}))
.OUTPUT(imgs_lang, TensorType({DT_INT32})) .OUTPUT(imgs_lang, TensorType({DT_INT32}))
.OUTPUT(imgs_piece_fillers, TensorType({DT_INT32}))
.ATTR(batch_size, Int, 8) .ATTR(batch_size, Int, 8)
.ATTR(data_format, String, "NHWC") .ATTR(data_format, String, "NHWC")
.ATTR(pad_mode, String, "REPLICATE") .ATTR(pad_mode, String, "REPLICATE")


+ 160
- 1
third_party/fwkacllib/inc/ops/random_ops.h View File

@@ -59,6 +59,65 @@ REG_OP(Multinomial)
.ATTR(seed2, Int, 0) .ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(Multinomial) .OP_END_FACTORY_REG(Multinomial)


/**
*@brief Draws samples from the alias tables produced by MultinomialAliasSetup. \n

*@par Inputs:
*Inputs include:
* @li q: A Tensor. Must be one of the following types: float, double.
1-D Tensor with shape [num_classes].
* @li j: A Tensor. Must be one of the following types: int64.
1-D Tensor with shape [num_classes]. \n

*@par Attributes:
*@li num_samples: A required int. Number of independent samples to draw for each row slice.
*@li seed: An optional int. Defaults to 0. \n

*@par Outputs:
*y: A Tensor of type int64. \n

*@attention Constraints:
*The implementation for MultinomialAliasDraw on Ascend uses AICPU, with bad performance.

*@par Third-party framework compatibility
*@li compatible with torch _multinomial_alias_draw operator.
*/
REG_OP(MultinomialAliasDraw)
.INPUT(q, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(j, TensorType({DT_INT64}))
.OUTPUT(y, TensorType({DT_INT64}))
.REQUIRED_ATTR(num_samples, Int)
.ATTR(seed, Int, 0)
.OP_END_FACTORY_REG(MultinomialAliasDraw)

/**
*@brief Prepares for MultinomialAliasDraw to create a multinomial distribution. \n

*@par Inputs:
*Inputs include:
* @li probs: A Tensor. Must be one of the following types: float, double.
1-D Tensor with shape [num_classes]. \n

*@par Outputs:
*j: A Tensor. Must be one of the following types: int64.
1-D Tensor with shape [num_classes].
*q: A Tensor. Must be one of the following types: float, double.
1-D Tensor with shape [num_classes]. \n

*@attention Constraints:
*The implementation for MultinomialAliasSetup on Ascend uses AICPU, with bad performance.

*@par Third-party framework compatibility
*@li compatible with torch _multinomial_alias_setup operator.
*/
REG_OP(MultinomialAliasSetup)
.INPUT(probs, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(j, TensorType({DT_INT64}))
.OUTPUT(q, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(MultinomialAliasSetup)
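Together these two ops mirror torch's alias-method sampling: setup turns the class probabilities into a scaled-probability table q and an alias table j, and draw then produces each sample with one uniform index plus one biased coin flip. A compact host-side sketch (Vose's construction; the exact rounding and tie-breaking used on device are assumptions):

#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

// Alias-method setup: builds q (scaled probabilities) and j (alias indices).
void AliasSetupRef(const std::vector<double> &probs,
                   std::vector<double> &q, std::vector<int64_t> &j) {
  const std::size_t n = probs.size();
  q.assign(n, 0.0);
  j.assign(n, 0);
  std::vector<std::size_t> small, large;
  for (std::size_t i = 0; i < n; ++i) {
    q[i] = probs[i] * static_cast<double>(n);
    (q[i] < 1.0 ? small : large).push_back(i);
  }
  while (!small.empty() && !large.empty()) {
    const std::size_t s = small.back(); small.pop_back();
    const std::size_t l = large.back(); large.pop_back();
    j[s] = static_cast<int64_t>(l);
    q[l] = q[l] + q[s] - 1.0;  // hand the surplus of l over to s
    (q[l] < 1.0 ? small : large).push_back(l);
  }
}

// Alias-method draw: each sample costs one uniform index and one coin flip.
std::vector<int64_t> AliasDrawRef(const std::vector<double> &q,
                                  const std::vector<int64_t> &j,
                                  int num_samples, std::mt19937_64 &gen) {
  std::uniform_int_distribution<std::size_t> pick(0, q.size() - 1);
  std::uniform_real_distribution<double> coin(0.0, 1.0);
  std::vector<int64_t> out;
  out.reserve(static_cast<std::size_t>(num_samples));
  for (int s = 0; s < num_samples; ++s) {
    const std::size_t k = pick(gen);
    out.push_back(coin(gen) < q[k] ? static_cast<int64_t>(k) : j[k]);
  }
  return out;
}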

/** /**
*@brief Outputs random values from a normal distribution . \n *@brief Outputs random values from a normal distribution . \n


@@ -173,6 +232,27 @@ REG_OP(Randperm)
.ATTR(dtype, Type, DT_INT64) .ATTR(dtype, Type, DT_INT64)
.OP_END_FACTORY_REG(Randperm) .OP_END_FACTORY_REG(Randperm)


/**
*@brief Fills a tensor with elements drawn from the poisson distribution. \n

*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float. \n

*@par Attributes:
*@li seed: An optional int. Defaults to 0. \n

*@par Outputs:
*y: A Tensor with the same type as "x" . \n

*@par Third-party framework compatibility
* Compatible with the Pytorch operator Poisson.
*/
REG_OP(Poisson)
.INPUT(x, TensorType({ DT_FLOAT16,DT_FLOAT }))
.OUTPUT(y, TensorType({ DT_FLOAT16,DT_FLOAT }))
.ATTR(seed, Int, 0)
.OP_END_FACTORY_REG(Poisson)
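Reading "x" as the per-element rate (consistent with the PyTorch Poisson operator it is said to match, which is an interpretation rather than something stated in this header), a host-side reference using the standard library generator might be:

#include <cstddef>
#include <random>
#include <vector>

// Reference-only: y[i] ~ Poisson(rate = x[i]); "seed" mirrors the attribute.
// Hypothetical helper; it ignores the device's own seeding scheme.
std::vector<float> PoissonRef(const std::vector<float> &x, unsigned int seed = 0) {
  std::mt19937 gen(seed);
  std::vector<float> y(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    std::poisson_distribution<int> dist(x[i]);
    y[i] = static_cast<float>(dist(gen));
  }
  return y;
}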
/** /**
*@brief Outputs random values from the Poisson distribution(s) described by rate . \n *@brief Outputs random values from the Poisson distribution(s) described by rate . \n


@@ -446,6 +526,34 @@ REG_OP(DropOutGenMaskV3)
.ATTR(seed2, Int, 0) .ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(DropOutGenMaskV3) .OP_END_FACTORY_REG(DropOutGenMaskV3)


/**
*@brief Generate stateless random bit mask for dropout . \n

*@par Inputs:
include:
*@li shape:The shape of the output tensor.
*@li prob:0-D. Number of bit 1 . \n
*@li seed: If either seed or seed1 is set to be non-zero, the random number
*generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
*@li seed1: A second seed to avoid seed collision . \n

*@par Outputs:
*y:Output (1-D) random number using uint data format . \n

*@attention Constraints:
*The output is aligned with 128 bits

*@see StatelessDropOutGenMask()
*/
REG_OP(StatelessDropOutGenMask)
.INPUT(shape, TensorType({ DT_INT32, DT_INT64 }))
.INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT }))
.INPUT(seed, TensorType({ DT_INT32, DT_INT64 }))
.INPUT(seed1, TensorType({ DT_INT32, DT_INT64 }))
.OUTPUT(y, TensorType({ DT_UINT8 }))
.OP_END_FACTORY_REG(StatelessDropOutGenMask)

/** /**
*@brief Generates values in an interval . \n *@brief Generates values in an interval . \n


@@ -698,11 +806,62 @@ REG_OP(Uniform)
*@attention Constraints: *@attention Constraints:
* Compatible with the Caffe operator ContinuationIndicator. * Compatible with the Caffe operator ContinuationIndicator.
*/ */

REG_OP(ContinuationIndicator) REG_OP(ContinuationIndicator)
.REQUIRED_ATTR(time_step, Int) .REQUIRED_ATTR(time_step, Int)
.REQUIRED_ATTR(batch_size, Int) .REQUIRED_ATTR(batch_size, Int)
.OUTPUT(y, TensorType({DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(ContinuationIndicator) .OP_END_FACTORY_REG(ContinuationIndicator)

/**
*@brief Outputs random values from the Exponential distribution(s) described by rate . \n

*@par Inputs:
*Inputs include:
* @li x: A Tensor. Must be one of the following types: half, float32, float64. \n

*@par Attributes:
*@li lambda: An optional float. Defaults to 1.
*@li seed: An optional int. Defaults to 0. If non-zero, the random number generator is seeded by the given seed.
Otherwise, it is seeded by a random seed. \n

*@par Outputs:
*y: A Tensor of type float16, float or double. \n

*@attention Constraints:
*The implementation for Exponential on Ascend uses AICPU, with bad performance.

*@par Third-party framework compatibility
*@li compatible with tensorflow Exponential operator.
*/
REG_OP(Exponential)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.ATTR(lambda, Float, 1)
.ATTR(seed, Int, 0)
.OP_END_FACTORY_REG(Exponential)
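A host-side reference that fills a buffer with Exponential(lambda) samples via the standard library (the AICPU implementation and its seed handling are not modelled here):

#include <cstddef>
#include <random>
#include <vector>

// Reference-only: each output element is drawn from Exponential(lambda).
std::vector<float> ExponentialRef(std::size_t count, float lambda = 1.0f,
                                  unsigned int seed = 0) {
  std::mt19937 gen(seed);
  std::exponential_distribution<float> dist(lambda);
  std::vector<float> y(count);
  for (auto &v : y) v = dist(gen);
  return y;
}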

/**
*@brief Fills a tensor with elements drawn from the geometric distribution. \n

*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float. \n

*@par Attributes:
*@li p: A required float. The probability of success in each Bernoulli trial.
*@li seed: An optional int. Defaults to 0. \n

*@par Outputs:
*y: A Tensor with the same type as "x" . \n

*@par Third-party framework compatibility
* Compatible with the Pytorch operator Geometric.
*/
REG_OP(Geometric)
.INPUT(x, TensorType({ DT_FLOAT16,DT_FLOAT }))
.OUTPUT(y, TensorType({ DT_FLOAT16,DT_FLOAT }))
.REQUIRED_ATTR(p, Float)
.ATTR(seed, Int, 0)
.OP_END_FACTORY_REG(Geometric)

} // namespace ge } // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_

+ 139
- 0
third_party/fwkacllib/inc/ops/randomdsa_ops.h View File

@@ -0,0 +1,139 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*!
* \file randomdsa_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H_
#include <vector>
#include "graph/operator_reg.h"
#include "graph/operator.h"
namespace ge {
/**
* @brief Generate DSA random bit mask for dropout. \n
* @par Inputs:
include:
* @li count:The shape of the input tensor.
* @li seed:If seed is set to be non-zero, the random number
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed
* @li dropout:0-D. Number of bit 1 . \n
* @par Attributes:
* @li random_algorithm:The default value is "Philox". \n
* @par Outputs:
* y:Output (1-D) random number using uint data format . \n
* @see DSAGenBitMask()
*/
REG_OP(DSAGenBitMask)
.INPUT(count, TensorType({DT_INT64}))
.INPUT(seed, TensorType({DT_UINT64}))
.INPUT(dropout, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
.OUTPUT(out, TensorType({DT_UINT8}))
.ATTR(random_algorithm, String, "Philox")
.OP_END_FACTORY_REG(DSAGenBitMask)
/**
* @brief Generate DSA truncatenormal data in random. \n
* @par Inputs:
include:
* @li count: The shape of the input tensor.
* @li seed: If seed is set to be non-zero, the random number
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed
* @li mean: A Tensor. Must be one of the following types: float16, float32, double
* @li stdev: A Tensor. Must be one of the following types: float16, float32, double. \n
* @par Attributes:
* @li random_algorithm:The default value is "Philox". \n
* @par Outputs:
* y:Output (1-D) random number using float and bf data format . \n
* @see DSARandomTruncatedNormal()
*/
REG_OP(DSARandomTruncatedNormal)
.INPUT(count, TensorType({DT_INT64}))
.INPUT(seed, TensorType({DT_UINT64}))
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
.INPUT(stdev, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
.OUTPUT(out, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16}))
.ATTR(random_algorithm, String, "Philox")
.OP_END_FACTORY_REG(DSARandomTruncatedNormal)
/**
* @brief Generate DSA normal data in random. \n
* @par Inputs:
include:
* @li count: The shape of the input tensor.
* @li seed: If seed is set to be non-zero, the random number
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed
* @li mean: A Tensor. Must be one of the following types: float16, float32, double
* @li stdev: A Tensor. Must be one of the following types: float16, float32, double. \n
* @par Attributes:
* @li random_algorithm:The default value is "Philox". \n
* @par Outputs:
* y:Output (1-D) random number using float and bf data format . \n
* @see DSARandomNormal()
*/
REG_OP(DSARandomNormal)
.INPUT(count, TensorType({DT_INT64}))
.INPUT(seed, TensorType({DT_UINT64}))
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
.INPUT(stdev, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16}))
.OUTPUT(out, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16}))
.ATTR(random_algorithm, String, "Philox")
.OP_END_FACTORY_REG(DSARandomNormal)
/**
* @brief Generate DSA uniform data in random. \n
* @par Inputs:
include:
* @li count: The shape of the input tensor.
* @li seed: If seed is set to be non-zero, the random number
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed
* @li low: A Tensor. Must be one of the following types: int, float, bf
* @li high: A Tensor. Must be one of the following types: int, float, bf. \n
* @par Attributes:
* @li random_algorithm:The default value is "Philox". \n
* @par Outputs:
* y:Output (1-D) random number using float int and bf data format . \n
* @see DSARandomUniform()
*/
REG_OP(DSARandomUniform)
.INPUT(count, TensorType({DT_INT64}))
.INPUT(seed, TensorType({DT_UINT64}))
.INPUT(low, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
.INPUT(high, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
.OUTPUT(out, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64}))
.ATTR(random_algorithm, String, "Philox")
.OP_END_FACTORY_REG(DSARandomUniform)
}
#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H

+ 123
- 0
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -515,6 +515,34 @@ REG_OP(ReduceSumD)
.ATTR(keep_dims, Bool, false) .ATTR(keep_dims, Bool, false)
.OP_END_FACTORY_REG(ReduceSumD) .OP_END_FACTORY_REG(ReduceSumD)


/**
*@brief Calculate the total mean based on the mean of each device . \n

*@par Inputs:
* Three inputs, including:
*@li x: A Tensor. Must be one of the following types: float16, float32 .
*@li count: A Tensor. Must be one of the following types: float16, float32 .
*@li count_sum: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Attributes:
*@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce.
*@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n

*@par Outputs:
*y: The reduced tensor. Has the same type and format as input "x" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Sum.
*/
REG_OP(ReduceMeanWithCount)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(count, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.REQUIRED_ATTR(axes, ListInt)
.ATTR(keep_dims, Bool, false)
.OP_END_FACTORY_REG(ReduceMeanWithCount)
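The natural reading is a count-weighted combination of the per-device means, total = sum_i(x[i] * count[i]) / count_sum; the 1-D sketch below assumes that interpretation (the header leaves the exact semantics implicit) and omits axes/keep_dims handling:

#include <cstddef>
#include <vector>

// Assumed semantics: count-weighted reduction of per-device means (1-D case).
float ReduceMeanWithCountRef(const std::vector<float> &x,
                             const std::vector<float> &count,
                             float count_sum) {
  float acc = 0.0f;
  for (std::size_t i = 0; i < x.size(); ++i) acc += x[i] * count[i];
  return acc / count_sum;
}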

/** /**
*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n *@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n


@@ -1326,6 +1354,101 @@ REG_OP(ReduceMeanVariance)
.ATTR(axes, ListInt, {}) .ATTR(axes, ListInt, {})
.ATTR(keep_dims, Bool, true) .ATTR(keep_dims, Bool, true)
.OP_END_FACTORY_REG(ReduceMeanVariance) .OP_END_FACTORY_REG(ReduceMeanVariance)

/**
* @brief Calculates the standard deviation or the variance of a tensor, given its precomputed mean.

* @par Inputs:
* Two inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32. \n
* @li mean: A Tensor. It's the mean of X. Has the same shape and type as "x" \n

* @par Attributes:
* Four Attributes, including:
* @li dim: A required ListInt. Specifies the dimensions to reduce. \n
* @li if_std: An optional bool. Defaults to "False"
* If "True", Calculate the standard deviation
* If "False", Calculate the variance
* @li unbiased: An optional bool. Defaults to "True".
* If "True", Use Bessel Correction.
* If "False", Do not use Bessel Correction. \n
* @li keepdim: An optional bool. Defaults to "False".
* If "True", Keep the original tensor dimension.
* If "False", Do not keep the original tensor dimension. \n

* @par Outputs:
* @li output_var: A Tensor. It's the standard deviation or the variance of X. Has the same type as "x".

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Var_mean.
*/
REG_OP(ReduceStdV2Update)
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
.INPUT(mean, TensorType({DT_FLOAT,DT_FLOAT16}))
.OUTPUT(output_var, TensorType({DT_FLOAT,DT_FLOAT16}))
.REQUIRED_ATTR(dim, ListInt)
.ATTR(if_std, Bool, false)
.ATTR(unbiased, Bool, true)
.ATTR(keepdim, Bool, false)
.OP_END_FACTORY_REG(ReduceStdV2Update)
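Given the precomputed mean, the reduction is the textbook variance (or its square root when if_std is true), with Bessel's correction toggled by "unbiased". A 1-D sketch using a scalar mean (the real op takes a mean tensor and a dim list, so this is only an illustration):

#include <cmath>
#include <cstddef>
#include <vector>

// Reference-only: variance (or std when if_std) of x around a given mean,
// dividing by n-1 when unbiased (Bessel's correction) and by n otherwise.
float ReduceStdV2UpdateRef(const std::vector<float> &x, float mean,
                           bool if_std = false, bool unbiased = true) {
  double acc = 0.0;
  for (float v : x) acc += (v - mean) * (v - mean);
  const double n = static_cast<double>(x.size());
  const double var = acc / (unbiased ? n - 1.0 : n);
  return static_cast<float>(if_std ? std::sqrt(var) : var);
}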
/**
*@brief Computes the log of the sum of exponentials of elements across dimensions of a tensor.
* Reduces "x" along the dimensions given in "axes".
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each
* entry in "axes". If "keep_dims" is true, the reduced dimensions
* are retained with length 1.
*
*@par Inputs:
* Two inputs, including:
*@li x: A Tensor. Must be one of the following types:
* float32, float16, int32, int64, uint32, uint64, double
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n
*
*@par Attributes:
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n
*
*@par Outputs:
*y: The reduced tensor. Has the same type and format as input "x" . \n
*
*@par Third-party framework compatibility
* Compatible with the Onnx operator ReduceLogSumExp.
*/
REG_OP(ReduceLogSumExp)
.INPUT(x, TensorType::NumberType())
.INPUT(axes, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::NumberType())
.ATTR(keep_dims, Bool, false)
.OP_END_FACTORY_REG(ReduceLogSumExp)
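Log-sum-exp is normally evaluated by factoring out the maximum so the exponentials cannot overflow; a 1-D reference of that standard trick (a sketch, not the device kernel):

#include <algorithm>
#include <cmath>
#include <vector>

// Reference-only: log(sum(exp(x))) over a non-empty 1-D range, computed
// stably by subtracting max(x) before exponentiating.
float ReduceLogSumExpRef(const std::vector<float> &x) {
  const float m = *std::max_element(x.begin(), x.end());
  double acc = 0.0;
  for (float v : x) acc += std::exp(static_cast<double>(v - m));
  return m + static_cast<float>(std::log(acc));
}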

/**
*@brief Computes the log of the sum of elements across dimensions of a tensor.
* Reduces "x" along the dimensions given in "axes".
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each
* entry in "axes". If "keep_dims" is true, the reduced dimensions
* are retained with length 1.
*
*@par Inputs:
* Two inputs, including:
*@li x: A Tensor. Must be one of the following types:
* float32, float16, int32, int64, uint32, uint64, double
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n
*
*@par Attributes:
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n
*
*@par Outputs:
*y: The reduced tensor. Has the same type and format as input "x" . \n
*
*@par Third-party framework compatibility
* Compatible with the Onnx operator ReduceLogSum.
*/
REG_OP(ReduceLogSum)
.INPUT(x, TensorType::NumberType())
.INPUT(axes, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::NumberType())
.ATTR(keep_dims, Bool, false)
.OP_END_FACTORY_REG(ReduceLogSum)
} //namespace ge } //namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_

+ 38
- 0
third_party/fwkacllib/inc/ops/rnn.h View File

@@ -1280,6 +1280,44 @@ REG_OP(EmbeddingBag)
.ATTR(sparse, Bool, false)
.ATTR(include_last_offset, Bool, false)
.OP_END_FACTORY_REG(EmbeddingBag)
/**
* @brief LSTMP calculation.
* @par Inputs:
* Eight inputs, including:
* @li x: A required Tensor(seq, batch, dim). Must be one of the following types: float16, float32.
* @li real_mask: An optional Tensor(seq, batch). Must be one of the following types: float16, float32.
* @li init_h: An optional Tensor(batch, state). Must be one of the following types: float16, float32.
* @li init_c: An optional Tensor(batch, hidden). Must be one of the following types: float16, float32.
* @li wx: A required Tensor(4*hidden, dim). Must be one of the following types: float16, float32.
* @li wr: A required Tensor(4*hidden, state). Must be one of the following types: float16, float32.
* @li bias: An optional Tensor(hidden). Must be one of the following types: float16, float32. The format must be ND.
* @li project: An optional Tensor. Must be one of the following types: float16, float32.
*
* @par Outputs:
* Three outputs, including:
* @li y: A Tensor. Must be one of the following types: float16, float32.
* @li output_h: A Tensor. Must be one of the following types: float16, float32.
* @li output_c: A Tensor. Must be one of the following types: float16, float32.
*
* @par Attributes:
* time_major: An optional bool identifying whether the inputs are in time-major layout. Defaults to false.
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(LSTMP)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(wx, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(wr, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(time_major, Bool, false)
.OP_END_FACTORY_REG(LSTMP)
} // namespace ge


#endif  // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_

+ 124
- 6
third_party/fwkacllib/inc/ops/selection_ops.h View File

@@ -259,13 +259,39 @@ REG_OP(GatherV2D)
*@par Third-party framework compatibility
*Compatible with the PyTorch operator Gather.
*/

REG_OP(GatherElements)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32,
DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64}))
.INPUT(index, TensorType({DT_INT32,DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32,
DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64}))
.ATTR(dim, Int, 0)
.OP_END_FACTORY_REG(GatherElements)

/**
*@brief Gathers values along an axis specified by "dim" . \n

*@par Inputs:
*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8,
* int64, uint16, float16, uint32, uint64, bool.
*@li dim: A Tensor. Must be one of the following types: int32, int64.
*@li index: A Tensor. Must be one of the following types: int32, int64 . \n


*@par Outputs:
* y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
*Compatible with the PyTorch operator Gather.
*/
REG_OP(GatherD)
.INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32,
DT_INT64, DT_UINT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(dim, TensorType({DT_INT32, DT_INT64}))
.INPUT(index, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.ATTR(dim, Int, 0)
.OP_END_FACTORY_REG(GatherElements)
.OP_END_FACTORY_REG(GatherD)
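A hedged illustration of the PyTorch-style gather semantics referenced above: for a 2-D "x" with dim = 1, y[i][j] = x[i][index[i][j]]; "y" takes the shape of "index", and "dim" selects the axis that the index values address.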


/**
*@brief Extracts a strided slice of a tensor. Roughly speaking, this op
@@ -360,9 +386,9 @@ REG_OP(StridedSlice)
* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead.
*/
REG_OP(StridedSliceD)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8,
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_UINT8, DT_INT8,
DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8,
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_UINT8, DT_INT8,
DT_BOOL}))
.REQUIRED_ATTR(begin, ListInt)
.REQUIRED_ATTR(end, ListInt)
@@ -700,6 +726,27 @@ REG_OP(SegmentMax)
.OUTPUT(y, TensorType::RealNumberType())
.OP_END_FACTORY_REG(SegmentMax)


/**
*@brief Computes the sum along segments of a tensor . \n

*@par Inputs:
*Two inputs, including:
* @li x: A Tensor of type NumberType.
* @li segment_ids: A Tensor of type IndexNumberType, whose shape is a prefix
* of "x.shape".

*@par Outputs:
*y: A Tensor of type NumberType . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator SegmentSum.
*/
REG_OP(SegmentSum)
.INPUT(x, TensorType::NumberType())
.INPUT(segment_ids, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::NumberType())
.OP_END_FACTORY_REG(SegmentSum)

/**
*@brief Computes the maximum along segments of a tensor.
*Computes a tensor such that output[i] = max(data[j]) where max is over j
@@ -929,6 +976,49 @@ REG_OP(TopKD)
* @brief Finds values and indices of the "k" largest elements for the last
* dimension . \n


* @par Inputs:
* Three inputs, including:
* @li x: A 1D or higher tensor of type BasicType, with the last dimension
* at least "k".
* @li k: A 0D Tensor of type int32.
* Number of top elements to look for along the last dimension (along each row
* for matrices).
* @li assist_seq: A 1D tensor of type float16,
* with size 2N, where "N" is the size of the last dimension.
* The first N numbers are indices, and the next N numbers are the deviation from casting
* int32 to float16. \n

* @par Attributes:
* @li sorted: An optional bool. Defaults to true.
* If true, the resulting "k" elements will be sorted by the values in descending
* order.
* @li dim: An optional int. Defaults to -1. For reserved use.
* @li largest: An optional bool. Defaults to true. For reserved use. \n

* @par Outputs:
* @li values: A Tensor, specifying the sorted data. Has the same type as
* "input".
* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n

* @see TopK()
* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator TopKV2.
*/
REG_OP(TopKV2D)
.INPUT(x, TensorType::RealNumberType())
.INPUT(k, TensorType({DT_INT32}))
.INPUT(assist_seq, TensorType({DT_FLOAT16}))
.OUTPUT(values, TensorType::RealNumberType())
.OUTPUT(indices, TensorType({DT_INT32}))
.ATTR(sorted, Bool, true)
.ATTR(dim, Int, -1)
.ATTR(largest, Bool, true)
.OP_END_FACTORY_REG(TopKV2D)

/**
* @brief Finds values and indices of the "k" largest elements for the last
* dimension . \n

* @par Inputs:
* Two inputs, including:
* @li x: A 1D or higher tensor of type BasicType, with the last dimension
@@ -2340,7 +2430,7 @@ REG_OP(AddRowRanges)
*@par Outputs:
*y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D)


* @par Restrictions:
*@attention Constraints:
* Warning: input shape's length must not be bigger than 1024 * 1024 * 1024.
*/
REG_OP(MaskedFillRange)
@@ -2442,6 +2532,34 @@ REG_OP(StridedSliceV3)
.OPTIONAL_INPUT(strides, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(StridedSliceV3)

/**
*@brief MovingSumWithSigmoid.

*@par Inputs:
*Four inputs, including:
* @li alpha: A Tensor. Must be one of the following types: float32, float16.
* @li energy: A Tensor. Must be one of the following types: float32, float16.
* @li beam_size: A Tensor of type int32.
* @li frame_size: A Tensor of type int32. \n

*@par Outputs:
* y: A Tensor. Has the same type as "alpha". \n
*
* @par Attributes:
* window_size: A required int.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(MovingSumWithSigmoid)
.INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(energy, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(beam_size, TensorType({DT_INT32}))
.INPUT(frame_size, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(window_size, Int)
.OP_END_FACTORY_REG(MovingSumWithSigmoid)
} // namespace ge


#endif  // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_

+ 24
- 0
third_party/fwkacllib/inc/ops/split_combination_ops.h View File

@@ -381,6 +381,30 @@ REG_OP(ConcatOffsetD)
.REQUIRED_ATTR(concat_dim, Int)
.REQUIRED_ATTR(N, Int)
.OP_END_FACTORY_REG(ConcatOffsetD)

/**
*@brief Computes combinations of length "r" of the elements of the given tensor. \n

*@par Inputs:
*x: A list of 1D Tensor objects. \n

*@par Attributes:
*@li r: An optional int indicating the number of elements to combine. Defaults to 2.
*@li with_replacement: An optional bool indicating whether to allow duplication
*in combinations. Defaults to "False". \n

*@par Outputs:
*y: A Tensor with the same type as "x" . \n

*@par Third-party framework compatibility
* Compatible with the PyTorch operator Combinations.
*/
REG_OP(Combinations)
.INPUT(x, TensorType::ALL())
.OUTPUT(y, TensorType::ALL())
.ATTR(r, Int, 2)
.ATTR(with_replacement, Bool, false)
.OP_END_FACTORY_REG(Combinations)
} // namespace ge


#endif  // OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_

+ 1
- 38
third_party/fwkacllib/inc/ops/vector_search.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -154,43 +154,6 @@ REG_OP(CalcBucketsLimitAndOffset)
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64}))
.REQUIRED_ATTR(total_limit, Int)
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset)

/**
* @brief Calculate ProdVirialSeA. \n
*
* @par Inputs:
* Five inputs, including:
* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
* @li rij: A Tensor. Must be one of the following types: float16, float32, float64.
* @li nlist: A Tensor. dtype is int32.
* @li natoms: A Tensor. dtype is int32. \n
*
* @par Outputs:
* Two outputs, including:
* @li virial: A Tensor. Must be one of the following types: float16, float32, float64.
* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n
*
* @par Attributes:
* Two attributes, including:
* @li n_a_sel: A Scalar.
* @li n_r_sel: A Scalar. \n
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ProdVirialSeA)
.INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(nlist, TensorType({DT_INT32}))
.INPUT(natoms, TensorType({DT_INT32}))
.OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(n_a_sel, Int)
.REQUIRED_ATTR(n_r_sel, Int)
.ATTR(nall, Int, 28328)
.OP_END_FACTORY_REG(ProdVirialSeA)
} // namespace ge


#endif  // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_

+ 11
- 2
third_party/fwkacllib/inc/runtime/base.h View File

@@ -98,11 +98,11 @@ typedef struct rtExceptionInfo {
uint32_t tid;
uint32_t deviceid;
uint32_t retcode;
} rtExceptionInfo;
} rtExceptionInfo_t;


typedef void (*rtErrorCallback)(rtExceptionType);


typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo);
typedef void (*rtTaskFailCallback)(rtExceptionInfo_t *exceptionInfo);


typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen);


@@ -429,6 +429,15 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *lbl, rtModel_t mdl, rtStream_t st
* @return RT_ERROR_INVALID_VALUE for input null ptr
*/
RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId);

/**
* @ingroup dvrt_base
* @brief get max model num
* @param [out] maxModelCount max model num
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtGetMaxModelNum(uint32_t *maxModelCount);
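A minimal caller sketch (illustrative only; the variable name is an assumption, error handling reduced to a single check):

uint32_t maxModels = 0U;
if (rtGetMaxModelNum(&maxModels) == RT_ERROR_NONE) {
    // maxModels now holds the maximum number of models supported by the runtime
}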
#if defined(__cplusplus)
}
#endif


+ 21
- 3
third_party/fwkacllib/inc/runtime/config.h View File

@@ -22,7 +22,8 @@ typedef enum tagRtArchType {
ARCH_BEGIN = 0,
ARCH_V100 = ARCH_BEGIN,
ARCH_V200 = 1,
ARCH_END = 2,
ARCH_V300 = 2,
ARCH_END = 3,
} rtArchType_t;


typedef enum tagRtChipType {
@@ -34,7 +35,8 @@ typedef enum tagRtChipType {
CHIP_DC = 4,
CHIP_CLOUD_V2 = 5,
CHIP_NO_DEVICE = 6,
CHIP_END = 7,
CHIP_MINI_V3 = 7,
CHIP_END = 8,
} rtChipType_t;


typedef enum tagRtAicpuScheType {
@@ -74,7 +76,8 @@ typedef enum tagRtPlatformType {
PLATFORM_DC = 5,
PLATFORM_CLOUD_V2 = 6,
PLATFORM_LHISI_SD3403 = 7,
PLATFORM_END = 8,
PLATFORM_MINI_V3 = 8,
PLATFORM_END = 9,
} rtPlatformType_t;


typedef enum tagRtCubeFracMKNFp16 {
@@ -140,6 +143,12 @@ typedef enum tagRTTaskTimeoutType {
RT_TIMEOUT_TYPE_OP_EXECUTE,
} rtTaskTimeoutType_t;


typedef enum tagRtFloatOverflowMode {
RT_OVERFLOW_MODE_SATURATION = 0,
RT_OVERFLOW_MODE_INFNAN,
RT_OVERFLOW_MODE_UNDEF,
} rtFloatOverflowMode_t;

/**
* @ingroup
* @brief get AI core count
@@ -180,6 +189,15 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate
*/
RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig);


/**
* @ingroup
* @brief get float overflow mode
* @param [out] floatOverflowMode
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtGetFloatOverflowMode(rtFloatOverflowMode_t * const floatOverflowMode);
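A minimal query sketch (illustrative only; the enum values are the ones declared in rtFloatOverflowMode_t above):

rtFloatOverflowMode_t ovfMode = RT_OVERFLOW_MODE_UNDEF;
if (rtGetFloatOverflowMode(&ovfMode) == RT_ERROR_NONE) {
    // RT_OVERFLOW_MODE_SATURATION: results clamp on overflow; RT_OVERFLOW_MODE_INFNAN: IEEE-style Inf/NaN propagation
}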

/**
* @ingroup
* @brief get l2 buffer Info,virtual baseaddr,Size


+ 1
- 1
third_party/fwkacllib/inc/runtime/context.h View File

@@ -140,7 +140,7 @@ RTS_API rtError_t rtSetGroup(int32_t groupId);
* @param [in] groupid count
* @return RT_ERROR_NONE for ok, errno for failed
*/
RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t count);
RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t cnt);


/**
* @ingroup


+ 3
- 3
third_party/fwkacllib/inc/runtime/dev.h View File

@@ -94,11 +94,11 @@ typedef enum tagGetDevMsgType {
/**
* @ingroup dvrt_dev
* @brief get total device number.
* @param [in|out] count the device number
* @param [in|out] cnt the device number
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtGetDeviceCount(int32_t *count);
RTS_API rtError_t rtGetDeviceCount(int32_t *cnt);
/**
* @ingroup dvrt_dev
* @brief get device ids
@@ -338,7 +338,7 @@ RTS_API rtError_t rtSetTSDevice(uint32_t tsId);
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_DRV_ERR for can not get run mode
*/
RTS_API rtError_t rtGetRunMode(rtRunMode *mode);
RTS_API rtError_t rtGetRunMode(rtRunMode *runMode);


/**
* @ingroup dvrt_dev


+ 2
- 2
third_party/fwkacllib/inc/runtime/dvfsprofile.h View File

@@ -23,11 +23,11 @@ typedef enum dvfsProfileMode {
/**
* @ingroup dvrt_dvfsprofile
* @brief Set the performance mode of the device
* @param [in] mode dvfsProfileMode
* @param [in] profMode dvfsProfileMode
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode mode);
RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode profMode);


/**
* @ingroup dvrt_dvfsprofile


+ 15
- 0
third_party/fwkacllib/inc/runtime/event.h View File

@@ -19,6 +19,11 @@ typedef enum rtEventWaitStatus {
EVENT_STATUS_MAX = 2,
} rtEventWaitStatus_t;


typedef enum rtEventStatus {
RT_EVENT_INIT = 0,
RT_EVENT_RECORDED = 1,
} rtEventStatus_t;

/**
* @ingroup event_flags
* @brief event op bit flags
@@ -115,6 +120,16 @@ RTS_API rtError_t rtEventQuery(rtEvent_t evt);
*/
RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t evt, rtEventWaitStatus_t *status);


/**
* @ingroup dvrt_event
* @brief Queries an event's status
* @param [in] evt event to query
* @param [out] status the queried event status
* @return RT_EVENT_RECORDED for recorded
* @return RT_EVENT_INIT for not recorded
*/
RTS_API rtError_t rtEventQueryStatus(rtEvent_t evt, rtEventStatus_t *status);
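A hedged usage sketch (assumes "evt" is an rtEvent_t created and recorded elsewhere):

rtEventStatus_t evtStatus = RT_EVENT_INIT;
if ((rtEventQueryStatus(evt, &evtStatus) == RT_ERROR_NONE) && (evtStatus == RT_EVENT_RECORDED)) {
    // the event has already been recorded on its stream
}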

/**
* @ingroup dvrt_event
* @brief computes the elapsed time between events.


+ 35
- 11
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -287,13 +287,13 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle);
* @param [in] binHandle device binary handle
* @param [in] stubFunc stub function
* @param [in] stubName stub function name
* @param [in] devFunc device function description. symbol name or address
* offset, depending binary type.
* @param [in] kernelInfoExt kernel info extension: device function description or tiling key,
* depending on static shape or dynamic shape.
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName,
const void *devFunc, uint32_t funcMode);
const void *kernelInfoExt, uint32_t funcMode);


/**
* @ingroup rt_kernel
@@ -354,7 +354,8 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *
* @ingroup rt_kernel
* @brief launch kernel with handle to device
* @param [in] hdl program
* @param [in] devFunc device function description.
* @param [in] kernelInfoExt kernel info extension: device function description or tiling key,
* depending on static shape or dynamic shape.
* @param [in] blockDim block dimensions
* @param [in] args arguments address for kernel function
* @param [in] argsSize arguments size
@@ -364,7 +365,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *devFunc, uint32_t blockDim,
RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *kernelInfoExt, uint32_t blockDim,
void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_,
const void *kernelInfo);


@@ -497,6 +498,28 @@ RTS_API rtError_t rtDumpAddrSet(rtModel_t mdl, void *addr, uint32_t dumpSize, ui
*/
RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length);


/**
* @ingroup rt_kernel
* @brief launch npu get float status task
* @param [in] outputAddr pointer to op output addr
* @param [in] outputSize op output size
* @param [in] checkMode check mode
* @param [in] stm associated stream
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtNpuGetFloatStatus(void *outputAddr, uint64_t outputSize, uint32_t checkMode, rtStream_t stm);

/**
* @ingroup rt_kernel
* @brief launch npu clear float status task
* @param [in] checkMode check mode
* @param [in] stm associated stream
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtNpuClearFloatStatus(uint32_t checkMode, rtStream_t stm);
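A hedged call-order sketch (the buffer address, size, check mode and stream are caller-supplied assumptions):

(void)rtNpuClearFloatStatus(checkMode, stm);                        // reset the float status before launching kernels
// ... launch compute kernels on stm ...
(void)rtNpuGetFloatStatus(statusAddr, statusSize, checkMode, stm);  // read the status back into statusAddr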

#ifndef __CLANG_CCE_RUNTIME_H__
#define __CLANG_CCE_RUNTIME_H__
/**
@@ -519,13 +542,13 @@ RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStre
/**
* @ingroup rt_kernel
* @brief setup argument for next rtLaunch in current thread
* @param [in] arg argment address for kernel function
* @param [in] args argument address for kernel function
* @param [in] size argument size
* @param [in] offset argument table offset
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtSetupArgument(const void *arg, uint32_t size, uint32_t offset);
RTS_API rtError_t rtSetupArgument(const void *args, uint32_t size, uint32_t offset);


/**
* @ingroup rt_kernel
@@ -544,11 +567,11 @@ RTS_API rtError_t rtLaunch(const void *stubFunc);
* @param [in] ptr host memory
* @param [in] size host memory size
* @param [in] flag reserved. set to 0
* @param [out] arg returned arg. used for next kernel's arg.
* @param [out] args returned arg. used for next kernel's arg.
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg);
RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **args);


/**
* @ingroup rt_kernel
@@ -675,7 +698,8 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD
* @ingroup rt_kernel
* @brief launch kernel with handle and tiling data to device
* @param [in] hdl program
* @param [in] devFunc device function description.
* @param [in] kernelInfoExt kernel info extension: device function description or tiling key,
* depending on static shape or dynamic shape.
* @param [in] blockDim block dimensions
* @param [in] argsInfo arguments info address for kernel function
* @param [in] smDesc shared memory description
@@ -684,7 +708,7 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *hdl, const void *devFunc, uint32_t blockDim,
RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *hdl, const void *kernelInfoExt, uint32_t blockDim,
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, const void* kernelInfo);


#if defined(__cplusplus) #if defined(__cplusplus)


+ 24
- 0
third_party/fwkacllib/inc/runtime/mem.h View File

@@ -341,6 +341,20 @@ RTS_API rtError_t rtInvalidCache(void *base, size_t len);
*/
RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind);


/**
* @ingroup dvrt_mem
* @brief host task memcpy
* @param [in] dst destination address pointer
* @param [in] destMax length of destination address memory
* @param [in] src source address pointer
* @param [in] cnt the number of byte to copy
* @param [in] kind memcpy type
* @param [in] stm task stream
* @return RT_ERROR_NONE for ok, errno for failed
*/
RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, const void * const src,
const uint64_t cnt, rtMemcpyKind_t kind, rtStream_t stm);
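A hedged usage sketch (pointers, sizes and stream are caller-supplied assumptions; the copy-kind constant is assumed to come from rtMemcpyKind_t):

const rtError_t cpyRet = rtMemcpyHostTask(dstAddr, dstMax, srcAddr, copyLen,
                                          RT_MEMCPY_HOST_TO_DEVICE, stm);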

/**
* @ingroup dvrt_mem
* @brief asynchronized memcpy
@@ -424,6 +438,16 @@ RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
*/
RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);


/**
* @ingroup dvrt_mem
* @brief Specifies how memory is used
* @param [in] devPtr memory pointer
* @param [in] count memory count
* @param [in] advise reserved, set to 1
* @return RT_ERROR_NONE for ok
* @return others for error
*/
RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t count, uint32_t advise);
/**
* @ingroup dvrt_mem
* @brief set memory with uint32_t value


+ 10
- 0
third_party/fwkacllib/inc/runtime/rt_dfx.h View File

@@ -28,6 +28,16 @@ extern "C" {
*/
RTS_API rtError_t rtSetTaskTag(const char_t *taskTag);


/**
* @brief set aicpu device attribute.
* it is used for aicpu device to be aware of environment config
* @param [in] key attribute key.
* @param [in] val attribute value.
* @return RT_ERROR_NONE for ok
* @return other failed
*/
RTS_API rtError_t rtSetAicpuAttr(const char_t *key, const char_t *val);

#if defined(__cplusplus)
}
#endif


+ 2
- 1
third_party/fwkacllib/inc/runtime/rt_ffts_plus.h View File

@@ -25,7 +25,8 @@ typedef struct tagFftsPlusTaskInfo {


#pragma pack(pop)


RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *hdl, const void *devFunc, void **addr, uint32_t *prefetchCnt);
RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *hdl, const void *kernelInfoExt, void **addr,
uint32_t *prefetchCnt);


RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stm);




+ 49
- 16
third_party/fwkacllib/inc/runtime/rt_mem_queue.h View File

@@ -184,6 +184,13 @@ typedef enum rtGroupType {
RT_GRP_TYPE_BIND_DP_CPU_EXCLUSIVE /* Bound to an AICPU, intra-group threads are mutex awakened */
} rtGroupType_t;


typedef struct tagInitFlowGwInfo {
const char_t *groupName;
uint64_t schedPolicy;
uint64_t reschedInterval;
char_t rsv[128];
} rtInitFlowGwInfo_t;

/**
* @ingroup rt_mem_queue
* @brief init queue schedule
@@ -193,6 +200,15 @@ typedef enum rtGroupType {
*/
RTS_API rtError_t rtMemQueueInitQS(int32_t devId, const char_t *grpName);


/**
* @ingroup rt_mem_queue
* @brief init flow gateway
* @param [in] devId the logical device id
* @param [in] initInfo Initialization parameters
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueInitFlowGw(int32_t devId, const rtInitFlowGwInfo_t * const initInfo);
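A hedged initialization sketch (the group name and scheduling values are placeholders, not values taken from this patch):

rtInitFlowGwInfo_t gwInfo = {};
gwInfo.groupName = "flowgw_group";   // hypothetical group name
gwInfo.schedPolicy = 0U;
gwInfo.reschedInterval = 0U;
(void)rtMemQueueInitFlowGw(0, &gwInfo);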

/**
* @ingroup rt_mem_queue
* @brief create mbuf queue
@@ -222,24 +238,24 @@ RTS_API rtError_t rtMemQueueInit(int32_t devId);


/**
* @ingroup rt_mem_queue
* @brief enqueu mbuf
* @brief enqueue memBuf
* @param [in] devId the logical device id
* @param [in] qid queue id
* @param [in] mbuf enqueue mbuf
* @param [in] memBuf enqueue memBuf
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *mbuf);
RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *memBuf);




/**
* @ingroup rt_mem_queue
* @brief enqueu mbuf
* @brief dequeue memBuf
* @param [in] devId the logical device id
* @param [in] qid queue id
* @param [out] mbuf dequeue mbuf
* @param [out] memBuf dequeue memBuf
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **mbuf);
RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **memBuf);


/**
* @ingroup rt_mem_queue
@@ -350,47 +366,56 @@ RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg);
/**
* @ingroup rt_mem_queue
* @brief alloc buff
* @param [out] buff: buff addr alloced
* @param [out] memBuf: buff addr alloced
* @param [in] size: The amount of memory space requested
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *mbuf, uint64_t size);
RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *memBuf, uint64_t size);


/**
* @ingroup rt_mem_queue
* @brief free buff
* @param [in] buff: buff addr to be freed
* @param [in] memBuf: buff addr to be freed
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufFree(rtMbufPtr_t mbuf);
RTS_API rtError_t rtMbufFree(rtMbufPtr_t memBuf);

/**
* @ingroup rt_mem_queue
* @brief set Data len of Mbuf
* @param [in] memBuf: Mbuf addr
* @param [in] len: data len
* @return RT_ERROR_NONE for success, others for fail
*/
RTS_API rtError_t rtMbufSetDataLen(rtMbufPtr_t memBuf, uint64_t len);


/**
* @ingroup rt_mem_queue
* @brief get Data addr of Mbuf
* @param [in] mbuf: Mbuf addr
* @param [in] memBuf: Mbuf addr
* @param [out] buf: Mbuf data addr
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t mbuf, void **buf);
RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t memBuf, void **buf);


/**
* @ingroup rt_mem_queue
* @brief get total Buffer size of Mbuf
* @param [in] mbuf: Mbuf addr
* @param [in] memBuf: Mbuf addr
* @param [out] totalSize: total buffer size of Mbuf
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t mbuf, uint64_t *totalSize);
RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t memBuf, uint64_t *totalSize);


/**
* @ingroup rt_mem_queue
* @brief Get the address and length of its user_data from the specified Mbuf
* @param [in] mbuf: Mbuf addr
* @param [in] memBuf: Mbuf addr
* @param [out] priv: address of its user_data
* @param [out] size: length of its user_data
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufGetPrivInfo (rtMbufPtr_t mbuf, void **priv, uint64_t *size);
RTS_API rtError_t rtMbufGetPrivInfo(rtMbufPtr_t memBuf, void **priv, uint64_t *size);
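Taken together, a hedged allocate/use/free sketch for the Mbuf helpers above (sizes are arbitrary examples):

rtMbufPtr_t memBuf = nullptr;
void *payload = nullptr;
if (rtMbufAlloc(&memBuf, 1024U) == RT_ERROR_NONE) {
    (void)rtMbufGetBuffAddr(memBuf, &payload);  // payload points at the Mbuf data area
    (void)rtMbufSetDataLen(memBuf, 512U);       // mark how many bytes are actually valid
    (void)rtMbufFree(memBuf);
}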


// mem group
typedef struct {
@@ -573,6 +598,14 @@ RTS_API rtError_t rtQueueSubF2NFEvent(int32_t devId, uint32_t qId, uint32_t grou
*/
RTS_API rtError_t rtQueueSubscribe(int32_t devId, uint32_t qId, uint32_t groupId, int32_t type);


/**
* @ingroup rtBufEventTrigger
* @brief buf event trigger
* @param [in] name, group name
* @return 0 for success, others for fail
*/
RTS_API rtError_t rtBufEventTrigger(const char_t *name);

#if defined(__cplusplus)
}
#endif


+ 24
- 9
third_party/fwkacllib/inc/runtime/rt_model.h View File

@@ -44,6 +44,11 @@ typedef enum tagModelTaskType {
RT_MODEL_TASK_PROFILER_TRACE_EX, RT_MODEL_TASK_PROFILER_TRACE_EX,
RT_MODEL_TASK_FFTS_TASK, RT_MODEL_TASK_FFTS_TASK,
RT_MODEL_TASK_FFTS_PLUS_TASK, RT_MODEL_TASK_FFTS_PLUS_TASK,
RT_MODEL_TASK_DSA_TASK,
RT_MODEL_TASK_CMO,
RT_MODEL_TASK_BARRIER,
RT_MODEL_TASK_NPU_GET_FLOAT_STATUS,
RT_MODEL_TASK_NPU_CLEAR_FLOAT_STATUS,
} rtModelTaskType_t;


typedef enum tagModelStreamType {
@@ -115,9 +120,9 @@ typedef struct tagKernelTaskInfo {
uint16_t argsCount;
uint16_t argsSize;
uint16_t reserved;
char_t *stubFunc;
const char_t *stubFunc;
uint8_t *smDesc;
uint8_t *args;
const uint8_t *args;
uint16_t *argsOffset;
} rtKernelTaskInfo_t;


@@ -126,17 +131,17 @@ typedef struct tagAllKernelTaskInfo {
uint16_t argsCount;
uint16_t argsSize;
uint16_t reserved;
void *devfunc;
const void *kernelInfoExt;
void *handle;
uint8_t *smDesc;
uint8_t *args;
const uint8_t *args;
uint16_t *argsOffset;
} rtAllKernelTaskInfo_t;


typedef struct tagKernelTaskInfoEx {
uint32_t flags;
uint32_t argsSize;
void *args;
const void *args;
uint32_t reserved[6];
} rtKernelTaskInfoEx_t;


@@ -198,9 +203,9 @@ typedef struct tagProfilerTraceExTaskInfo {
} rtProfilerTraceEx_t;


typedef struct tagrtMemcpyAsyncTaskInfo {
void *dst;
const void *dst;
uint64_t destMax;
void *src;
const void *src;
uint64_t count;
uint32_t kind;
uint32_t reserved;
@@ -212,9 +217,9 @@ typedef struct tagrtNotifyTaskInfo {
} rtNotifyTaskInfo_t;


typedef struct tagrtReduceAsyncTaskInfo {
void *dst;
const void *dst;
uint64_t destMax;
void *src;
const void *src;
uint64_t count;
uint32_t kind;
uint32_t type;
@@ -481,6 +486,16 @@ RTS_API rtError_t rtDebugRegister(rtModel_t mdl, uint32_t flag, const void *addr
*/
RTS_API rtError_t rtDebugUnRegister(rtModel_t mdl);


/**
* @ingroup rt_model
* @brief set model group id
* @param [in] mdl model
* @param [in] schGrpId group id in the range [0, 4]; 0 is the default invalid value, 1-4 are valid values; a maximum of 4 groups is supported
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtModelSetSchGroupId(rtModel_t mdl, const int16_t schGrpId);
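A hedged one-liner (assumes "mdl" is a loaded rtModel_t; group 1 is just an example inside the documented 1-4 range):

(void)rtModelSetSchGroupId(mdl, (int16_t)1);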

#if defined(__cplusplus)
}
#endif


+ 20
- 1
third_party/fwkacllib/inc/runtime/rt_stars.h View File

@@ -7,7 +7,7 @@
#define CCE_RUNTIME_RT_STARS_H


#include "base.h"
#include "rt_stars_define.h"
#if defined(__cplusplus)
extern "C" {
#endif
@@ -80,6 +80,25 @@ RTS_API rtError_t rtCdqEnQueue(const char_t *queName, uint32_t cdqeIndex, void *
RTS_API rtError_t rtCdqEnQueuePtrMode(const char_t *queName, uint32_t cdqeIndex, const void *ptrAddr,
rtStream_t stm);


/**
* @ingroup rt_stars
* @brief launch common cmo task on the stream.
* @param [in] taskInfo cmo task info
* @param [in] stm launch task on the stream
* @param [in] flag flag
* @return RT_ERROR_NONE for ok, others failed
*/
RTS_API rtError_t rtCmoTaskLaunch(rtCmoTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag);

/**
* @ingroup rt_stars
* @brief launch barrier cmo task on the stream.
* @param [in] taskInfo barrier task info
* @param [in] stm launch task on the stream
* @param [in] flag flag
* @return RT_ERROR_NONE for ok, others failed
*/
RTS_API rtError_t rtBarrierTaskLaunch(rtBarrierTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag);
#if defined(__cplusplus)


}


+ 58
- 0
third_party/fwkacllib/inc/runtime/rt_stars_define.h View File

@@ -32,6 +32,37 @@ typedef struct tagStarsSqeHeader {
uint16_t taskId;
} rtStarsSqeHeader_t;


typedef struct tagStarsDsaSqe {
// 0-7 bytes
rtStarsSqeHeader_t sqeHeader;
// 8-11 bytes
uint32_t start : 1;
uint32_t functionType : 3;
uint32_t dataType : 3;
uint32_t algoType : 3;
uint32_t paramVldBitmap : 5;
uint32_t paramAddrValBitmap : 7;
uint32_t reserved0 : 10;
// 12-15 bytes
uint16_t sqeIndex;
uint8_t kernelCredit;
uint8_t reserved1;
// 16-31 bytes
uint32_t dsaCfgResultAddrLow;
uint32_t dsaCfgResultAddrHigh;
uint32_t dsaCfgStateAddrLow;
uint32_t dsaCfgStateAddrHigh;
// 32-47 bytes
uint32_t dsaCfgParamAddrLow;
uint32_t dsaCfgParamAddrHigh;
uint32_t dsaCfgSeedLow;
uint32_t dsaCfgSeedHigh;
// 48-63 bytes
uint32_t dsaCfgNumberLow;
uint32_t dsaCfgNumberHigh;
uint32_t reserved2[2];
} rtStarsDsaSqe_t;

// ffts+ type
typedef enum tagFftsPlusType {
RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved
@@ -83,6 +114,33 @@ typedef struct tagFftsPlusSqe {
uint32_t reserved16[4];
} rtFftsPlusSqe_t;


typedef struct tagCmoTaskInfo {
uint8_t qos;
uint8_t partId;
uint8_t pmg;
uint8_t reserved;
uint16_t cmoType;
uint16_t opCode;
uint16_t numInner;
uint16_t numOuter;
uint32_t logicId;
uint32_t lengthInner;
uint64_t sourceAddr;
uint32_t striderOuter;
uint32_t striderInner;
} rtCmoTaskInfo_t;

typedef struct tagBarrierCmoInfo {
uint16_t cmoType; // 0 is barrier, 1 is invalid, Prefetch is 2, Write_back is 3, FE/GE only use invalid type.
uint32_t logicId;
} rtBarrierCmoInfo_t;

#define RT_CMO_MAX_BARRIER_NUM 6U // 6U is max support
typedef struct tagBarrierTaskInfo {
uint8_t logicIdNum;
rtBarrierCmoInfo_t cmoInfo[RT_CMO_MAX_BARRIER_NUM];
} rtBarrierTaskInfo_t;
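A hedged sketch of filling the struct and submitting it through rtBarrierTaskLaunch declared in rt_stars.h (the logic id and stream are placeholders):

rtBarrierTaskInfo_t barrierInfo = {};
barrierInfo.logicIdNum = 1U;
barrierInfo.cmoInfo[0].cmoType = 0U;   // 0 = barrier, per the comment above
barrierInfo.cmoInfo[0].logicId = 0U;
// (void)rtBarrierTaskLaunch(&barrierInfo, stm, 0U);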

#pragma pack(pop)


#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)


+ 7
- 14
third_party/fwkacllib/inc/tdt/data_common.h View File

@@ -1,21 +1,14 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
* @file data_common.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is used to define common data structures
*/


#ifndef HOST_INNER_INC_DATA_COMMON_H_
#define HOST_INNER_INC_DATA_COMMON_H_
#include <string>


namespace tdt {
#ifndef TDT_DATA_TYPE


+ 6
- 14
third_party/fwkacllib/inc/tdt/index_transform.h View File

@@ -1,18 +1,10 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
* @file index_transform.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2018-2019. All Rights Reserved.
*
* This program is used to get logical device id by phy device id.
*/


#ifndef INC_TDT_INDEX_TRANSFORM_H
#define INC_TDT_INDEX_TRANSFORM_H


+ 7
- 15
third_party/fwkacllib/inc/tdt/status.h View File

@@ -1,18 +1,10 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file status.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is used to describe status
*/


#ifndef INC_TDT_STATUS_H_
#define INC_TDT_STATUS_H_


+ 6
- 14
third_party/fwkacllib/inc/tdt/tdt_host_interface.h View File

@@ -1,18 +1,10 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
* @file tdt_host_interface.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is used to host server
*/


#ifndef HOST_INNER_INC_TDT_HOST_INTERFACE_H_
#define HOST_INNER_INC_TDT_HOST_INTERFACE_H_


+ 73
- 10
third_party/fwkacllib/inc/tdt/tsd_client.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright (c) Hisilicon Technologies Co., Ltd. 2018-2021. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,17 +14,22 @@
* limitations under the License.
*/


#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H_
#define TDT_HOST_INNER_INC_TSD_CLIENT_H_
#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H
#define TDT_HOST_INNER_INC_TSD_CLIENT_H


#include <condition_variable>
#include <map>
#include <memory>
#include <mutex>
#include "tdt/status.h"
#include "tdt/data_common.h"
#include "tsd/status.h"
#include "toolchain/prof_callback.h"


#ifdef WIN_TSD
#define TDT_LIB_EXPORT __declspec(dllexport)
#else
#define TDT_LIB_EXPORT __attribute__((visibility("default")))
#endif

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus
@@ -50,7 +55,51 @@ extern "C" {
* @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize);
TDT_LIB_EXPORT uint32_t TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize);

/**
* @ingroup Open
* @brief Used for the Framework process to communicate with the TSDDaemon process in 1981,
* and notify TSD to complete the initialization of other processes
*
* @par Function
* Used for the Framework process to communicate with the TSDDaemon process,
* and notify TSD to complete the initialization of other processes
*
* @param logicDeviceId [IN] type #unsigned int. Logic device ID
* @param rankSize [IN] type #unsigned int. The rankSize of the training.
* The default value is 1. When rankSize is greater than 1,
* HCCP will be pulled to perform set communication related operations.
* @param deviceMode [IN] type unsigned int. The device running mode of aicpuSd,
* it includes chipMode and DieMode
* @retval TDT_OK Success
* @retval OtherValues Failure
*
* @par Dependency
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT uint32_t TsdOpenEx(const uint32_t logicDeviceId, const uint32_t rankSize, const uint32_t deviceMode);

/**
* @ingroup InitialQs
* @brief Used for the Framework process to communicate with the TSDDaemon process,
* and notify TSD to complete the initialization of QS processes
*
* @par Function
* Used for the Framework process to communicate with the TSDDaemon process,
* and notify TSD to complete the initialization of other processes
*
* @param logicDeviceId [IN] type #unsigned int. Logic device ID
* @param groupName [IN] type #char pointer. qs group name send by host process
* @retval TDT_OK Success
* @retval OtherValues Failure
*
* @par Dependency
* @li libtsdclient.so: Library to which the interface belongs.
* @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT uint32_t TsdInitQs(const uint32_t logicDeviceId, const char_t * const groupName = nullptr);
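A hedged open/close sketch (device 0, rankSize 1 and deviceMode 0 are placeholder values, not defaults mandated by this header):

const uint32_t openRet = TsdOpenEx(0U, 1U, 0U);
if (openRet == TDT_OK) {
    // ... run the workload ...
    (void)TsdClose(0U);
}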


/**
* @ingroup Close
@@ -64,11 +113,12 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t
* @retval OtherValues Failure
*
* @par Dependency

* @li libtsdclient.so: Library to which the interface belongs.
* @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId);
TDT_LIB_EXPORT uint32_t TsdClose(const uint32_t logicDeviceId);


/**
* @ingroup UpdateProfilingMode
@@ -86,7 +136,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId);
* @li tsd_client.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'TDT_StatusT' defined
*/
TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag);
TDT_LIB_EXPORT uint32_t UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag);


/**
* @ingroup TsdSetMsprofReporterCallback
@@ -105,9 +155,22 @@ TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, con
* @li data_common.h: Header file where 'TDT_StatusT' defined
* @li prof_callback.h: Header file where 'MsprofReporterCallback' defined
*/
TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback);
TDT_LIB_EXPORT uint32_t TsdSetMsprofReporterCallback(const MsprofReporterCallback callback);


/**
* @ingroup TsdSetAttr
* @brief used to set tsd attr
*
* @par key
* key set for tsd attr, now only supports RunMode
*
* @par value
* value set to run the corresponding mode, PROCESS_MODE or THREAD_MODE
* @retval TDT_OK Success
* @retval OtherValues Failure
*/
TDT_LIB_EXPORT uint32_t TsdSetAttr(const char * const attrKey, const char * const attrValue);
#ifdef __cplusplus
}
#endif  // __cplusplus
#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H_
#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H

+ 42
- 19
third_party/fwkacllib/inc/toolchain/prof_acl_api.h View File

@@ -1,17 +1,8 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
* Description: handle perf data
* Author: xp
* Create: 2019-10-13
*/


#ifndef MSPROFILER_API_PROF_ACL_API_H_
@@ -25,6 +16,8 @@
#define PROF_L2CACHE 0x00000010ULL
#define PROF_HCCL_TRACE 0x00000020ULL
#define PROF_TRAINING_TRACE 0x00000040ULL
#define PROF_MSPROFTX 0x00000080ULL
#define PROF_RUNTIME_API 0x00000100ULL


// system profiling switch
#define PROF_CPU 0x00010000ULL
@@ -36,17 +29,18 @@
#define PROF_AIVECTORCORE_SAMPLE 0x00400000ULL


#define PROF_MODEL_EXECUTE 0x0000001000000ULL
#define PROF_RUNTIME_API 0x0000002000000ULL
#define PROF_RUNTIME_TRACE 0x0000004000000ULL
#define PROF_SCHEDULE_TIMELINE 0x0000008000000ULL
#define PROF_SCHEDULE_TRACE 0x0000010000000ULL
#define PROF_AIVECTORCORE_METRICS 0x0000020000000ULL
#define PROF_SUBTASK_TIME 0x0000040000000ULL

#define PROF_TASK_TRACE 0x0000005000062ULL
#define PROF_OP_DETAIL 0x0000080000000ULL


#define PROF_MODEL_LOAD 0x8000000000000000ULL


#define PROF_TASK_TRACE (PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | \
PROF_HCCL_TRACE | PROF_TASK_TIME)
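Since PROF_TASK_TRACE is now expressed as a composition of the individual switches, a hedged example of building a profiling DataTypeConfig word from the macros above:

const uint64_t profSwitches = PROF_TASK_TRACE | PROF_L2CACHE | PROF_MSPROFTX;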

// DataTypeConfig MASK
#define PROF_ACL_API_MASK 0x00000001ULL
#define PROF_TASK_TIME_MASK 0x00000002ULL
@@ -55,6 +49,8 @@
#define PROF_L2CACHE_MASK 0x00000010ULL
#define PROF_HCCL_TRACE_MASK 0x00000020ULL
#define PROF_TRAINING_TRACE_MASK 0x00000040ULL
#define PROF_MSPROFTX_MASK 0x00000080ULL
#define PROF_RUNTIME_API_MASK 0x00000100ULL


// system profiling mask
#define PROF_CPU_MASK 0x00010000ULL
@@ -66,12 +62,12 @@
#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000ULL


#define PROF_MODEL_EXECUTE_MASK 0x0000001000000ULL
#define PROF_RUNTIME_API_MASK 0x0000002000000ULL
#define PROF_RUNTIME_TRACE_MASK 0x0000004000000ULL
#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000ULL
#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000ULL
#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000ULL
#define PROF_SUBTASK_TIME_MASK 0x0000040000000ULL
#define PROF_OP_DETAIL_MASK 0x0000080000000ULL


#define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL


@@ -104,7 +100,7 @@ extern "C" {


MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);


typedef int32_t Status;
typedef int32_t Status;
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1;
///
/// @ingroup AscendCL
@@ -135,6 +131,33 @@ MSVP_PROF_API Status aclgrphProfGraphUnSubscribe(const uint32_t graphId);
* @retval 0 for failed
*/
MSVP_PROF_API size_t aclprofGetGraphId(const void *opInfo, size_t opInfoLen, uint32_t index);

/**
* @ingroup AscendCL
* @brief set stamp payload
*
*
* @retval void
*/
MSVP_PROF_API int aclprofSetStampPayload(void *stamp, const int32_t type, void *value);

/**
* @ingroup AscendCL
* @brief set category and name
*
*
* @retval void
*/
MSVP_PROF_API int aclprofSetCategoryName(uint32_t category, const char *categoryName);

/**
* @ingroup AscendCL
* @brief set category to stamp
*
*
* @retval void
*/
MSVP_PROF_API int aclprofSetStampCategory(void *stamp, uint32_t category);
#ifdef __cplusplus
}
#endif
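
The three setters above make up the new msprofTx tagging surface added in this diff. A hedged usage sketch follows; how the stamp handle is created is not shown here, so it is passed in, and the category id, name, and payload type code are illustrative values only:

#include <stdint.h>

static int TagStamp(void *stamp) {
    /* Name category 1 once, then attach that category to the stamp. */
    (void)aclprofSetCategoryName(1U, "GRAPH_EXECUTE");
    (void)aclprofSetStampCategory(stamp, 1U);
    /* Attach a numeric payload; the type code 0 is an assumption. */
    int64_t value = 42;
    return aclprofSetStampPayload(stamp, 0, &value);
}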


+ 8
- 16
third_party/fwkacllib/inc/toolchain/prof_callback.h

@@ -1,17 +1,8 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
* Description: handle perf data
* Author: xp
* Create: 2019-10-13
*/


#ifndef MSPROFILER_PROF_CALLBACK_H_
@@ -76,7 +67,8 @@ enum MsprofReporterModuleId {
MSPROF_MODULE_HCCL, // HCCL
MSPROF_MODULE_ACL, // AclModule
MSPROF_MODULE_FRAMEWORK, // Framework
MSPROF_MODULE_RUNTIME // runtime
MSPROF_MODULE_RUNTIME, // runtime
MSPROF_MODULE_MSPROF // msprofTx
};


/**
@@ -119,7 +111,7 @@ struct MsprofGeOptions {
*/
enum MsprofCtrlCallbackType {
MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env
MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json
MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options
MSPROF_CTRL_FINALIZE, // stop profiling
MSPROF_CTRL_INIT_DYNA = 0xFF, // start profiling for dynamic profiling


+ 449
- 0
third_party/fwkacllib/inc/toolchain/prof_common.h

@@ -0,0 +1,449 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
* Description: handle perf data
* Author: Huawei Technologies Co., Ltd.
* Create: 2019-10-13
*/
#ifndef MSPROFILER_PROF_COMMON_H_
#define MSPROFILER_PROF_COMMON_H_

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

#include <stdint.h>

#define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a

enum MsprofDataTag {
MSPROF_ACL_DATA_TAG = 0, //acl data tag, range: 0~19
MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, //ge data tag, range: 20~39
MSPROF_GE_DATA_TAG_FUSION = 21,
MSPROF_GE_DATA_TAG_INFER = 22,
MSPROF_GE_DATA_TAG_TASK = 23,
MSPROF_GE_DATA_TAG_TENSOR = 24,
MSPROF_GE_DATA_TAG_STEP = 25,
MSPROF_GE_DATA_TAG_ID_MAP = 26,
MSPROF_GE_DATA_TAG_HOST_SCH = 27,
MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59
MSPROF_RUNTIME_DATA_TAG_TRACK = 41,
MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79
MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99
MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119
MSPROF_MSPROFTX_DATA_TAG = 120, //msproftx data tag, range: 120~139
MSPROF_DATA_TAG_MAX = 65536, //data tag value type is uint16_t
};
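
Every record defined below begins with the 16-bit magic number followed by the 16-bit data tag, so a reader can classify a raw report before casting it to the concrete struct. A small sketch of that check (an assumption about how the report buffer is consumed, not something this header mandates):

#include <stdint.h>
#include <string.h>

static int GetDataTag(const void *record, uint32_t len, uint16_t *tag) {
    uint16_t head[2];
    if (record == NULL || tag == NULL || len < sizeof(head)) {
        return -1;
    }
    (void)memcpy(head, record, sizeof(head));
    if (head[0] != MSPROF_DATA_HEAD_MAGIC_NUM) {
        return -1;   /* not a profiling record */
    }
    *tag = head[1];  /* one of MsprofDataTag */
    return 0;
}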

/**
* @brief struct of mixed data
*/
#define MSPROF_MIX_DATA_RESERVE_BYTES 7
#define MSPROF_MIX_DATA_STRING_LEN 120
enum MsprofMixDataType {
MSPROF_MIX_DATA_HASH_ID = 0,
MSPROF_MIX_DATA_STRING,
};
struct MsprofMixData {
uint8_t type; // MsprofMixDataType
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES];
union {
uint64_t hashId;
char dataStr[MSPROF_MIX_DATA_STRING_LEN];
} data;
};
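
A short sketch of the two encodings the union supports: short names carried inline as a string, longer ones as a pre-computed hash id (the hashing scheme itself is outside this header). Both helpers are illustrative, not part of the file:

#include <stdint.h>
#include <string.h>

static void FillMixString(struct MsprofMixData *mix, const char *name) {
    mix->type = MSPROF_MIX_DATA_STRING;
    (void)strncpy(mix->data.dataStr, name, MSPROF_MIX_DATA_STRING_LEN - 1);
    mix->data.dataStr[MSPROF_MIX_DATA_STRING_LEN - 1] = '\0';
}

static void FillMixHash(struct MsprofMixData *mix, uint64_t hashId) {
    mix->type = MSPROF_MIX_DATA_HASH_ID;
    mix->data.hashId = hashId;
}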

/**
* @brief profiling command info
*/
#define MSPROF_MAX_DEV_NUM 64
struct MsprofCommandHandle {
uint64_t profSwitch;
uint64_t profSwitchHi;
uint32_t devNums;
uint32_t devIdList[MSPROF_MAX_DEV_NUM];
uint32_t modelId;
uint32_t type;
};
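
Here devNums bounds how many entries of devIdList are valid, and profSwitch / profSwitchHi presumably carry the low and high 64 bits of the switch word. A sketch of the bounds-checked device lookup (illustrative helper, not part of the header):

#include <stdint.h>

static int CmdTargetsDevice(const struct MsprofCommandHandle *cmd, uint32_t devId) {
    uint32_t n = (cmd->devNums < MSPROF_MAX_DEV_NUM) ? cmd->devNums : MSPROF_MAX_DEV_NUM;
    for (uint32_t i = 0U; i < n; ++i) {
        if (cmd->devIdList[i] == devId) {
            return 1;
        }
    }
    return 0;
}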

/**
* @brief struct of data reported by acl
*/
#define MSPROF_ACL_DATA_RESERVE_BYTES 32
#define MSPROF_ACL_API_NAME_LEN 64
enum MsprofAclApiType {
MSPROF_ACL_API_TYPE_OP = 1,
MSPROF_ACL_API_TYPE_MODEL,
MSPROF_ACL_API_TYPE_RUNTIME,
MSPROF_ACL_API_TYPE_OTHERS,
};
struct MsprofAclProfData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_ACL_DATA_TAG;
uint32_t apiType; // enum MsprofAclApiType
uint64_t beginTime;
uint64_t endTime;
uint32_t processId;
uint32_t threadId;
char apiName[MSPROF_ACL_API_NAME_LEN];
uint8_t reserve[MSPROF_ACL_DATA_RESERVE_BYTES];
};

/**
* @brief struct of data reported by GE
*/
#define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104
struct MsprofGeProfModelLoadData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD;
uint32_t modelId;
MsprofMixData modelName;
uint64_t startTime;
uint64_t endTime;
uint8_t reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8
#define MSPROF_GE_FUSION_OP_NUM 8
struct MsprofGeProfFusionData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION;
uint32_t modelId;
MsprofMixData fusionName;
uint64_t inputMemSize;
uint64_t outputMemSize;
uint64_t weightMemSize;
uint64_t workspaceMemSize;
uint64_t totalMemSize;
uint64_t fusionOpNum;
uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM];
uint8_t reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64
struct MsprofGeProfInferData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER;
uint32_t modelId;
MsprofMixData modelName;
uint32_t requestId;
uint32_t threadId;
uint64_t inputDataStartTime;
uint64_t inputDataEndTime;
uint64_t inferStartTime;
uint64_t inferEndTime;
uint64_t outputDataStartTime;
uint64_t outputDataEndTime;
uint8_t reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_TASK_DATA_RESERVE_BYTES 16
#define MSPROF_GE_OP_TYPE_LEN 56
enum MsprofGeTaskType {
MSPROF_GE_TASK_TYPE_AI_CORE = 0,
MSPROF_GE_TASK_TYPE_AI_CPU,
MSPROF_GE_TASK_TYPE_AIV,
};
enum MsprofGeShapeType {
MSPROF_GE_SHAPE_TYPE_STATIC = 0,
MSPROF_GE_SHAPE_TYPE_DYNAMIC,
};
struct MsprofGeOpType {
uint8_t type; // MsprofMixDataType
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES];
union {
uint64_t hashId;
char dataStr[MSPROF_GE_OP_TYPE_LEN];
} data;
};
struct MsprofGeProfTaskData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK;
uint32_t taskType; // MsprofGeTaskType
MsprofMixData opName;
MsprofGeOpType opType;
uint64_t curIterNum;
uint64_t timeStamp;
uint32_t shapeType; // MsprofGeShapeType
uint32_t blockDims;
uint32_t modelId;
uint32_t streamId;
uint32_t taskId;
uint32_t threadId;
uint8_t reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES];
};
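
A sketch (C++, since the struct relies on default member initializers for the magic number and tag) of filling a static-shape AI Core task record; every field value below is illustrative:

static MsprofGeProfTaskData MakeAiCoreTaskRecord(uint32_t modelId, uint32_t streamId, uint32_t taskId) {
    MsprofGeProfTaskData rec{};                  // zero-init; magicNumber / dataTag keep their defaults
    rec.taskType = MSPROF_GE_TASK_TYPE_AI_CORE;
    rec.shapeType = MSPROF_GE_SHAPE_TYPE_STATIC;
    rec.modelId = modelId;
    rec.streamId = streamId;
    rec.taskId = taskId;
    rec.blockDims = 1U;                          // illustrative
    rec.curIterNum = 1ULL;                       // illustrative
    return rec;
}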

#define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8
#define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8
#define MSPROF_GE_TENSOR_DATA_NUM 5
enum MsprofGeTensorType {
MSPROF_GE_TENSOR_TYPE_INPUT = 0,
MSPROF_GE_TENSOR_TYPE_OUTPUT,
};
struct MsprofGeTensorData {
uint32_t tensorType; // MsprofGeTensorType
uint32_t format;
uint32_t dataType;
uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN];
};

struct MsprofGeProfTensorData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR;
uint32_t modelId;
uint64_t curIterNum;
uint32_t streamId;
uint32_t taskId;
uint32_t tensorNum;
MsprofGeTensorData tensorData[MSPROF_GE_TENSOR_DATA_NUM];
uint8_t reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27
enum MsprofGeStepTag {
MSPROF_GE_STEP_TAG_BEGIN = 0,
MSPROF_GE_STEP_TAG_END,
};
struct MsprofGeProfStepData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP;
uint32_t modelId;
uint32_t streamId;
uint32_t taskId;
uint64_t timeStamp;
uint64_t curIterNum;
uint32_t threadId;
uint8_t tag; // MsprofGeStepTag
uint8_t reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6
struct MsprofGeProfIdMapData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP;
uint32_t graphId;
uint32_t modelId;
uint32_t sessionId;
uint64_t timeStamp;
uint16_t mode;
uint8_t reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24
struct MsprofGeProfHostSchData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH;
uint32_t threadId; // record in start event
uint64_t element;
uint64_t event;
uint64_t startTime; // record in start event
uint64_t endTime; // record in end event
uint8_t reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES];
};

/**
* @brief struct of data reported by RunTime
*/
#define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106
#define MSPROF_RUNTIME_TASK_ID_NUM 10
#define MSPROF_RUNTIME_API_NAME_LEN 64
struct MsprofRuntimeProfApiData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API;
uint32_t threadId;
uint64_t entryTime;
uint64_t exitTime;
uint64_t dataSize;
uint8_t apiName[MSPROF_RUNTIME_API_NAME_LEN];
uint32_t retCode;
uint32_t streamId;
uint32_t taskNum;
uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM];
uint16_t memcpyDirection;
uint8_t reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES];
};

#define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10
#define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32
struct MsprofRuntimeProfTrackData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK;
uint32_t threadId;
uint64_t timeStamp;
char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN];
uint32_t taskId;
uint16_t streamId;
uint8_t reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES];
};

/**
 * @brief struct of data reported by AI CPU
*/
#define MSPROF_AICPU_DATA_RESERVE_BYTES 9
struct MsprofAicpuProfData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_AICPU_DATA_TAG;
uint16_t streamId;
uint16_t taskId;
uint64_t runStartTime;
uint64_t runStartTick;
uint64_t computeStartTime;
uint64_t memcpyStartTime;
uint64_t memcpyEndTime;
uint64_t runEndTime;
uint64_t runEndTick;
uint32_t threadId;
uint32_t deviceId;
uint64_t submitTick;
uint64_t scheduleTick;
uint64_t tickBeforeRun;
uint64_t tickAfterRun;
uint32_t kernelType;
uint32_t dispatchTime;
uint32_t totalTime;
uint16_t fftsThreadId;
uint8_t version;
uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES];
};

/**
* @brief struct of data reported by DP
*/
#define MSPROF_DP_DATA_RESERVE_BYTES 16
#define MSPROF_DP_DATA_ACTION_LEN 16
#define MSPROF_DP_DATA_SOURCE_LEN 64
struct MsprofDpProfData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_DP_DATA_TAG;
uint32_t rsv; // Ensure 8-byte alignment
uint64_t timeStamp;
char action[MSPROF_DP_DATA_ACTION_LEN];
char source[MSPROF_DP_DATA_SOURCE_LEN];
uint64_t index;
uint64_t size;
uint8_t reserve[MSPROF_DP_DATA_RESERVE_BYTES];
};

/**
* @brief struct of data reported by HCCL
*/
#pragma pack(4)
struct MsprofHcclProfNotify {
uint32_t taskID;
uint64_t notifyID;
uint32_t stage;
uint32_t remoteRank;
uint32_t transportType;
uint32_t role; // role {0: dst, 1:src}
double durationEstimated;
};

struct MsprofHcclProfReduce {
uint32_t taskID;
uint64_t src;
uint64_t dst;
uint64_t size;
uint32_t op; // {0: sum, 1: mul, 2: max, 3: min}
uint32_t dataType; // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64}
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
uint32_t remoteRank;
uint32_t transportType; // transport type {0: SDMA, 1: RDMA, 2:LOCAL}
uint32_t role; // role {0: dst, 1:src}
double durationEstimated;
};

struct MsprofHcclProfRDMA {
uint32_t taskID;
uint64_t src;
uint64_t dst;
uint64_t size;
uint64_t notifyID;
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
uint32_t remoteRank;
uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL}
uint32_t role; // role {0: dst, 1:src}
uint32_t type; // RDMA type {0: RDMASendNotify, 1:RDMASendPayload}
double durationEstimated;
};

struct MsprofHcclProfMemcpy {
uint32_t taskID;
uint64_t src;
uint64_t dst;
uint64_t size;
uint64_t notifyID;
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
uint32_t remoteRank;
uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL}
uint32_t role; // role {0: dst, 1:src}
double durationEstimated;
};

struct MsprofHcclProfStageStep {
uint32_t rank;
uint32_t rankSize;
};

struct MsprofHcclProfFlag {
uint64_t cclTag;
uint64_t groupName;
uint32_t localRank;
uint32_t workFlowMode;
};

/**
* @name MsprofHcclProfData
* @brief struct of data reported by hccl
*/
struct MsprofHcclProfData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_HCCL_DATA_TAG;
uint32_t planeID;
uint32_t deviceID;
uint32_t streamID;
double ts;
char name[16];
union {
MsprofHcclProfNotify notify;
MsprofHcclProfReduce reduce;
MsprofHcclProfStageStep stageStep;
MsprofHcclProfMemcpy forMemcpy;
MsprofHcclProfRDMA RDMA;
MsprofHcclProfFlag flag;
} args;
};
#pragma pack()
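
The pack(4) region above gives the HCCL records 4-byte packing, so a consumer should copy a record out of the raw report buffer rather than cast-and-dereference it in place. A minimal sketch of that (the buffer protocol itself is an assumption):

#include <stdint.h>
#include <string.h>

static int ReadHcclRecord(const void *buf, uint32_t len, struct MsprofHcclProfData *out) {
    if (buf == NULL || out == NULL || len < sizeof(*out)) {
        return -1;
    }
    (void)memcpy(out, buf, sizeof(*out));
    return (out->magicNumber == MSPROF_DATA_HEAD_MAGIC_NUM &&
            out->dataTag == MSPROF_HCCL_DATA_TAG) ? 0 : -1;
}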

/**
* @name MsprofStampInfo
* @brief struct of data reported by msproftx
*/
struct MsprofStampInfo {
uint16_t magicNumber;
uint16_t dataTag;
uint32_t processId;
uint32_t threadId;
uint32_t category; //marker category
uint32_t eventType;
int32_t payloadType;
union PayloadValue //payload info for marker
{
uint64_t ullValue;
int64_t llValue;
double dValue;
uint32_t uiValue[2];
int32_t iValue[2];
float fValue[2];
} payload;
uint64_t startTime;
uint64_t endTime;
int32_t messageType;
char message[128];
uint8_t reserve0[4];
uint8_t reserve1[72];
};
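
A sketch of reading the payload union according to payloadType, which ties back to aclprofSetStampPayload above; the numeric type codes used here are assumptions for illustration, since this header does not enumerate them:

static double StampPayloadAsDouble(const struct MsprofStampInfo *stamp) {
    switch (stamp->payloadType) {
        case 0:  return (double)stamp->payload.ullValue;
        case 1:  return (double)stamp->payload.llValue;
        case 2:  return stamp->payload.dValue;
        default: return 0.0;  /* unknown payload type */
    }
}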

#ifdef __cplusplus
}
#endif

#endif // MSPROFILER_PROF_COMMON_H_

Some files were not shown because too many files changed in this diff
