Merge pull request !2108 from yanghaoran/releasetags/v1.8.0^2
@@ -134,6 +134,7 @@ static const int ACL_ERROR_DRV_FAILURE = 500004; | |||
static const int ACL_ERROR_PROFILING_FAILURE = 500005; | |||
#define ACL_TENSOR_SHAPE_RANGE_NUM 2 | |||
#define ACL_TENSOR_VALUE_RANGE_NUM 2 | |||
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE | |||
typedef enum { | |||
@@ -336,6 +337,19 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc); | |||
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount, | |||
int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief set value range for aclTensorDesc | |||
* | |||
* @param desc [OUT] pointer to the data of aclTensorDesc | |||
* @param valueCount [IN] the number of value | |||
* @param valueRange [IN] the range of value | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclSetTensorValueRange(aclTensorDesc *desc, size_t valueCount, | |||
int64_t valueRange[][ACL_TENSOR_VALUE_RANGE_NUM]); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief get data type specified by the tensor description | |||
@@ -41,6 +41,8 @@ typedef enum { | |||
typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag; | |||
typedef struct aclGraphDumpOption aclGraphDumpOption; | |||
/** | |||
* @ingroup AscendCL | |||
* @brief compile op | |||
@@ -114,6 +116,55 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief generate graph and dump | |||
* | |||
* @param opType [IN] op type | |||
* @param numInputs [IN] number of inputs | |||
* @param inputDesc [IN] pointer to array of input tensor descriptions | |||
* @param inputs [IN] pointer to array of input buffers | |||
* @param numOutputs [IN] number of outputs | |||
* @param outputDesc [IN] pointer to array of output tensor descriptions | |||
* @param outputs [IN] pointer to array of outputs buffers | |||
* @param attr [IN] pointer to instance of aclopAttr. | |||
* may pass nullptr if the op has no attribute | |||
* @param engineType [IN] engine type | |||
* @param graphDumpPath [IN] dump path, if the suffix is ".txt", it means file path, else it means directory path | |||
* @param graphDumpOpt [IN] dump option, nullptr is supported | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclGenGraphAndDumpForOp( | |||
const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, | |||
aclopEngineType engineType, const char *graphDumpPath, const aclGraphDumpOption *graphDumpOpt); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Create the graph dump option | |||
* | |||
* @retval null for failed | |||
* @retval OtherValues success | |||
* | |||
* @see aclDestroyGraphDumpOpt | |||
*/ | |||
ACL_FUNC_VISIBILITY aclGraphDumpOption *aclCreateGraphDumpOpt(); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Destroy graph dump option | |||
* | |||
* @param graphDumpOpt [IN] pointer to the graph dump option | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclCreateGraphDumpOpt | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclDestroyGraphDumpOpt(const aclGraphDumpOption *graphDumpOpt); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
@@ -37,6 +37,7 @@ extern "C" { | |||
#define ACL_PROF_HCCL_TRACE 0x0020ULL | |||
#define ACL_PROF_TRAINING_TRACE 0x0040ULL | |||
#define ACL_PROF_MSPROFTX 0x0080ULL | |||
#define ACL_PROF_RUNTIME_API 0x0100ULL | |||
/** | |||
* @deprecated please use aclprofGetOpTypeLen and aclprofGetOpTNameLen instead | |||
@@ -367,6 +368,79 @@ MSVP_PROF_API aclprofStepInfo *aclprofCreateStepInfo(); | |||
*/ | |||
MSVP_PROF_API void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief create pointer to aclprofstamp | |||
* | |||
* | |||
* @retval aclprofStamp pointer | |||
*/ | |||
MSVP_PROF_API void *aclprofCreateStamp(); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief destory stamp pointer | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
MSVP_PROF_API void aclprofDestroyStamp(void *stamp); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Record push timestamp | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
MSVP_PROF_API aclError aclprofPush(void *stamp); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Record pop timestamp | |||
* | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
MSVP_PROF_API aclError aclprofPop(); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Record range start timestamp | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
MSVP_PROF_API aclError aclprofRangeStart(void *stamp, uint32_t *rangeId); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Record range end timestamp | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
MSVP_PROF_API aclError aclprofRangeStop(uint32_t rangeId); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief set message to stamp | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
MSVP_PROF_API aclError aclprofSetStampTraceMessage(void *stamp, const char *msg, uint32_t msgLen); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Record mark timestamp | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
MSVP_PROF_API aclError aclprofMark(void *stamp); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
@@ -44,6 +44,11 @@ typedef enum aclrtEventStatus { | |||
ACL_EVENT_STATUS_RESERVED = 2, | |||
} aclrtEventStatus; | |||
typedef enum aclrtEventRecordedStatus { | |||
ACL_EVENT_RECORDED_STATUS_NOT_READY = 0, | |||
ACL_EVENT_RECORDED_STATUS_COMPLETE = 1, | |||
} aclrtEventRecordedStatus; | |||
typedef enum aclrtEventWaitStatus { | |||
ACL_EVENT_WAIT_STATUS_COMPLETE = 0, | |||
ACL_EVENT_WAIT_STATUS_NOT_READY = 1, | |||
@@ -503,8 +508,21 @@ ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream strea | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_DEPRECATED_MESSAGE("aclrtQueryEvent is deprecated, use aclrtQueryEventStatus instead") | |||
ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Queries an event's status | |||
* | |||
* @param event [IN] event to query | |||
* @param status [OUT] event recorded status | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtQueryEventStatus(aclrtEvent event, aclrtEventRecordedStatus *status); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Queries an event's wait-status | |||
@@ -32,42 +32,43 @@ | |||
#endif | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000; | |||
static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013; | |||
static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014; | |||
static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; | |||
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; | |||
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; | |||
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; | |||
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; | |||
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020; | |||
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021; | |||
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022; | |||
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; | |||
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; | |||
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; | |||
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005; | |||
static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; | |||
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; | |||
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; | |||
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; | |||
static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009U; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011U; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012U; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013U; | |||
static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014U; | |||
static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015U; | |||
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016U; | |||
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017U; | |||
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018U; | |||
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019U; | |||
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020U; | |||
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021U; | |||
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022U; | |||
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000U; | |||
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001U; | |||
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000U; | |||
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006U; | |||
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007U; | |||
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008U; | |||
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009U; | |||
#ifdef __cplusplus | |||
} // namespace ge | |||
@@ -44,6 +44,7 @@ static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callbac | |||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | |||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||
@@ -61,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | |||
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | |||
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||
@@ -99,6 +101,11 @@ static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // devic | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode | |||
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die | |||
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
@@ -107,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconn | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -158,7 +158,13 @@ enum acldvppJpegFormat { | |||
ACL_JPEG_CSS_UNKNOWN = 1000 | |||
}; | |||
enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0, ACL_DVPP_MODE_UINT32, ACL_DVPP_CHANNEL_ID_UINT64 }; | |||
enum acldvppChannelDescParamType { | |||
ACL_DVPP_CSC_MATRIX_UINT32 = 0, | |||
ACL_DVPP_MODE_UINT32, | |||
ACL_DVPP_CHANNEL_ID_UINT64, | |||
ACL_DVPP_CHANNEL_HEIGHT_UINT32, | |||
ACL_DVPP_CHANNEL_WIDTH_UINT32 | |||
}; | |||
enum aclvdecChannelDescParamType { | |||
ACL_VDEC_CSC_MATRIX_UINT32 = 0, | |||
@@ -20,15 +20,27 @@ | |||
#include <map> | |||
#include <string> | |||
#include "ge_error_codes.h" | |||
#include "graph/types.h" | |||
#include "ge_api_types.h" | |||
namespace ge { | |||
#ifdef __GNUC__ | |||
#define ATTRIBUTED_DEPRECATED(replacement) __attribute__((deprecated("Please use " #replacement " instead."))) | |||
#else | |||
#define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead.")) | |||
#endif | |||
// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit | |||
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ | |||
constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12U) | \ | |||
(static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value))); \ | |||
const ErrorNoRegisterar g_errorno_##name((name), (desc)); | |||
#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_errorno_##name((name), (desc)); | |||
namespace ge { | |||
class GE_FUNC_VISIBILITY StatusFactory { | |||
public: | |||
static StatusFactory *Instance() { | |||
@@ -56,7 +68,7 @@ class GE_FUNC_VISIBILITY StatusFactory { | |||
} | |||
std::string GetErrDesc(const uint32_t err) { | |||
const auto iter_find = err_desc_.find(err); | |||
const std::map<uint32_t, std::string>::const_iterator iter_find = err_desc_.find(err); | |||
if (iter_find == err_desc_.end()) { | |||
return ""; | |||
} | |||
@@ -82,59 +94,10 @@ class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||
~ErrorNoRegisterar() {} | |||
}; | |||
// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit | |||
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ | |||
constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12U) | \ | |||
(static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value))); \ | |||
const ErrorNoRegisterar g_##name##_errorno(name, desc); | |||
#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); | |||
using Status = uint32_t; | |||
// General error code | |||
GE_ERRORNO(0, 0, 0, 0, 0, SUCCESS, 0, "success"); | |||
GE_ERRORNO(0b11, 0b11, 0b111, 0xFFU, 0b11111, FAILED, 0xFFFU, "failed"); /*lint !e401*/ | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PARAM_INVALID, "Parameter invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_NOT_INIT, "GE executor not initialized yet."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "Model id invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "Data size of model invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID, "Model addr invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Queue id of model invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "The model loaded repeatedly."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Dynamic input size invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "Dynamic batch size invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_BATCH_EMPTY, "AIPP batch parameter empty."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_NOT_EXIST, "AIPP parameter not exist."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "Plugin path is invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_FORMAT_INVALID, "Format is invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_SHAPE_INVALID, "Shape is invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DATATYPE_INVALID, "Datatype is invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate memory."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED, "Failed to load model partition."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, "Failed to load weight partition."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "Failed to load task partition."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, "Failed to load op kernel partition."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA, "Failed to release the model data."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_COMMAND_HANDLE, "Command handle error."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_GET_TENSOR_INFO, "Get tensor info error."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_UNLOAD_MODEL, "Load model error."); | |||
} // namespace ge | |||
#endif // INC_EXTERNAL_GE_GE_API_ERROR_CODES_H_ |
@@ -28,96 +28,98 @@ | |||
namespace ge { | |||
// Option key: graph run mode | |||
const char *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; | |||
const char *const OPTION_DEVICE_TYPE = "ge.deviceType"; | |||
const char_t *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; | |||
const char_t *const OPTION_DEVICE_TYPE = "ge.deviceType"; | |||
// Option key: ome init | |||
const char *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; | |||
const char *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId"; | |||
const char *const OPTION_EXEC_JOB_ID = "ge.exec.jobId"; | |||
const char *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom"; | |||
const char *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd"; | |||
const char *const OPTION_EXEC_RANK_ID = "ge.exec.rankId"; | |||
const char *const OPTION_EXEC_POD_NAME = "ge.exec.podName"; | |||
const char *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode"; | |||
const char *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile"; | |||
const char *const GE_AICPU_FLAG = "ge.aicpuFlag"; | |||
const char *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath"; | |||
const char_t *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; | |||
const char_t *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId"; | |||
const char_t *const OPTION_EXEC_JOB_ID = "ge.exec.jobId"; | |||
const char_t *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom"; | |||
const char_t *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd"; | |||
const char_t *const OPTION_EXEC_RANK_ID = "ge.exec.rankId"; | |||
const char_t *const OPTION_EXEC_POD_NAME = "ge.exec.podName"; | |||
const char_t *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode"; | |||
const char_t *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile"; | |||
const char_t *const GE_AICPU_FLAG = "ge.aicpuFlag"; | |||
const char_t *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath"; | |||
// Dump flag and para | |||
const char *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump"; | |||
const char *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath"; | |||
const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; | |||
const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; | |||
const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; | |||
const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; | |||
const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; | |||
const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; | |||
const char *const OPTION_EXEC_ENABLE_EXCEPTION_DUMP = "ge.exec.enable_exception_dump"; | |||
const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; | |||
const char *const OPTION_EXEC_PROFILING_FPPONIT_OPTIONS = "ge.exec.profilingFpPointOptions"; | |||
const char *const OPTION_EXEC_PROFILING_BPPONIT_OPTIONS = "ge.exec.profilingBpPointOptions"; | |||
const char_t *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump"; | |||
const char_t *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath"; | |||
const char_t *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; | |||
const char_t *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; | |||
const char_t *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; | |||
const char_t *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; | |||
const char_t *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; | |||
const char_t *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; | |||
const char_t *const OPTION_EXEC_ENABLE_EXCEPTION_DUMP = "ge.exec.enable_exception_dump"; | |||
const char_t *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; | |||
const char_t *const OPTION_EXEC_PROFILING_FPPONIT_OPTIONS = "ge.exec.profilingFpPointOptions"; | |||
const char_t *const OPTION_EXEC_PROFILING_BPPONIT_OPTIONS = "ge.exec.profilingBpPointOptions"; | |||
// profiling flag | |||
const char *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; | |||
const char *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; | |||
const char_t *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; | |||
const char_t *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; | |||
// Hccl flag, if ge.exec.hcclFlag =1, it means load plugin for opskernel, else:ge.exec.hcclFlag =0 | |||
const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; | |||
const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; | |||
const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; | |||
const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; | |||
const char_t *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; | |||
const char_t *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; | |||
const char_t *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; | |||
const char_t *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; | |||
// Dynamic input flag. ge.exec.dynamicInput=1, means enable dynaimc input, | |||
// ge.exec.dynamicGraphExecuteMode, dynamic_execute[default] | |||
const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; | |||
const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; | |||
const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; | |||
const char *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; | |||
const char_t *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; | |||
const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; | |||
const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; | |||
const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; | |||
// Option key: memory init | |||
const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; | |||
const char *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; | |||
const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; | |||
const char_t *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; | |||
const char_t *const OPTION_EXEC_REUSE_ZERO_COPY_MEMORY = "ge.exec.reuseZeroCopyMemory"; | |||
namespace configure_option { | |||
const char *const STREAM_NUM = "ge.streamNum"; | |||
const char *const HEAD_STREAM = "ge.headStream"; | |||
const char *const PERF_LEVEL = "ge.perfLevel"; | |||
const char *const ENCRYPT_MODE = "ge.encryptMode"; | |||
const char *const EK_FILE = "ge.ekFile"; | |||
const char *const CERT_FILE = "ge.certFile"; | |||
const char *const HW_KEY_FILE = "ge.hwKeyFile"; | |||
const char *const PRIVATE_KEY_FILE = "ge.privateKeyFile"; | |||
const char *const FRAMEWORK_TYPE = "ge.frameworkType"; | |||
const char *const CALIBRATION_CONF_FILE = "ge.calibrationConfFile"; | |||
const char *const INSERT_OP_FILE = "ge.insertOpFile"; | |||
const char *const OUTPUT_NODE_NAME = "ge.outputNodeName"; | |||
const char *const COMPRESS_FLAG = "ge.compressFlag"; | |||
const char *const PRECISION_MODE = "ge.exec.precision_mode"; | |||
const char *const SINGLE_OP_FLAG = "ge.exec.single_op"; | |||
const char *const TRAIN_FLAG = "ge.trainFlag"; | |||
const char *const RUN_FLAG = "ge.runFlag"; | |||
const char *const LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop"; | |||
const char *const TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path"; | |||
const char *const DDK_VERSION_FLAG = "ge.DDK_version"; | |||
const char *const GE_FE_FLAG = "ge.feFlag"; | |||
const char *const STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; | |||
const char *const OUTPUT_DATATYPE = "ge.outputDatatype"; | |||
const char *const OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; | |||
const char *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; | |||
const char *const HCOM_PARALLEL = "ge.hcomParallel"; | |||
const char *const AUTO_TUNE_MODE = "ge.autoTuneMode"; | |||
const char *const SOC_VERSION = "ge.socVersion"; | |||
const char *const CORE_TYPE = "ge.engineType"; | |||
const char *const AICORE_NUM = "ge.aicoreNum"; | |||
const char *const L1_FUSION = "ge.l1Fusion"; | |||
const char *const BUFFER_OPTIMIZE = "ge.bufferOptimize"; | |||
const char *const ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; | |||
const char *const ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; | |||
const char *const FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; | |||
const char *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; | |||
const char *const ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | |||
const char *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; | |||
const char *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; | |||
const char *const PERFORMANCE_MODE = "ge.performance_mode"; | |||
const char *const SHAPE_GENERALIZED_BUILD_MODE = "ge.shape_generalized_build_mode"; | |||
const char *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; | |||
const char *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; | |||
const char_t *const STREAM_NUM = "ge.streamNum"; | |||
const char_t *const HEAD_STREAM = "ge.headStream"; | |||
const char_t *const PERF_LEVEL = "ge.perfLevel"; | |||
const char_t *const ENCRYPT_MODE = "ge.encryptMode"; | |||
const char_t *const EK_FILE = "ge.ekFile"; | |||
const char_t *const CERT_FILE = "ge.certFile"; | |||
const char_t *const HW_KEY_FILE = "ge.hwKeyFile"; | |||
const char_t *const PRIVATE_KEY_FILE = "ge.privateKeyFile"; | |||
const char_t *const FRAMEWORK_TYPE = "ge.frameworkType"; | |||
const char_t *const CALIBRATION_CONF_FILE = "ge.calibrationConfFile"; | |||
const char_t *const INSERT_OP_FILE = "ge.insertOpFile"; | |||
const char_t *const OUTPUT_NODE_NAME = "ge.outputNodeName"; | |||
const char_t *const COMPRESS_FLAG = "ge.compressFlag"; | |||
const char_t *const PRECISION_MODE = "ge.exec.precision_mode"; | |||
const char_t *const SINGLE_OP_FLAG = "ge.exec.single_op"; | |||
const char_t *const TRAIN_FLAG = "ge.trainFlag"; | |||
const char_t *const RUN_FLAG = "ge.runFlag"; | |||
const char_t *const LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop"; | |||
const char_t *const TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path"; | |||
const char_t *const DDK_VERSION_FLAG = "ge.DDK_version"; | |||
const char_t *const GE_FE_FLAG = "ge.feFlag"; | |||
const char_t *const STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; | |||
const char_t *const OUTPUT_DATATYPE = "ge.outputDatatype"; | |||
const char_t *const OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; | |||
const char_t *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; | |||
const char_t *const HCOM_PARALLEL = "ge.hcomParallel"; | |||
const char_t *const AUTO_TUNE_MODE = "ge.autoTuneMode"; | |||
const char_t *const SOC_VERSION = "ge.socVersion"; | |||
const char_t *const CORE_TYPE = "ge.engineType"; | |||
const char_t *const AICORE_NUM = "ge.aicoreNum"; | |||
const char_t *const L1_FUSION = "ge.l1Fusion"; | |||
const char_t *const BUFFER_OPTIMIZE = "ge.bufferOptimize"; | |||
const char_t *const ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; | |||
const char_t *const ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; | |||
const char_t *const FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; | |||
const char_t *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; | |||
const char_t *const ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | |||
const char_t *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; | |||
const char_t *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; | |||
const char_t *const PERFORMANCE_MODE = "ge.performance_mode"; | |||
const char_t *const SHAPE_GENERALIZED_BUILD_MODE = "ge.shape_generalized_build_mode"; | |||
const char_t *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; | |||
const char_t *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; | |||
} // namespace configure_option | |||
// Configure stream num by Session constructor options param, | |||
// its value should be int32_t type, default value is "1" | |||
@@ -227,7 +229,7 @@ const std::string OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; | |||
const std::string HCOM_PARALLEL = "ge.hcomParallel"; | |||
// configure whether to use dynamic batch size | |||
const char *const kDynamicBatchSize = "ge.dynamicBatchSize"; | |||
const char_t *const kDynamicBatchSize = "ge.dynamicBatchSize"; | |||
// configure threshold of fusion data size for communication op | |||
const std::string FUSION_TENSOR_SIZE = "ge.fusionTensorSize"; | |||
@@ -236,10 +238,10 @@ const std::string INPUT_SHAPE = "ge.inputShape"; | |||
const std::string DYNAMIC_NODE_TYPE = "ge.dynamicNodeType"; | |||
// configure whether to use dynamic image size | |||
const char *const kDynamicImageSize = "ge.dynamicImageSize"; | |||
const char_t *const kDynamicImageSize = "ge.dynamicImageSize"; | |||
// Configure whether to use dynamic dims | |||
const char *const kDynamicDims = "ge.dynamicDims"; | |||
const char_t *const kDynamicDims = "ge.dynamicDims"; | |||
// Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y, | |||
// example: GA|RL, support configure multiple, split by | | |||
@@ -275,29 +277,29 @@ const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; | |||
// Save original model file name | |||
const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | |||
const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; | |||
const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; | |||
const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; | |||
const char_t *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; | |||
const char_t *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; | |||
const char_t *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; | |||
// Configure for print op pass | |||
// Its value should be "0" or "1", default value is "1" | |||
const char *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass"; | |||
const char_t *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass"; | |||
// Configure operator compilation path | |||
// Its value should be file path, default value is "./" | |||
const char *const DEBUG_DIR = "ge.debugDir"; | |||
const char_t *const DEBUG_DIR = "ge.debugDir"; | |||
// Configure operator compiler cache path | |||
// Its value should be file path, default value is "./" | |||
const char *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir"; | |||
const char_t *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir"; | |||
// Configure operator compiler cache mode | |||
// Its value should be "disable", "enable" or "force", default value is "disable" | |||
const char *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode"; | |||
const char_t *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode"; | |||
// Configure whether to use single stream. | |||
// Its value should be "true" or "false", default value is "false" | |||
const char *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream"; | |||
const char_t *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream"; | |||
// Configure input fp16 nodes | |||
const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; | |||
@@ -322,7 +324,7 @@ const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update"; | |||
const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode"; | |||
// atc and ir option | |||
const char *const INPUT_SHAPE_RANGE = "input_shape_range"; | |||
const char_t *const INPUT_SHAPE_RANGE = "input_shape_range"; | |||
// Configure express high compile performance or high execute performance | |||
// normal: no need to compile, used saved .o files directly | |||
@@ -338,7 +340,11 @@ const std::string MODIFY_MIXLIST = "ge.exec.modify_mixlist"; | |||
const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode"; | |||
const char *const FILE_CONSTANT_PATH = "ge.exec.value_bins"; | |||
const std::string OP_WAIT_TIMEOUT = "ge.exec.opWaitTimeout"; | |||
const std::string OP_EXECUTE_TIMEOUT = "ge.exec.opExecuteTimeout"; | |||
const char_t *const FILE_CONSTANT_PATH = "ge.exec.value_bins"; | |||
// Graph run mode | |||
enum GraphRunMode { PREDICTION = 0, TRAIN }; | |||
@@ -378,49 +384,49 @@ using RunAsyncCallback = std::function<void(Status, std::vector<ge::Tensor> &)>; | |||
// for ir build | |||
namespace ir_option { | |||
static const char *const INPUT_FORMAT = "input_format"; | |||
static const char *const INPUT_SHAPE = "input_shape"; | |||
static const char *const INPUT_SHAPE_RANGE = ge::INPUT_SHAPE_RANGE; | |||
static const char *const OP_NAME_MAP = "op_name_map"; | |||
static const char *const IS_DYNAMIC_INPUT = "is_dynamic_input"; | |||
static const char *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout"; | |||
static const char *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout"; | |||
static const char *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes"; | |||
static const char *const OUTPUT = "output"; | |||
static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; | |||
static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; | |||
static const char *const DYNAMIC_DIMS = kDynamicDims; | |||
static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); | |||
static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); | |||
static const char *const TUNE_DEVICE_IDS = ge::TUNE_DEVICE_IDS.c_str(); | |||
static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; | |||
static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); | |||
static const char *const CORE_TYPE = ge::CORE_TYPE.c_str(); | |||
static const char *const SOC_VERSION = ge::SOC_VERSION.c_str(); | |||
static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; | |||
static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); | |||
static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); | |||
static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); | |||
static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); | |||
static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); | |||
static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); | |||
static const char *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); | |||
static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; | |||
static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); | |||
static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); | |||
static const char *const LOG_LEVEL = "log"; | |||
static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); | |||
static const char *const DEBUG_DIR = ge::DEBUG_DIR; | |||
static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; | |||
static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; | |||
static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); | |||
static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); | |||
static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); | |||
static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); | |||
static const char *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str(); | |||
static const char *const SHAPE_GENERALIZED_BUILD_MODE = ge::SHAPE_GENERALIZED_BUILD_MODE.c_str(); | |||
static const char *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str(); | |||
static const char *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); | |||
static const char_t *const INPUT_FORMAT = "input_format"; | |||
static const char_t *const INPUT_SHAPE = "input_shape"; | |||
static const char_t *const INPUT_SHAPE_RANGE = ge::INPUT_SHAPE_RANGE; | |||
static const char_t *const OP_NAME_MAP = "op_name_map"; | |||
static const char_t *const IS_DYNAMIC_INPUT = "is_dynamic_input"; | |||
static const char_t *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout"; | |||
static const char_t *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout"; | |||
static const char_t *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes"; | |||
static const char_t *const OUTPUT = "output"; | |||
static const char_t *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; | |||
static const char_t *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; | |||
static const char_t *const DYNAMIC_DIMS = kDynamicDims; | |||
static const char_t *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); | |||
static const char_t *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); | |||
static const char_t *const TUNE_DEVICE_IDS = ge::TUNE_DEVICE_IDS.c_str(); | |||
static const char_t *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; | |||
static const char_t *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); | |||
static const char_t *const CORE_TYPE = ge::CORE_TYPE.c_str(); | |||
static const char_t *const SOC_VERSION = ge::SOC_VERSION.c_str(); | |||
static const char_t *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; | |||
static const char_t *const AICORE_NUM = ge::AICORE_NUM.c_str(); | |||
static const char_t *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); | |||
static const char_t *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); | |||
static const char_t *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); | |||
static const char_t *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); | |||
static const char_t *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); | |||
static const char_t *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); | |||
static const char_t *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; | |||
static const char_t *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); | |||
static const char_t *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); | |||
static const char_t *const LOG_LEVEL = "log"; | |||
static const char_t *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); | |||
static const char_t *const DEBUG_DIR = ge::DEBUG_DIR; | |||
static const char_t *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; | |||
static const char_t *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; | |||
static const char_t *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); | |||
static const char_t *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); | |||
static const char_t *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); | |||
static const char_t *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); | |||
static const char_t *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str(); | |||
static const char_t *const SHAPE_GENERALIZED_BUILD_MODE = ge::SHAPE_GENERALIZED_BUILD_MODE.c_str(); | |||
static const char_t *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str(); | |||
static const char_t *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); | |||
// for interface: aclgrphBuildModel | |||
#ifdef __GNUC__ | |||
@@ -98,10 +98,10 @@ GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &)) | |||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char_t *, const ModelBufferData &)) | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const std::string &output_file, const ModelBufferData &model); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char_t *output_file, const ModelBufferData &model); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -126,7 +126,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int32_t *major_version, int32 | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char_t *file, const size_t len); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -150,7 +150,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type, | |||
* @param cfg_path [IN] the config file path | |||
* @return graphStatus | |||
*/ | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSetOpAttr(Graph &graph, aclgrphAttrType attr_type, const char *cfg_path); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSetOpAttr(Graph &graph, aclgrphAttrType attr_type, const char_t *cfg_path); | |||
}; // namespace ge | |||
#endif // INC_EXTERNAL_GE_IR_BUILD_H_ |
@@ -62,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | |||
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | |||
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||
@@ -113,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconn | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -40,7 +40,7 @@ enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; | |||
class GE_FUNC_VISIBILITY GeLog { | |||
public: | |||
static const uint64_t GetTid() { | |||
static uint64_t GetTid() { | |||
#ifdef __GNUC__ | |||
const uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid)); | |||
#else | |||
@@ -56,11 +56,11 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) { | |||
return (enable == 1); | |||
} | |||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__); \ | |||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__); \ | |||
} while (false) | |||
#define GELOGW(fmt, ...) \ | |||
@@ -91,7 +91,7 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) { | |||
#define GELOGT(VALUE, fmt, ...) \ | |||
do { \ | |||
TraceStatus stat = VALUE; \ | |||
TraceStatus stat = (VALUE); \ | |||
const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ | |||
const int32_t idx = static_cast<int32_t>(stat); \ | |||
char_t *k = const_cast<char_t *>("status"); \ | |||
@@ -102,7 +102,7 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) { | |||
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error(MOD_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \ | |||
dlog_error((MOD_NAME), "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__); \ | |||
} while (false) | |||
@@ -69,7 +69,7 @@ | |||
do { \ | |||
const ge::Status _chk_status = (expr); \ | |||
if (_chk_status != ge::SUCCESS) { \ | |||
GELOGE((ge::FAILED), __VA_ARGS__); \ | |||
GELOGE(_chk_status, __VA_ARGS__); \ | |||
} \ | |||
} while (false) | |||
@@ -213,9 +213,9 @@ | |||
// If expr is not RT_ERROR_NONE, print the log | |||
#define GE_CHK_RT(expr) \ | |||
do { \ | |||
const rtError_t _rt_ret = (expr); \ | |||
if (_rt_ret != RT_ERROR_NONE) { \ | |||
GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ | |||
const rtError_t _rt_err = (expr); \ | |||
if (_rt_err != RT_ERROR_NONE) { \ | |||
GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_err); \ | |||
} \ | |||
} while (false) | |||
@@ -278,7 +278,7 @@ | |||
return (_status); \ | |||
} \ | |||
} while (false) | |||
namespace ge { | |||
template <typename T> | |||
GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { | |||
std::string fmt; | |||
@@ -287,5 +287,5 @@ GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { | |||
fmt = st.str(); | |||
return fmt; | |||
} | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ |
@@ -27,11 +27,6 @@ | |||
#include "graph/ge_tensor.h" | |||
namespace ge { | |||
extern const int64_t kBlockSize; | |||
extern const std::string kBinFileValues; | |||
extern const std::string kBinIdValue; | |||
extern const std::string kBinFilePathValue; | |||
struct FileConstantInfo { | |||
std::string value_bin_file_id; | |||
std::string value_bin_file_path; | |||
@@ -47,14 +42,11 @@ void from_json(const nlohmann::json &j, OptionInfo &option_info); | |||
Status GetFilePathFromOption(std::map<std::string, std::string> &file_id_and_path_map); | |||
Status CopyOneWeightFromFile(const void *curr_dev_ptr, const std::string &value, const size_t file_constant_size, | |||
Status CopyOneWeightFromFile(const void *const curr_dev_ptr, const std::string &value, const size_t file_constant_size, | |||
size_t &left_size); | |||
Status GetFilePath(const OpDescPtr &op_desc, const std::map<std::string, std::string> &file_id_and_path_map, | |||
std::string &file_path); | |||
Status GetFileConstantElementTotalSize(const GeShape &shape, const DataType data_type, int64_t &mem_size, | |||
const Format format = FORMAT_ND); | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H |
@@ -44,7 +44,7 @@ | |||
// Each module uses the following four macros to define error codes: | |||
#define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, (name), (value)) | |||
#define DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, (name), (value)) | |||
#define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, name, value) | |||
#define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, (name), (value)) | |||
#define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno((name), (desc)); | |||
@@ -74,7 +74,7 @@ class GE_FUNC_VISIBILITY StatusFactory { | |||
class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||
public: | |||
ErrorNoRegisterar(uint32_t err, const std::string &desc) { | |||
ErrorNoRegisterar(const uint32_t err, const std::string &desc) { | |||
StatusFactory::Instance()->RegisterErrorNo(err, desc); | |||
} | |||
~ErrorNoRegisterar() {} | |||
@@ -22,17 +22,57 @@ | |||
#include <string> | |||
#include "ge/ge_api_error_codes.h" | |||
// Each module defines error codes using the following macros, name can not be modified to (name) | |||
#define GE_ERRORNO_COMMON(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::COMMON_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_CLIENT(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::CLIENT_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_INIT(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::INIT_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_SESSION(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::SESSION_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_GRAPH(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::GRAPH_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_ENGINE(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::ENGINE_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_OPS(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::OPS_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_PLUGIN(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::PLUGIN_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_RUNTIME(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::RUNTIME_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_EXECUTOR(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_DEVICE, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::EXECUTOR_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_GENERATOR(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::GENERATOR_MODULE, name, (value), (desc)) | |||
// Get error code description | |||
#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) | |||
#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast<Status>(RT_ERROR) | |||
namespace ge { | |||
// System ID | |||
enum SystemIdType { SYSID_GE = 8 }; | |||
enum class InnSystemIdType { SYSID_GE = 8 }; | |||
// Runtime location | |||
enum LogRuntime { | |||
enum class InnLogRuntime { | |||
RT_HOST = 0b01, | |||
RT_DEVICE = 0b10, | |||
}; | |||
// Sub model | |||
enum SubModuleId { | |||
enum class InnSubModuleId { | |||
COMMON_MODULE = 0, | |||
CLIENT_MODULE = 1, | |||
INIT_MODULE = 2, | |||
@@ -47,13 +87,13 @@ enum SubModuleId { | |||
}; | |||
// Error code type | |||
enum ErrorCodeType { | |||
enum class InnErrorCodeType { | |||
ERROR_CODE = 0b01, | |||
EXCEPTION_CODE = 0b10, | |||
}; | |||
// Error level | |||
enum ErrorLevel { | |||
enum class InnErrorLevel { | |||
COMMON_LEVEL = 0b000, | |||
SUGGESTION_LEVEL = 0b001, | |||
MINOR_LEVEL = 0b010, | |||
@@ -61,33 +101,6 @@ enum ErrorLevel { | |||
CRITICAL_LEVEL = 0b100, | |||
}; | |||
// Each module defines error codes using the following macros, name can not be modified to (name) | |||
#define GE_ERRORNO_COMMON(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_CLIENT(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_INIT(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_SESSION(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_GRAPH(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_ENGINE(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_OPS(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_PLUGIN(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_RUNTIME(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_EXECUTOR(name, value, desc) \ | |||
GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_GENERATOR(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, (value), (desc)) | |||
// Get error code description | |||
#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) | |||
// Common module error code definition | |||
GE_ERRORNO_COMMON(MEMALLOC_FAILED, 0, "Failed to allocate memory!"); // 1343225856 | |||
GE_ERRORNO_COMMON(PARAM_INVALID, 1, "Parameter's invalid!"); // 1343225857 | |||
@@ -313,10 +326,6 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph ma | |||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); | |||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); | |||
static inline Status TransRtErrorCode(const int32_t error_code) { | |||
return static_cast<Status>(error_code); | |||
} | |||
#define RT_ERROR_TO_GE_STATUS(RT_ERROR) TransRtErrorCode(RT_ERROR) | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ |
@@ -40,13 +40,13 @@ enum FrameworkType { | |||
CAFFE = 0, | |||
MINDSPORE = 1, | |||
TENSORFLOW = 3, | |||
ANDROID_NN, | |||
ONNX, | |||
ANDROID_NN = 4, | |||
ONNX = 5, | |||
}; | |||
enum class GraphStage : int64_t { GRAPH_STAGE_FUZZ = 0, GRAPH_STAGE_RESERVED }; | |||
const char *const kGraphDumpStage = "DumpStage"; | |||
const char_t *const kGraphDumpStage = "DumpStage"; | |||
const std::map<std::string, std::string> kFwkTypeToStr = { | |||
{"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; | |||
@@ -70,21 +70,42 @@ const std::string kTaskTypeAicore = "AI_CORE"; | |||
const std::string kTaskTypeAicpu = "AI_CPU"; | |||
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; | |||
const std::string kTaskTypeFftsPlus = "FFTS_PLUS"; | |||
const std::string kEngineNameVectorCore = "VectorEngine"; | |||
const std::string kEngineNameHccl = "ops_kernel_info_hccl"; | |||
const std::string kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | |||
const std::string kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; | |||
const std::string kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE"; | |||
const std::string kEngineNameAiCpu = "aicpu_ascend_kernel"; | |||
const std::string kEngineNameAiCpuTf = "aicpu_tf_kernel"; | |||
const std::string kEngineNameAiCore = "AIcoreEngine"; | |||
const std::string kAtomicOpType = "DynamicAtomicAddrClean"; | |||
const std::string kShapeTypeStatic = "static"; | |||
const std::string kShapeTypeDynamic = "dynamic"; | |||
constexpr uint64_t kInferSessionId = 0U; | |||
constexpr uint64_t kReleaseFlag = 1U; | |||
constexpr uint32_t kInvalidModelId = 0xFFFFFFFFU; | |||
constexpr size_t kNumTaskWithAtomicAddrCleanTask = 2U; | |||
// dynamic execute mode | |||
const char_t *const kLazyRecompile = "lazy_recompile"; | |||
constexpr size_t kMaxHostMemInputLen = 64U; | |||
// Data cache, including data address and length | |||
struct DataBuffer { | |||
public: | |||
void *data; // Data address | |||
uint64_t length; // Data length | |||
bool isDataSupportMemShare = false; | |||
uint32_t placement = 0U; | |||
DataBuffer(void *data_in, uint64_t data_len, bool is_support_mem_share, uint32_t placement = 0U) | |||
: data(data_in), length(data_len), isDataSupportMemShare(is_support_mem_share), placement(placement) {} | |||
DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false) {} | |||
DataBuffer(void *const data_in, const uint64_t data_len, const bool is_support_mem_share = false, | |||
const uint32_t data_placement = 0U) | |||
: data(data_in), length(data_len), isDataSupportMemShare(is_support_mem_share), placement(data_placement) {} | |||
DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false), placement(0U) {} | |||
}; | |||
/// | |||
@@ -232,6 +253,9 @@ struct ModelInfo { | |||
class GE_FUNC_VISIBILITY ModelListener { | |||
public: | |||
virtual ~ModelListener() {} | |||
ModelListener() = default; | |||
ModelListener(const ModelListener &) = delete; | |||
ModelListener &operator=(const ModelListener &) = delete; | |||
/// | |||
/// @brief Asynchronous callback interface | |||
/// @param [in] model_id Model ID of the callback | |||
@@ -241,7 +265,9 @@ class GE_FUNC_VISIBILITY ModelListener { | |||
virtual Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result_code, | |||
std::vector<ge::Tensor> &outputs) = 0; | |||
virtual void SetCallback(const RunAsyncCallback &callback){}; | |||
virtual void SetCallback(const RunAsyncCallback &callback) { | |||
(void)callback; | |||
} | |||
virtual uint32_t GetResultCode() { | |||
return 0U; | |||
@@ -34,12 +34,13 @@ class GE_FUNC_VISIBILITY ModelHelper { | |||
~ModelHelper(); | |||
Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, | |||
ge::ModelBufferData &model); | |||
ge::ModelBufferData &model) const; | |||
Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, | |||
const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape); | |||
Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file); | |||
Status LoadModel(const ge::ModelData &model_data); | |||
Status LoadRootModel(const ge::ModelData &model_data); | |||
static void SetModelToGeModel(GeModelPtr &ge_model, Model &model); | |||
GeModelPtr GetGeModel(); | |||
GeRootModelPtr GetGeRootModel(); | |||
@@ -67,7 +68,6 @@ class GE_FUNC_VISIBILITY ModelHelper { | |||
Status GenerateGeModel(OmFileLoadHelper &om_load_helper); | |||
Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper); | |||
Status LoadModelData(OmFileLoadHelper &om_load_helper); | |||
void SetModelToGeModel(GeModelPtr &ge_model, Model &model) const; | |||
Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | |||
Status LoadWeights(OmFileLoadHelper &om_load_helper); | |||
Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | |||
@@ -21,25 +21,20 @@ | |||
#include <vector> | |||
#include "external/ge/ge_ir_build.h" | |||
#include "framework/common/fmk_types.h" | |||
#include "framework/common/types.h" | |||
#include "framework/common/ge_types.h" | |||
using ProcParam = struct PROC_PARAM; | |||
using std::string; | |||
using std::vector; | |||
namespace ge { | |||
struct ModelPartition { | |||
ModelPartitionType type; | |||
uint8_t *data = 0; | |||
uint32_t size = 0; | |||
const uint8_t *data = nullptr; | |||
uint32_t size = 0U; | |||
}; | |||
struct OmFileContext { | |||
std::vector<ModelPartition> partition_datas_; | |||
std::vector<char> partition_table_; | |||
uint32_t model_data_len_ = 0; | |||
std::vector<char_t> partition_table_; | |||
uint32_t model_data_len_ = 0U; | |||
}; | |||
struct SaveParam { | |||
@@ -55,13 +50,13 @@ class GE_FUNC_VISIBILITY OmFileLoadHelper { | |||
public: | |||
Status Init(const ge::ModelData &model); | |||
Status Init(uint8_t *model_data, const uint32_t model_data_size); | |||
Status Init(uint8_t *const model_data, const uint32_t model_data_size); | |||
Status Init(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); | |||
Status Init(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num); | |||
Status GetModelPartition(ModelPartitionType type, ModelPartition &partition); | |||
Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition); | |||
Status GetModelPartition(ModelPartitionType type, ModelPartition &partition, size_t model_index); | |||
Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition, const size_t model_index); | |||
OmFileContext context_; | |||
@@ -70,9 +65,9 @@ class GE_FUNC_VISIBILITY OmFileLoadHelper { | |||
private: | |||
Status CheckModelValid(const ge::ModelData &model) const; | |||
Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size); | |||
Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size); | |||
Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); | |||
Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num); | |||
bool is_inited_{false}; | |||
}; | |||
@@ -89,25 +84,24 @@ class GE_FUNC_VISIBILITY OmFileSaveHelper { | |||
ModelPartitionTable *GetPartitionTable(); | |||
Status AddPartition(ModelPartition &partition); | |||
Status AddPartition(ModelPartition &partition, size_t cur_index); | |||
Status AddPartition(const ModelPartition &partition); | |||
const std::vector<ModelPartition> &GetModelPartitions() const; | |||
Status AddPartition(const ModelPartition &partition, const size_t cur_index); | |||
Status SaveModel(const SaveParam &save_param, const char *target_file, ge::ModelBufferData &model, | |||
bool is_offline = true); | |||
Status SaveModel(const SaveParam &save_param, const char_t *const output_file, ge::ModelBufferData &model, | |||
const bool is_offline = true); | |||
Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true); | |||
Status SaveModelToFile(const char_t *const output_file, ge::ModelBufferData &model, const bool is_offline = true); | |||
std::vector<OmFileContext> model_contexts_; | |||
ModelFileHeader model_header_; | |||
OmFileContext context_; | |||
ModelPartitionTable *GetPartitionTable(size_t cur_ctx_index); | |||
ModelPartitionTable *GetPartitionTable(const size_t cur_ctx_index); | |||
Status SaveRootModel(const SaveParam &save_param, const char *output_file, ModelBufferData &model, bool is_offline); | |||
Status SaveRootModel(const SaveParam &save_param, const char_t *const output_file, ModelBufferData &model, | |||
const bool is_offline); | |||
}; | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ |
@@ -28,97 +28,13 @@ | |||
#include "framework/common/util.h" | |||
#include "graph/compute_graph.h" | |||
using std::vector; | |||
namespace ge { | |||
// Size of RC memory alignment, 2M | |||
constexpr size_t ALIGN_SIZE = 2097152; | |||
constexpr uint32_t RC_VALUE_DEFAULT = 1; | |||
constexpr uint32_t RC_VALUE_MAX = 32; | |||
// RC data type classification | |||
enum RCType { | |||
RC_DEFAULT, // Such as temporary workspace memory of operator, variable (including global and local variable) | |||
RC_HCOM, // Output of gradient aggregation, RC value should be set to 0 | |||
RC_L2LOSS, // Parameter of L2 loss operator, RC value should be set to 0 | |||
RC_INPUTOUTPUT, // Input and output tensor of operator, RC value is returned by FE calculation | |||
RC_WEIGHTS, // The weight, fp16, RC value used by FP/BP operator should be set to 1 or the actual access numbers | |||
RC_DW, // The gradient data DW and RC value output by BP operator | |||
// should be set to 1 or the actual access numbers | |||
RC_ARGS // Args of FlowTable, actual access numbers | |||
}; | |||
enum MemType { INPUT_TENSOR, OUTPUT_TENSOR, WEIGHT, WORKSPACE }; | |||
// Memory usage information < node, type, number > | |||
struct NodeInfo { | |||
std::string nodeName; | |||
MemType memType; | |||
size_t index; | |||
}; | |||
// Memory block RC value | |||
struct RCMemoryBlock { | |||
RCType type; // RC type | |||
size_t blockSize; // memory block size | |||
size_t headOffset; // Start offset from base address | |||
size_t tailOffset; // End offset from base address | |||
uint32_t rcCount; // RC value | |||
NodeInfo nodeInfo; // Input and output indexes of node objects to which RC belongs | |||
}; | |||
// L2Cache optimizer | |||
class GE_FUNC_VISIBILITY L2CacheOptimize { | |||
public: | |||
explicit L2CacheOptimize(ge::ComputeGraphPtr &graph); | |||
~L2CacheOptimize(); | |||
// Collect the information L2Cache Memory optimization | |||
Status Gath(); | |||
private: | |||
ge::ComputeGraphPtr graph_; | |||
// Save RC block information list | |||
std::vector<RCMemoryBlock> weightRCs; | |||
std::vector<RCMemoryBlock> opRCs; | |||
// Extract RC information generated by FE from compiled graph | |||
void RetirveRCinfo(); | |||
// Take the maximum common divisor of RC values for the duplicate address | |||
void Merge(std::vector<RCMemoryBlock> &blocks); | |||
// The RC information is aligned with the 2m address | |||
void Align(std::vector<RCMemoryBlock> &blocks); | |||
// Weight of l2loss operator, output of gradient aggregation output, RC value set to 0 | |||
void HandleOutputZeroRC(RCType type, ge::NodePtr node, std::vector<int64_t> &outputList, | |||
std::vector<RCMemoryBlock> &blocks); | |||
// Processing operator input Tensor's RC | |||
void HandOPInput(ge::NodePtr node, std::vector<int64_t> &inputList, std::vector<RCMemoryBlock> &blocks); | |||
// Processing operator output Tensor's RC | |||
void HandOPoutput(ge::NodePtr node, std::vector<int64_t> &outputList, std::vector<RCMemoryBlock> &blocks); | |||
constexpr size_t ALIGN_SIZE = 2097152U; | |||
// maximum common divisor | |||
uint32_t Measure(uint32_t x, uint32_t y) { | |||
if ((x == 0) || (y == 0)) return RC_VALUE_DEFAULT; | |||
uint32_t z = y; | |||
while (x % y != 0) { | |||
z = x % y; | |||
x = y; | |||
y = z; | |||
} | |||
return z; | |||
} | |||
constexpr uint32_t RC_VALUE_DEFAULT = 1U; | |||
constexpr uint32_t RC_VALUE_MAX = 32U; | |||
bool Contain(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); | |||
bool Cross(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); | |||
bool Connect(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); | |||
}; | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_ |
@@ -34,143 +34,11 @@ | |||
#include <google/protobuf/map.h> | |||
#include <unordered_map> | |||
#include <string> | |||
#include "external/graph/types.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "proto/om.pb.h" | |||
using domi::AttrDef; | |||
using domi::AttrDef_ListValue; | |||
using domi::ModelDef; | |||
using domi::NamedAttrs; | |||
using domi::OpDef; | |||
namespace ge { | |||
using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>; | |||
using AttrDefPair = ::google::protobuf::MapPair<std::string, domi::AttrDef>; | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, AttrDef &attr, OpDef *opdef); | |||
// DEFINE_ADD_ATTR_VALUE | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs); | |||
// DEFINE_ADD_ATTR_VALUE | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef); | |||
// DEFINE_ADD_ATTR_VALUE_LIST | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const char *key, const char *value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const float value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const double value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int64_t *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool HasOpAttr(const OpDef *opdef, const std::string &attr_name); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const char *value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const uint32_t value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const int32_t value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const float value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const double value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const bool value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrList(const std::string &value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrList(const bool value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrList(const float value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrList(const double value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrList(const uint32_t value, AttrDef *out); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, int32_t *value, | |||
const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, uint32_t *value, | |||
const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, float *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, double *value, const AttrDefMap &attr); | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ | |||
GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, domi::AttrDef *const out); | |||
} | |||
#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ |
@@ -31,18 +31,16 @@ | |||
#include "proto/insert_op.pb.h" | |||
namespace ge { | |||
using domi::Status; | |||
// Add Sub Mul | |||
GE_FUNC_VISIBILITY extern const uint32_t ADD_INPUT_NUM; | |||
GE_FUNC_VISIBILITY extern const uint32_t SUB_INPUT_NUM; | |||
GE_FUNC_VISIBILITY extern const uint32_t MUL_INPUT_NUM; | |||
// Permute | |||
GE_FUNC_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM; | |||
// Ssd PriroBox | |||
GE_FUNC_VISIBILITY extern const double SSD_PRIORBOX_ASPECT_RATIO_VALUE; | |||
GE_FUNC_VISIBILITY extern const float64_t SSD_PRIORBOX_ASPECT_RATIO_VALUE; | |||
GE_FUNC_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM; | |||
@@ -55,8 +53,8 @@ GE_FUNC_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT; | |||
GE_FUNC_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT; | |||
// Merge | |||
GE_FUNC_VISIBILITY extern const uint32_t MERGE_DATA_OUTPUT; | |||
GE_FUNC_VISIBILITY extern const uint32_t MERGE_INDEX_OUTPUT; | |||
GE_FUNC_VISIBILITY extern const int32_t MERGE_DATA_OUTPUT; | |||
GE_FUNC_VISIBILITY extern const int32_t MERGE_INDEX_OUTPUT; | |||
// FunctionOp | |||
GE_FUNC_VISIBILITY extern const uint32_t IF_COND_INPUT; | |||
@@ -66,86 +64,35 @@ GE_FUNC_VISIBILITY extern const uint32_t FOR_DELTA_INPUT; | |||
GE_FUNC_VISIBILITY extern const uint32_t FOR_DATA_INPUT; | |||
GE_FUNC_VISIBILITY extern const int32_t NORMAL_TENSOR_SIZE; | |||
/*lint -e148*/ | |||
class GE_FUNC_VISIBILITY OpUtils { | |||
public: | |||
/// | |||
/// @ingroup domi_ome | |||
/// @brief Check whether check_value is in [min_enum_value, max_enum_value] | |||
/// @return true Within | |||
/// @return false out of range | |||
// | |||
static inline bool CheckEnumValid(int32_t check_value, int32_t min_enum_value, int32_t max_enum_value) { | |||
return check_value < min_enum_value ? false : (check_value >= max_enum_value ? false : true); | |||
} | |||
/// | |||
/// @ingroup domi_omg | |||
/// @brief Determine whether to manually calculate the tensor size based on the values of format and dim | |||
/// @param [in] format, Format information of the tensor | |||
/// @param [in] real_dim_cnt, Tensor dim | |||
/// @return true Manually calculate the size based on dim and datatype | |||
/// @return false skip | |||
/// | |||
static bool IsComputDimsSize(const int32_t format, const uint32_t real_dim_cnt); | |||
/// | |||
/// @brief Extract AIPP parameters from AttrDefMap and splice them | |||
/// @param [in] aipp_attr attr of operator | |||
/// @param [out] aipp_params aipp parameters | |||
/// @return enum of tagCCAippInputFormat | |||
/// | |||
static Status ConvertAippParams(const NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); | |||
static Status TransferDim(const std::vector<int64_t> &dim, std::vector<int64_t> &dim_vector); | |||
static Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams &aipp_params); | |||
template <typename T> | |||
static void SliceData(const std::vector<char *> &input, int64_t chunk_size, std::vector<char *> &output, | |||
int64_t begin, int64_t out_dim, int64_t stride); | |||
static void SliceData(const std::vector<char_t *> &input, const int64_t chunk_size, std::vector<char_t *> &output, | |||
const int64_t begin, const int64_t out_dim, const int64_t stride); | |||
template <typename T> | |||
static Status SetDataByDataType(size_t out_size, const std::vector<char *> &chunk_input, | |||
const std::vector<char *> &chunk_output, GeTensor *output); | |||
static Status SetDataByDataType(const size_t out_size, const std::vector<char_t *> &chunk_input, | |||
const std::vector<char_t *> &chunk_output, GeTensor *const output); | |||
template <typename T> | |||
static Status SetOutputSliceDataByDataType(void *data, int64_t data_size, const std::vector<int64_t> &input_dims, | |||
const std::vector<int64_t> &begin, const std::vector<int64_t> &output_dims, | |||
ge::GeTensor *output, const std::vector<int64_t> &stride); | |||
static Status SetOutputSliceData(void *data, int64_t data_size, int32_t data_type, | |||
static Status SetOutputSliceDataByDataType(void *const data, const int64_t data_size, | |||
const std::vector<int64_t> &input_dims, const std::vector<int64_t> &begin, | |||
const std::vector<int64_t> &output_dims, ge::GeTensor *const output, | |||
const std::vector<int64_t> &stride); | |||
static Status SetOutputSliceData(void *const data, const int64_t data_size, const int32_t data_type, | |||
const std::vector<int64_t> &input_dims, const std::vector<int64_t> &begin, | |||
const std::vector<int64_t> &output_dims, ge::GeTensor *const output, | |||
const std::vector<int64_t> &output_dims, GeTensor *const output, | |||
const std::vector<int64_t> &stride); | |||
/// | |||
/// @ingroup domi_omg | |||
/// @brief Convert the convolutional weight data from [h, w, c, k] to [k, c, h, w] | |||
/// @param [in] input Weight data in HWCK format | |||
/// @param [in] H value of H dimension | |||
/// @param [in] W value of W dimension | |||
/// @param [in] C value of C dimension | |||
/// @param [in] K value of K dimension | |||
/// @param [out] output Data pointer after conversion. The format is KCHW. | |||
/// | |||
static void TransDataHWCK2KCHW(const void *input, int64_t h, int64_t w, int64_t c, int64_t k, void **output); | |||
/// | |||
/// @ingroup domi_omg | |||
/// @brief Converts the convolutional weight data from [k, c, h, w] to [h, w, c, k]. | |||
/// @param [in] input Weight data in HWCK format | |||
/// @param [in] K value of K dimension | |||
/// @param [in] C value of C dimension | |||
/// @param [in] H value of H dimension | |||
/// @param [in] W value of W dimension | |||
/// @param [out] output Data pointer after conversion. The format is HWCK | |||
/// | |||
static void TransDataKCHW2HWCK(const void *input, int64_t k, int64_t c, int64_t h, int64_t w, void *output); | |||
static std::vector<ConstGeTensorPtr> GetWeights(const ge::Node &node); | |||
static std::vector<ConstGeTensorPtr> GetWeights(ge::ConstNodePtr node); | |||
static std::vector<GeTensorPtr> MutableWeights(const ge::Node &node); | |||
static std::vector<GeTensorPtr> MutableWeights(const ge::NodePtr node); | |||
static Status SetWeights(ge::Node &node, const std::vector<ge::GeTensorPtr> &weights); | |||
static Status SetWeights(const ge::NodePtr node, const std::vector<ge::GeTensorPtr> &weights); | |||
static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, const DataType type, | |||
std::vector<int64_t> &dims); | |||
private: | |||
static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc); | |||
}; | |||
/*lint +e148*/ | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ |
@@ -20,6 +20,8 @@ | |||
#include <set> | |||
#include <string> | |||
#include "graph/types.h" | |||
namespace ge { | |||
class GE_FUNC_VISIBILITY OpTypeContainer { | |||
public: | |||
@@ -30,12 +32,11 @@ class GE_FUNC_VISIBILITY OpTypeContainer { | |||
~OpTypeContainer() = default; | |||
void Register(const std::string &op_type) { | |||
op_type_list_.insert(op_type); | |||
static_cast<void>(op_type_list_.insert(op_type)); | |||
} | |||
bool IsExisting(const std::string &op_type) { | |||
auto iter_find = op_type_list_.find(op_type); | |||
return iter_find != op_type_list_.end(); | |||
return op_type_list_.find(op_type) != op_type_list_.end(); | |||
} | |||
protected: | |||
@@ -47,20 +48,19 @@ class GE_FUNC_VISIBILITY OpTypeContainer { | |||
class GE_FUNC_VISIBILITY OpTypeRegistrar { | |||
public: | |||
explicit OpTypeRegistrar(const std::string &op_type) { | |||
explicit OpTypeRegistrar(const std::string &op_type) noexcept { | |||
OpTypeContainer::Instance()->Register(op_type); | |||
} | |||
~OpTypeRegistrar() {} | |||
}; | |||
} // namespace ge | |||
#define REGISTER_OPTYPE_DECLARE(var_name, str_name) \ | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *var_name; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char_t *var_name; | |||
#define REGISTER_OPTYPE_DEFINE(var_name, str_name) \ | |||
const char *var_name = str_name; \ | |||
const OpTypeRegistrar g_##var_name##_reg(str_name); | |||
#define IS_OPTYPE_EXISTING(str_name) (OpTypeContainer::Instance()->IsExisting(str_name)) | |||
} // namespace ge | |||
const char_t *var_name = str_name; \ | |||
const ge::OpTypeRegistrar g_##var_name##_reg(str_name); | |||
#define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name)) | |||
#endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_ |
@@ -24,10 +24,8 @@ | |||
/// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading | |||
/// @return Status result | |||
/// | |||
GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream); | |||
GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(const uint64_t index_id, const uint16_t tag_id, rtStream_t const stream); | |||
GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id); | |||
GE_FUNC_VISIBILITY void ProfSetGraphIdToDeviceMap(uint32_t graph_id, uint32_t &device_id); | |||
GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(const uint32_t graph_id, uint32_t &device_id); | |||
#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ |
@@ -0,0 +1,173 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef AIR_CXX_PROFILING_DEFINITIONS_H | |||
#define AIR_CXX_PROFILING_DEFINITIONS_H | |||
#include <string> | |||
#include <iostream> | |||
#include <mutex> | |||
#include <unordered_map> | |||
#include "graph/profiler.h" | |||
#include "external/ge/ge_api_types.h" | |||
#include "toolchain/prof_callback.h" | |||
namespace ge { | |||
namespace profiling { | |||
enum { | |||
kAclCompileAndExecute, | |||
kAclMatchOpModel, | |||
kAclMatchStaticOpModel, | |||
kAclMatchDynamicOpModel, | |||
kAclExecuteAsync, | |||
kAclLoadSingleOp, | |||
kAclBuildOpModel, | |||
kInferShape, | |||
kTiling, | |||
kUpdateShape, | |||
kConstPrepare, | |||
kInitHybridExecuteArgs, | |||
kInitInferShapeContext, | |||
kDestroyInferShapeContext, | |||
kResetSubgraphExecutor, | |||
kCommitInferShapeTask, | |||
kDeviceToHost, | |||
kPrepareTask, | |||
kLaunchTask, | |||
kCommitTilingTask, | |||
kAtomic, | |||
kKernelLaunchPrepare, | |||
kRtKernelLaunch, | |||
kOpExecute, | |||
kAllocMem, | |||
kCopyH2D, | |||
// Add new definitions here | |||
kProfilingIndexEnd | |||
}; | |||
// Sentinel hash id meaning "no hash registered yet".
constexpr uint64_t kInvalidHashId = 0UL;
// Process-wide profiling context: owns the Profiler instance, the string->index
// registry used to turn event names into cheap integer ids, and the enable flag.
class ProfilingContext {
 public:
  static bool IsDumpToStdEnabled();
  static ProfilingContext &GetInstance();
  ProfilingContext();
  ~ProfilingContext();
  /*
   * An alternative design is for `IsEnabled` to check only whether profiler_ is a null
   * pointer, without a separate enabled flag — that would save one flag. But it would
   * also mean profiler_ must stay null whenever profiling is disabled. For performance,
   * the profiling mechanism calls `RegisterString` at compile/load time to register
   * strings with profiler_, so that execution only uses the pre-registered indices.
   * Hence this scenario exists: profiling is not enabled at compile time (compilation
   * takes long, and profiling it would not reflect execution-time cost), so the string
   * registration done at compile time takes effect anyway. If profiling is then turned
   * on dynamically at execution time, the registered strings must still be available —
   * which is why the flag and the profiler_ pointer are kept separate.
   */
  bool IsEnabled() const noexcept {
    return enabled_ && profiler_ != nullptr;
  }
  void SetEnable() noexcept {
    enabled_ = true;
  }
  void SetDisable() noexcept {
    enabled_ = false;
  }
  // Records an event with an explicit timestamp; no-op when profiling is disabled.
  void RecordCurrentThread(const int64_t element, const int64_t event, const EventType et,
                           const std::chrono::time_point<std::chrono::system_clock> time_point) {
    if (IsEnabled()) {
      profiler_->RecordCurrentThread(element, event, et, time_point);
    }
  }
  // Convenience overload stamped with the current time.
  void RecordCurrentThread(const int64_t element, const int64_t event, const EventType et) {
    RecordCurrentThread(element, event, et, std::chrono::system_clock::now());
  }
  const Profiler *GetProfiler() const {
    return profiler_.get();
  }
  // Dumps collected records to the stream; writes a skip notice when disabled.
  void Dump(std::ostream &out_stream) const {
    if (IsEnabled()) {
      profiler_->Dump(out_stream);
    } else {
      out_stream << "Profiling not enable, skip to dump" << std::endl;
    }
  }
  void DumpToStdOut() const {
    Dump(std::cout);
  }
  // Clears collected records; keeps string registrations (see comment above).
  void Reset() {
    if (IsEnabled()) {
      profiler_->Reset();
    }
  }
  int64_t RegisterString(const std::string &str);
  int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str);
  void UpdateElementHashId(const MsprofReporterCallback reporter_callback);
  static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str,
                            uint64_t &hash_id);
  size_t GetRegisterStringNum() const {
    return strings_to_index_.size();
  }
  void Init();

 private:
  void UpdateHashByStr(const std::string &str, const uint64_t hash);

 private:
  bool inited_;
  bool enabled_;
  int64_t str_index_;                                        // next index to hand out in RegisterString
  std::unordered_map<std::string, int64_t> strings_to_index_;
  std::mutex strings_to_index_mutex_;                        // guards strings_to_index_
  std::unique_ptr<Profiler> profiler_;
};
class ScopeProfiler { | |||
public: | |||
ScopeProfiler(const int64_t element, const int64_t event) : element_(element), event_(event) { | |||
if (ProfilingContext::GetInstance().IsEnabled()) { | |||
start_trace_ = std::chrono::system_clock::now(); | |||
} | |||
} | |||
~ScopeProfiler() { | |||
if (ProfilingContext::GetInstance().IsEnabled()) { | |||
ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventStart, start_trace_); | |||
ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventEnd); | |||
} | |||
} | |||
void SetElement(const int64_t element) { | |||
element_ = element; | |||
} | |||
private: | |||
std::chrono::time_point<std::chrono::system_clock> start_trace_; | |||
int64_t element_; | |||
int64_t event_; | |||
}; | |||
} // namespace profiling | |||
} // namespace ge | |||
// Records a start event for (element, event) on the current thread.
#define PROFILING_START(element, event) \
  ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event), \
                                                                     ge::profiling::EventType::kEventStart)
// Records the matching end event for (element, event) on the current thread.
#define PROFILING_END(element, event) \
  ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event), \
                                                                     ge::profiling::EventType::kEventEnd)
// Declares a local ScopeProfiler named `profiler`: start is recorded at this
// statement, end when the enclosing scope exits.
#define PROFILING_SCOPE(element, event) ge::profiling::ScopeProfiler profiler((element), (event))
// Rebinds the element of the `profiler` declared by PROFILING_SCOPE in this scope.
#define PROFILING_SCOPE_ELEMENT(element) profiler.SetElement((element))
#endif  // AIR_CXX_PROFILING_DEFINITIONS_H
@@ -25,9 +25,9 @@ | |||
/// MAKE_GUARD([&] { Release Resource 1 }) | |||
/// Acquire Resource 2 | |||
// MAKE_GUARD([&] { Release Resource 2 }) | |||
#define GE_MAKE_GUARD(var, callback) const ScopeGuard const_guard_##var(callback) | |||
#define GE_MAKE_GUARD(var, callback) const ::ge::ScopeGuard const_guard_##var(callback) | |||
#define GE_DISMISSABLE_GUARD(var, callback) ScopeGuard make_guard_##var(callback) | |||
#define GE_DISMISSABLE_GUARD(var, callback) ::ge::ScopeGuard make_guard_##var(callback) | |||
#define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss() | |||
namespace ge { | |||
@@ -44,7 +44,7 @@ class GE_FUNC_VISIBILITY ScopeGuard { | |||
if (on_exit_scope_ != nullptr) { | |||
try { | |||
on_exit_scope_(); | |||
} catch (std::bad_function_call &e) { | |||
} catch (std::bad_function_call &) { | |||
} catch (...) { | |||
} | |||
} | |||
@@ -39,13 +39,14 @@ | |||
#include <sstream> | |||
#include <string> | |||
#include <vector> | |||
#include "graph/types.h" | |||
namespace ge { | |||
class GE_FUNC_VISIBILITY StringUtils { | |||
public: | |||
static std::string &Ltrim(std::string &s) { | |||
#if __cplusplus >= 201103L | |||
(void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int32_t c) { return std::isspace(c) == 0; })); | |||
(void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](const int32_t c) { return std::isspace(c) == 0; })); | |||
#else | |||
(void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int32_t, int32_t>(std::isspace)))); | |||
#endif | |||
@@ -54,7 +55,8 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
// lint -esym(551,*) | |||
static std::string &Rtrim(std::string &s) { /*lint !e618*/ | |||
#if __cplusplus >= 201103L | |||
(void)s.erase(std::find_if(s.rbegin(), s.rend(), [](int32_t c) { return std::isspace(c) == 0; }).base(), s.end()); | |||
(void)s.erase(std::find_if(s.rbegin(), s.rend(), [](const int32_t c) { return std::isspace(c) == 0; }).base(), | |||
s.end()); | |||
#else | |||
(void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int32_t, int32_t>(std::isspace))).base(), | |||
s.end()); | |||
@@ -79,7 +81,7 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @param [in] delim separator | |||
/// @return string array after segmentation | |||
/// | |||
static std::vector<std::string, std::allocator<std::string>> Split(const std::string &str, char delim) { | |||
static std::vector<std::string, std::allocator<std::string>> Split(const std::string &str, const char_t delim) { | |||
std::vector<std::string, std::allocator<std::string>> elems; | |||
if (str.empty()) { | |||
@@ -94,8 +96,8 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
elems.push_back(item); | |||
} | |||
auto str_size = str.size(); | |||
if ((str_size > 0) && (str[str_size - 1] == delim)) { | |||
const auto str_size = str.size(); | |||
if ((str_size > 0U) && (str[str_size - 1U] == delim)) { | |||
elems.emplace_back(""); | |||
} | |||
@@ -107,13 +109,13 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @param [in] s path name | |||
/// @return file name | |||
/// | |||
static std::string GetFileName(std::string &s) { | |||
static std::string GetFileName(const std::string &s) { | |||
if (s.empty()) { | |||
return ""; | |||
} | |||
std::vector<std::string> files = StringUtils::Split(s, '/'); | |||
const std::vector<std::string> files = StringUtils::Split(s, '/'); | |||
return files.empty() ? "" : files[files.size() - 1]; | |||
return files.empty() ? "" : files[files.size() - 1U]; | |||
} | |||
/// | |||
/// @ingroup domi_common | |||
@@ -125,12 +127,13 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @return string after replacement | |||
/// | |||
static std::string ReplaceAll(std::string str, const std::string &old_value, const std::string &new_value) { | |||
std::string::size_type cur_pos = 0; | |||
std::string::size_type old_length = old_value.length(); | |||
std::string::size_type new_length = new_value.length(); | |||
std::string::size_type cur_pos = 0U; | |||
const std::string::size_type old_length = old_value.length(); | |||
const std::string::size_type new_length = new_value.length(); | |||
// cycle replace | |||
for (; cur_pos != std::string::npos; cur_pos += new_length) { | |||
if ((cur_pos = str.find(old_value, cur_pos)) != std::string::npos) { | |||
cur_pos = str.find(old_value, cur_pos); | |||
if (cur_pos != std::string::npos) { | |||
(void)str.replace(cur_pos, old_length, new_value); | |||
} else { | |||
break; | |||
@@ -148,7 +151,7 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @return if the value is a prefix, true is returned. Otherwise, false is returned | |||
/// | |||
static bool StartWith(const std::string &str, const std::string str_x) { | |||
return ((str.size() >= str_x.size()) && (str.compare(0, str_x.size(), str_x) == 0)); | |||
return ((str.size() >= str_x.size()) && (str.compare(0U, str_x.size(), str_x) == 0)); | |||
} | |||
/// | |||
@@ -159,14 +162,14 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @param [in] ... format Filling Content | |||
/// @return formatted string | |||
/// | |||
static std::string FormatString(const char *format, ...) { | |||
const uint32_t MAX_BUFFER_LEN = 1024; // the stack memory plint check result must be less than 1024 | |||
static std::string FormatString(const char_t *const format, ...) { | |||
const uint32_t MAX_BUFFER_LEN = 1024U; // the stack memory plint check result must be less than 1024 | |||
va_list args; | |||
va_start(args, format); | |||
char buffer[MAX_BUFFER_LEN] = {0}; | |||
int32_t ret = vsnprintf_s(buffer, MAX_BUFFER_LEN, MAX_BUFFER_LEN - 1, format, args); | |||
char_t buffer[MAX_BUFFER_LEN] = {}; | |||
const int32_t ret = vsnprintf_s(&buffer[0], MAX_BUFFER_LEN, MAX_BUFFER_LEN - 1U, format, args); | |||
va_end(args); | |||
return ret > 0 ? buffer : ""; | |||
return (ret > 0) ? buffer : ""; | |||
} | |||
}; | |||
} // namespace ge | |||
@@ -23,7 +23,7 @@ namespace ge { | |||
const int32_t CC_FUSION_OP_MAX = 32; | |||
typedef enum tagCcStatus { | |||
enum class ccStatus_t { | |||
CC_STATUS_SUCCESS = 0, /**< succ */ | |||
CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ | |||
CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ | |||
@@ -33,10 +33,10 @@ typedef enum tagCcStatus { | |||
CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */ | |||
CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ | |||
CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ | |||
CC_STATUS_RESERVED /**< just for check */ | |||
} ccStatus_t; | |||
CC_STATUS_RESERVED = 8, /**< just for check */ | |||
}; | |||
typedef enum tagccKernelType { | |||
enum class ccKernelType { | |||
CCE_AI_CORE = 0, /* cce aicore */ | |||
CCE_AI_CPU = 1, /* cce aicpu */ | |||
TE = 2, /* te operator*/ | |||
@@ -47,9 +47,9 @@ typedef enum tagccKernelType { | |||
CUST_AI_CPU = 7, /* custom aicpu*/ | |||
HOST_CPU = 8, /* host cpu */ | |||
INVALID = 10000 /* unknown kernel type */ | |||
} ccKernelType; | |||
}; | |||
typedef struct tagOpContext { | |||
using ccOpContext = struct tagOpContext { | |||
ccKernelType kernelType; | |||
uint32_t opId; | |||
uint32_t kernelFuncId; | |||
@@ -66,7 +66,28 @@ typedef struct tagOpContext { | |||
uint64_t genVariableBaseAddr; | |||
uint64_t genVariableBaseSize; | |||
uint64_t l2ctrlSize; | |||
} ccOpContext; | |||
} // namespace ge | |||
}; | |||
// Tensor layout formats accepted for AICPU tensors (see ccAICPUTensor below).
enum class tagOpTensorFormat { OP_TENSOR_FORMAT_NC1HWC0 = 0, OP_TENSOR_FORMAT_ND, OP_TENSOR_FORMAT_RESERVED };
// Element data types accepted for AICPU tensors (see ccAICPUTensor below).
enum class tagOpDataType {
  OP_DATA_FLOAT = 0,            /**< float type */
  OP_DATA_HALF,                 /**< fp16 type */
  OP_DATA_INT8,                 /**< int8 type */
  OP_DATA_INT32,                /**< int32 type */
  OP_DATA_UINT8,                /**< uint8 type */
  OP_DATA_HALF_UINT16_PROPOSAL, /**< mixed type for proposal */
  OP_DATA_RESERVED              /**< sentinel, not a real data type */
};
// AICPU Tensor: layout descriptor passed to AICPU kernels.
// NOTE(review): this is a wire/ABI struct — field order and sizes must not change.
using ccAICPUTensor = struct tagOpTensor {
  // real dim info
  tagOpTensorFormat format;  // tensor layout, see tagOpTensorFormat
  tagOpDataType data_type;   // element type, see tagOpDataType
  int32_t dim_cnt;           // number of valid entries in dim[]
  int32_t mm;                // NOTE(review): purpose not evident from this header — confirm with consumers
  int32_t dim[8];            // dimension sizes; up to 8 dims, only dim_cnt entries are meaningful
};
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ |
@@ -19,7 +19,6 @@ | |||
#include <climits> | |||
#include <cstdint> | |||
#include <algorithm> | |||
#include <map> | |||
#include <memory> | |||
#include <string> | |||
@@ -44,32 +43,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEB | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ALL; | |||
// Profile-related constants | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OME_PROFILE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string CCE_PROFILE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string RTS_PROFILE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB. | |||
template <typename K, typename V> | |||
static std::pair<V, K> flip_pair(const std::pair<K, V> &p) { | |||
return std::pair<V, K>(p.second, p.first); | |||
} | |||
template <typename K, typename V> | |||
static std::map<V, K> flip_map(std::map<K, V> src) { | |||
std::map<V, K> dst; | |||
std::transform(src.begin(), src.end(), std::inserter(dst, dst.begin()), flip_pair<K, V>); | |||
return dst; | |||
} | |||
REGISTER_OPTYPE_DECLARE(DATA, "Data"); | |||
REGISTER_OPTYPE_DECLARE(AIPPDATA, "AippData"); | |||
REGISTER_OPTYPE_DECLARE(QUEUE_DATA, "QueueData"); | |||
REGISTER_OPTYPE_DECLARE(CONVOLUTION, "Convolution"); | |||
REGISTER_OPTYPE_DECLARE(CORRELATION, "Correlation"); | |||
REGISTER_OPTYPE_DECLARE(CORRELATIONV2, "Correlation_V2"); | |||
@@ -140,6 +124,8 @@ REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze"); | |||
REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze"); | |||
REGISTER_OPTYPE_DECLARE(SQUEEZEV2, "SqueezeV2"); | |||
REGISTER_OPTYPE_DECLARE(UNSQUEEZEV2, "UnsqueezeV2"); | |||
REGISTER_OPTYPE_DECLARE(SQUEEZEV3, "SqueezeV3"); | |||
REGISTER_OPTYPE_DECLARE(UNSQUEEZEV3, "UnsqueezeV3"); | |||
REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice"); | |||
REGISTER_OPTYPE_DECLARE(RANGE, "Range"); | |||
REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals"); | |||
@@ -438,6 +424,7 @@ REGISTER_OPTYPE_DECLARE(MODELEXIT, "ModelExit"); | |||
REGISTER_OPTYPE_DECLARE(SEND, "Send"); | |||
REGISTER_OPTYPE_DECLARE(RECV, "Recv"); | |||
REGISTER_OPTYPE_DECLARE(ENDOFSEQUENCE, "EndOfSequence"); | |||
REGISTER_OPTYPE_DECLARE(STARTOFSEQUENCE, "StartOfSequence"); | |||
REGISTER_OPTYPE_DECLARE(LABELSET, "LabelSet"); | |||
REGISTER_OPTYPE_DECLARE(LABELGOTO, "LabelGoto"); | |||
@@ -461,8 +448,6 @@ REGISTER_OPTYPE_DECLARE(ELU_GRAD, "EluGrad"); | |||
REGISTER_OPTYPE_DECLARE(ADD_V2, "AddV2"); | |||
REGISTER_OPTYPE_DECLARE(DATAFORMATDIMMAP, "DataFormatDimMap"); | |||
REGISTER_OPTYPE_DECLARE(DATAFORMATVECPERMUTE, "DataFormatVecPermute"); | |||
REGISTER_OPTYPE_DECLARE(BESSELI0e, "BesselI0e"); | |||
REGISTER_OPTYPE_DECLARE(BESSELI1e, "BesselI1e"); | |||
REGISTER_OPTYPE_DECLARE(DEQUANTIZE, "Dequantize"); | |||
REGISTER_OPTYPE_DECLARE(APPLYADADELTA, "ApplyAdadelta"); | |||
REGISTER_OPTYPE_DECLARE(APPLYADAGRAD, "ApplyAdagrad"); | |||
@@ -516,29 +501,11 @@ REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims"); | |||
// profiling training trace node | |||
REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); | |||
enum InputMode { INPUT = 0, CONST_INPUT }; | |||
// Definition of the processing status enum of the process module | |||
enum ModelProcessState { | |||
INIT_STATE = 0, // init status | |||
WAIT_EVENT_STATE, // Wait for the event status | |||
IND_RSLT_STATE, // The model execution result is being output to the high level | |||
STOPPED_STATE, // Model execution completed. The model enters this state after Model Manager::Stop | |||
RESERVED_STATE, // reserved | |||
}; | |||
// Indicates the enun definition of the execution mode of the access module | |||
enum SysMode { | |||
INFERENCE = 0, // Normal, that is, Inference mode | |||
DEBUG, // Debug mode | |||
TIME, // Model execution time mode, including the execution time of each OP | |||
STOP, // STOP mode | |||
RESET, // RESET mode | |||
PERFORMANCE, // Impact of enabling the performance model: 1. The input data of the model is considered ready and does | |||
// not need to be converted | |||
ANDROID_DEBUG, // Exports Android platform computing data | |||
RESERVED, // reserved | |||
}; | |||
// Stack series | |||
REGISTER_OPTYPE_DECLARE(STACK, "Stack"); | |||
REGISTER_OPTYPE_DECLARE(STACKPUSH, "StackPush"); | |||
REGISTER_OPTYPE_DECLARE(STACKPOP, "StackPop"); | |||
REGISTER_OPTYPE_DECLARE(STACKCLOSE, "StackClose"); | |||
// @brief encryption type of the model file | |||
enum ModelEncryptType { | |||
@@ -577,22 +544,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FIL | |||
/// | |||
/// @brief model name length | |||
/// | |||
static constexpr uint32_t MODEL_NAME_LENGTH = 32; | |||
constexpr uint32_t MODEL_NAME_LENGTH = 32U; | |||
/// | |||
/// @brief length of user-defined information | |||
/// | |||
static constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32; | |||
constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32U; | |||
/// | |||
/// @brief length of the model file signature | |||
/// | |||
static constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64; | |||
constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64U; | |||
/// | |||
/// @brief length of the reserved field in the model file header | |||
/// | |||
static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75; | |||
constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75U; | |||
// DATA node type | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DATA_TYPE; | |||
@@ -617,7 +584,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYP | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_CONF_DELIMITER; | |||
// dim default size value | |||
static const int32_t DIM_DEFAULT_SIZE = 4; | |||
constexpr int32_t DIM_DEFAULT_SIZE = 4; | |||
// dim extension default value | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t DIM_DEFAULT_VALUE; | |||
@@ -650,34 +617,35 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STREAM_SW | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_GLOBAL_STEP; | |||
static const uint32_t PLATFORM_VERSION_LEN = 20; | |||
constexpr uint32_t PLATFORM_VERSION_LEN = 20U; | |||
// Definition of the file header of the model file | |||
struct ModelFileHeader { | |||
uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI | |||
uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256 | |||
uint32_t version = MODEL_VERSION; // version 1.0 | |||
uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0}; // signature | |||
uint32_t length = 0; // Ciphertext length. In the non-encryption model, the length is the plaintext length. | |||
uint8_t is_encrypt = ModelEncryptType::UNENCRYPTED; // whether encrypted 0:not encrypt, 1:encrypt | |||
uint8_t is_checksum = ModelCheckType::CHECK; // whether to check the checksum | |||
uint8_t modeltype = 0; // 0:IR model 1:standard model 2: OM Tiny model | |||
uint8_t genmode = 0; // 0:offline generate 1:online generate | |||
uint8_t name[MODEL_NAME_LENGTH] = {0}; // Model name, which contains 32 characters | |||
uint32_t ops = 0; // Computing power (Kops) | |||
uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0}; // User-defined information. The value contains 32 characters | |||
uint32_t om_ir_version = 0; | |||
uint32_t model_num = 0; | |||
uint8_t platform_version[PLATFORM_VERSION_LEN] = {0}; | |||
uint8_t platform_type = {0}; | |||
uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0}; // Reserved field 75 | |||
uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI | |||
uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256 | |||
uint32_t version = MODEL_VERSION; // version 1.0 | |||
uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0U}; // signature | |||
uint32_t length = 0U; // Ciphertext length. In the non-encryption model, the length is the plaintext length. | |||
uint8_t is_encrypt = | |||
static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED); // whether encrypted 0:not encrypt, 1:encrypt | |||
uint8_t is_checksum = static_cast<uint8_t>(ModelCheckType::CHECK); // whether to check the checksum | |||
uint8_t modeltype = 0U; // 0:IR model 1:standard model 2: OM Tiny model | |||
uint8_t genmode = 0U; // 0:offline generate 1:online generate | |||
uint8_t name[MODEL_NAME_LENGTH] = {0U}; // Model name, which contains 32 characters | |||
uint32_t ops = 0U; // Computing power (Kops) | |||
uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0U}; // User-defined information. The value contains 32 characters | |||
uint32_t om_ir_version = 0U; | |||
uint32_t model_num = 0U; | |||
uint8_t platform_version[PLATFORM_VERSION_LEN] = {0U}; | |||
uint8_t platform_type = {0U}; | |||
uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0U}; // Reserved field 75 | |||
}; | |||
static constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0; | |||
static constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1; | |||
constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0U; | |||
constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1U; | |||
// number of partitions in the current model | |||
static constexpr uint32_t PARTITION_SIZE = 5; | |||
constexpr uint32_t PARTITION_SIZE = 5U; | |||
enum ModelPartitionType { MODEL_DEF = 0, WEIGHTS_DATA, TASK_INFO, TBE_KERNELS, CUST_AICPU_KERNELS }; | |||
@@ -692,22 +660,9 @@ struct ModelPartitionTable { | |||
ModelPartitionMemInfo partition[0]; | |||
}; | |||
#define SIZE_OF_MODEL_PARTITION_TABLE(table) (sizeof(ModelPartitionTable) + sizeof(ModelPartitionMemInfo) * (table).num) | |||
// Filter format | |||
typedef enum tagDomiFilterFormat { | |||
DOMI_FILTER_KCHW, // KCHW | |||
DOMI_FILTER_HWCK, // HWCK | |||
DOMI_FILTER_RESERVED | |||
} domiFilterFormat_t; | |||
// Const data trans type | |||
typedef enum tagDomiConstDataTransType { | |||
DOMI_CONST_DATA_NOT_CHANGE = 0, // No action is required | |||
DOMI_CONST_DATA_TRANS_MATMUL, // The const input to MatMul and needs to be transposed | |||
DOMI_CONST_DATA_RESERVED | |||
} domiConstDataTransType_t; | |||
// Returns the total byte size of a ModelPartitionTable including its trailing
// flexible array of `table.num` ModelPartitionMemInfo entries.
// The cast widens before the multiply to avoid 32-bit overflow for large num.
inline uint64_t SizeOfModelPartitionTable(const ModelPartitionTable &table) {
  return sizeof(ModelPartitionTable) + (sizeof(ModelPartitionMemInfo) * static_cast<uint64_t>(table.num));
}
// mode of activation | |||
typedef enum tagDomiActivationMode { | |||
DOMI_ACTIVATION_SIGMOID = 0, // sigmoid | |||
@@ -727,190 +682,6 @@ typedef enum tagDomiActivationMode { | |||
DOMI_ACTIVATION_RESERVED | |||
} domiActivationMode_t; | |||
// mode of batchnorm | |||
typedef enum tagDomiBatchNormMode { | |||
DOMI_BATCHNORM_PER_ACTIVATION = 0, // bnScale, bnBias tensor dims are 1xCxHxW | |||
DOMI_BATCHNORM_SPATIAL, // bnScale, bnBias tensor dims are 1xCx1x1 | |||
DOMI_BATCHNORM_RESERVED | |||
} domiBatchNormMode_t; | |||
// eltwise mode | |||
typedef enum tagDomiEltwiseMode { | |||
DOMI_ELTWISE_PROD = 0, // prod | |||
DOMI_ELTWISE_SUM, // sum | |||
DOMI_ELTWISE_MAX, // max | |||
DOMI_ELTWISE_RESERVED | |||
} domiEltwiseMode_t; | |||
// mode of padding | |||
typedef enum tagDomiPaddingMode { | |||
DOMI_PADDING_CEIL = 0, // Default padding mode | |||
DOMI_PADDING_DIRECTASSIGN, // Default padding mode: NOTSET | |||
DOMI_PADDING_VALID, // VALID padding mode | |||
DOMI_PADDING_SAME, // Padding values of 0 are always used | |||
DOMI_PADDING_CEIL_NEW, // Padding values of 0 are always used | |||
DOMI_PADDING_VALID_NEW, // Padding values of 0 are always used | |||
DOMI_PADDING_SAME_NEW, // Padding values of 0 are always used | |||
DOMI_PADDING_RESERVED | |||
} domiPaddingMode_t; | |||
// algorithm of convolution forward | |||
typedef enum tagDomiConvolutionFwdAlgo { | |||
DOMI_CONVOLUTION_FWD_ALGO_GEMM = 0, // matrix gemm algo | |||
DOMI_CONVOLUTION_FWD_ALGO_WINOGRAD, // Winograd Transform algo | |||
DOMI_CONVOLUTION_FWD_ALGO_GEMM_ACCU_FLOAT32, // accumulate in L0c with FP32 | |||
DOMI_CONVOLUTION_FWD_ALGO_RESERVED | |||
} domiConvolutionFwdAlgo_t; | |||
typedef enum tagDomiFullConnectFwdAlgo { | |||
DOMI_FULLCONNECT_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 | |||
DOMI_FULLCONNECT_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 | |||
} domiFullConnectFwdAlgo_t; | |||
typedef enum tagDomiPooingFwdAlgo { | |||
DOMI_POOLING_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 | |||
DOMI_POOLING_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 | |||
} domiPooingFwdAlgo_t; | |||
// mode of convolution | |||
typedef enum tagDomiConvolutionMode { | |||
DOMI_CONV_CONVOLUTION = 0, // math convolution | |||
DOMI_CONV_CROSS_CORRELATION, // cross-correlation convolution | |||
DOMI_CONV_DECONVOLUTION, // deconvolution, also named transposed convolution | |||
DOMI_CONV_MODE_DEPTHWISE, // depthwise convolution | |||
DOMI_CONV_MODE_RESERVED | |||
} domiConvolutionMode_t; | |||
// softmax mode | |||
typedef enum tagDomiSoftmaxMode { | |||
DOMI_SOFTMAX_MODE_INSTANCE = 0, // compute the softmax over all C, H, W for each N | |||
DOMI_SOFTMAX_MODE_CHANNEL, // compute the softmax over all C for each H, W, N | |||
DOMI_SOFTMAX_MODE_HEIGHT, // compute the softmax over all H for each N, C, W | |||
DOMI_SOFTMAX_MODE_WIDTH, // compute the softmax over all W for each N, C, H | |||
DOMI_SOFTMAX_MODE_RESERVED | |||
} domiSoftmaxMode_t; | |||
// softmax algorithm | |||
typedef enum tagDomiSoftmaxAlgo { | |||
DOMI_SOFTMAX_FAST = 0, // straightforward implementation | |||
DOMI_SOFTMAX_ACCURATE, // subtract max from every point to avoid overflow | |||
DOMI_SOFTMAX_LOG, // perform the Log softmax operation to avoid overflow | |||
DOMI_SOFTMAX_ACCURATE_FP32, | |||
DOMI_SOFTMAX_RESERVED | |||
} domiSoftmaxAlgo_t; | |||
// algorithm of convolution backward | |||
typedef enum tagDomiConvolutionBwdAlgo { | |||
DOMI_CONVOLUTION_BWD_ALGO_GEMM = 0, // matrix gemm algo | |||
DOMI_CONVOLUTION_BWD_ALGO_WINOGRAD, // Winograd Transform algo | |||
DOMI_CONVOLUTION_BWD_ALGO_RESERVED | |||
} domiConvolutionBwdAlgo_t; | |||
// mode of pooling | |||
typedef enum tagDomiPoolingMode { | |||
DOMI_POOLING_MAX = 0, // max pooling | |||
DOMI_POOLING_AVG, // average pooling | |||
DOMI_POOLING_L2, // L2 pooling | |||
DOMI_POOLING_RESERVED | |||
} domiPoolingMode_t; | |||
// propagate Nan | |||
typedef enum tagDomiNanPropagation { | |||
DOMI_NAN_NOT_PROPAGATE = 0, // Nan numbers are not propagated | |||
DOMI_NAN_PROPAGATE, // Nan numbers are propagated | |||
DOMI_NAN_PROPAGATE_RESERVED | |||
} domiNanPropagation_t; | |||
// mode of cropandresize | |||
typedef enum tagDomiCropAndResizeMode { | |||
DOMI_RESIZE_METHOD_BILINEAR = 0, // resize bilinear | |||
DOMI_RESIZE_METHOD_NEAREST, // resize nearest | |||
DOMI_RESIZE_RESERVED | |||
} domiCropAndResizeMode_t; | |||
// yolo version | |||
typedef enum tagDomiYoloVersion { DOMI_YOLO_V2 = 1, DOMI_YOLO_V3, DOMI_YOLO_TRSERVED } domiYoloVersion_t; | |||
typedef enum tagDomiRNNScopePassType { | |||
DOMI_STATIC_BIDIRECTIONAL_RNN_GENERAL_PASS = 0, | |||
DOMI_DYNAMIC_BIDIRECTIONAL_RNN_GENERAL_PASS, | |||
DOMI_DYNAMIC_BIDIRECTIONAL_RNN_BIDAF_PASS | |||
} domiRNNScopePassType; | |||
// RNNDataLayout | |||
typedef enum tagDomiRNNDataLayout { | |||
DOMI_RNN_ND_TBX = 0, // data[max_time,batch_size,Xt] | |||
DOMI_RNN_ND_BTX, // data[batch_size,max_time,Xt] | |||
DOMI_RNN_5D_TX1BX, // data[max_time,Xt,1,batch_size,Xt] | |||
DOMI_RNN_5D_BX1TX, // dataa[batch_size,Xt,1,max_time,Xt] | |||
DOMI_RNN_4DTBX1, | |||
DOMI_ENN_DL_RESERVED | |||
} domiRNNDataLayout_t; | |||
// RNNInputMode | |||
typedef enum tagDomiRNNInputMode { DOMI_RNN_LINEAR_INPUT = 0, DOMI_RNN_SKIP_INPUT } domiRNNInputMode_t; | |||
// RNNDirectionMode | |||
typedef enum tagDomiRNNDirectionMode { DOMI_RNN_UNIDIRECTIONAL = 0, DOMI_RNN_BIDIRECTIONAL } domiDirectionMode_t; | |||
typedef enum tagDomiPoolingCeilMode { DOMI_POOLING_FLOOR = 0, DOMI_POOLING_CEIL } domiPoolingCeilMode_t; | |||
// RNNMode | |||
typedef enum tagDomiRNNActivationMode { | |||
DOMI_RNN_ACTIVATION_SIGMOID = 0, // sigmoid | |||
DOMI_RNN_ACTIVATION_TANH, // tanh | |||
DOMI_RNN_ACTIVATION_RELU, // ReLU | |||
DOMI_RNN_ACTIVATION_RELU1, // ReLU1 | |||
DOMI_RNN_ACTIVATION_RELU6, // ReLU6 | |||
DOMI_RNN_ACTIVATION_RESERVED | |||
} domiRNNActivationMode_t; | |||
typedef enum tagDomiRNNLSTMOutMode { | |||
DOMI_RNN_LSTM_OUT_SEPARATE = 0, | |||
DOMI_RNN_LSTM_OUT_CONCAT, | |||
DOMI_RNN_LSTM_OUT_RESERVED | |||
} domiRNNLSTMOutPutMode_t; | |||
typedef enum tagDomiRNNLSTMStateOutMode { | |||
DOMI_RNN_LSTM_STATE_OUT_SEPARATE = 0, | |||
DOMI_RNN_LSTM_STATE_OUT_CONCAT_ALL, | |||
DOMI_RNN_LSTM_STATE_OUT_RESERVED | |||
} domiRNNLSTMStateOutMode_t; | |||
typedef enum tagDomiRNNMode { | |||
DOMI_RNN_RELU = 0, | |||
DOMI_RNN_TANH, | |||
DOMI_LSTM, | |||
DOMI_GRU, | |||
DOMI_RNN_MODE_RESERVED | |||
} domiRNNMode_t; | |||
typedef enum tagDomiResizeBilinearMode { | |||
DOMI_RESIZE_OUTPUT_DIM_BY_ZOOM_FACTOR = 0, // Output dimension specified by zoom factor | |||
DOMI_RESIZE_OUTPUT_DIM_BY_SHRINK_FACTOR, // specified by shrink factor | |||
DOMI_RESIZE_OUTPUT_DIM_EXPLICIT, // specified explicitly | |||
DOMI_RESIZE_OUTPUT_DIM_RESERVED | |||
} domiResizeOutputDimMode_t; | |||
#pragma pack(1) // single-byte alignment | |||
// DUMP file struct: fixed-size header at the start of a dump file.
// Packed to 1-byte alignment by the surrounding #pragma pack(1).
struct FileHeader {
  int32_t Version;         // version
  int32_t Output_Offset;   // output offset address
  char Reserved[24] = {0}; // 24 bytes reserved
};
// Per-execution basic profiling record written after the dump FileHeader.
// Packed to 1-byte alignment by the surrounding #pragma pack(1).
struct BasicInfo {
  struct FileHeader header; // file header
  int32_t stream_id;        // stream id
  uint64_t start_time;      // start time
  uint64_t end_time;        // end time
  uint32_t input_size;      // input memory size
  uint32_t output_size;     // output memory size
  uint32_t weight_size;     // weight memory size
  uint32_t workspace_size;  // workspace
  uint32_t total_size;      // total memory size
};
#pragma pack() // Cancels single-byte alignment | |||
enum class MemorySizeCalcType { NORMAL = 0, ALWAYS_EMPTY }; | |||
} // namespace ge | |||
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#define INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#ifndef AIR_INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#define AIR_INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#include <climits> | |||
#include <cmath> | |||
@@ -24,13 +24,16 @@ | |||
#include <vector> | |||
#include <google/protobuf/text_format.h> | |||
#include "external/graph/types.h" | |||
#include "external/register/register.h" | |||
#include "framework/common/debug/log.h" | |||
#include "framework/common/scope_guard.h" | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "graph/detail/attributes_holder.h" | |||
#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ | |||
do { \ | |||
if (size <= 0) { \ | |||
if ((size) <= 0) { \ | |||
GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \ | |||
return PARAM_INVALID; \ | |||
} \ | |||
@@ -46,15 +49,19 @@ | |||
// new ge marco | |||
// Encapsulate common resource releases | |||
#define GE_MAKE_GUARD_RTMEM(var) \ | |||
GE_MAKE_GUARD(var, [&] { \ | |||
if (var) GE_CHK_RT(rtFreeHost(var)); \ | |||
}); | |||
#define GE_MAKE_GUARD_RTMEM(var) \ | |||
GE_MAKE_GUARD(var, [&] { \ | |||
if ((var) != nullptr) { \ | |||
GE_CHK_RT(rtFreeHost(var)); \ | |||
} \ | |||
}) | |||
#define GE_MAKE_GUARD_RTSTREAM(var) \ | |||
GE_MAKE_GUARD(var, [&] { \ | |||
if (var) GE_CHK_RT(rtStreamDestroy(var)); \ | |||
}); | |||
#define GE_MAKE_GUARD_RTSTREAM(var) \ | |||
GE_MAKE_GUARD(var, [&] { \ | |||
if ((var) != nullptr) { \ | |||
GE_CHK_RT(rtStreamDestroy(var)); \ | |||
} \ | |||
}) | |||
// For propagating errors when calling a function. | |||
#define GE_RETURN_IF_ERROR(expr) \ | |||
@@ -115,7 +122,7 @@ | |||
// Check if the parameter is null. If yes, return PARAM_INVALID and record the error | |||
#define GE_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
if ((val) == nullptr) { \ | |||
REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \ | |||
GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \ | |||
return ge::PARAM_INVALID; \ | |||
@@ -125,7 +132,7 @@ | |||
// Check if the parameter is null. If yes, just return and record the error | |||
#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
if ((val) == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
return; \ | |||
} \ | |||
@@ -134,7 +141,7 @@ | |||
// Check whether the parameter is null. If so, execute the exec_expr expression and record the error log | |||
#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
if ((val) == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
exec_expr; \ | |||
} \ | |||
@@ -143,7 +150,7 @@ | |||
// Check whether the parameter is null. If yes, return directly and record the error log | |||
#define GE_RT_VOID_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
if ((val) == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
return; \ | |||
} \ | |||
@@ -152,7 +159,7 @@ | |||
// Check if the parameter is null. If yes, return false and record the error log | |||
#define GE_RT_FALSE_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
if ((val) == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
return false; \ | |||
} \ | |||
@@ -161,7 +168,7 @@ | |||
// Check if the parameter is out of bounds | |||
#define GE_CHECK_SIZE(size) \ | |||
do { \ | |||
if (size == 0) { \ | |||
if ((size) == 0U) { \ | |||
GELOGE(ge::FAILED, "param[%s] is out of range", #size); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
@@ -170,7 +177,7 @@ | |||
// Check if the value on the left is greater than or equal to the value on the right | |||
#define GE_CHECK_GE(lhs, rhs) \ | |||
do { \ | |||
if (lhs < rhs) { \ | |||
if ((lhs) < (rhs)) { \ | |||
GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
@@ -179,7 +186,7 @@ | |||
// Check if the value on the left is less than or equal to the value on the right | |||
#define GE_CHECK_LE(lhs, rhs) \ | |||
do { \ | |||
if (lhs > rhs) { \ | |||
if ((lhs) > (rhs)) { \ | |||
GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
@@ -187,102 +194,37 @@ | |||
#define GE_DELETE_NEW_SINGLE(var) \ | |||
do { \ | |||
if (var != nullptr) { \ | |||
delete var; \ | |||
var = nullptr; \ | |||
if ((var) != nullptr) { \ | |||
delete (var); \ | |||
(var) = nullptr; \ | |||
} \ | |||
} while (false) | |||
#define GE_DELETE_NEW_ARRAY(var) \ | |||
do { \ | |||
if (var != nullptr) { \ | |||
delete[] var; \ | |||
var = nullptr; \ | |||
if ((var) != nullptr) { \ | |||
delete[](var); \ | |||
(var) = nullptr; \ | |||
} \ | |||
} while (false) | |||
#define GE_FREE_RT_LOG(addr) \ | |||
do { \ | |||
if (addr != nullptr) { \ | |||
if ((addr) != nullptr) { \ | |||
const rtError_t error = rtFree(addr); \ | |||
if (error != RT_ERROR_NONE) { \ | |||
GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \ | |||
} \ | |||
addr = nullptr; \ | |||
(addr) = nullptr; \ | |||
} \ | |||
} while (false) | |||
namespace ge { | |||
/** | |||
* @ingroup domi_common | |||
* @brief version of om.proto file | |||
*/ | |||
static constexpr int32_t OM_PROTO_VERSION = 2; | |||
/** | |||
* Finding an Integer Ceiling Value Without Precision Loss | |||
*/ | |||
#define CEIL(N, n) (((N) + (n)-1) / (n)) | |||
namespace ge { | |||
using google::protobuf::Message; | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Reads the proto structure from an array. | |||
/// @param [in] data proto data to be read | |||
/// @param [in] size proto data size | |||
/// @param [out] proto Memory for storing the proto file | |||
/// @return true success | |||
/// @return false fail | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int32_t size, Message *proto); | |||
/// | |||
/// @ingroup domi_proto | |||
/// @brief Reads the proto file in the text format. | |||
/// @param [in] file path of proto file | |||
/// @param [out] message Memory for storing the proto file | |||
/// @return true success | |||
/// @return false fail | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message); | |||
/// | |||
/// @ingroup: domi_common | |||
/// @brief: get length of file | |||
/// @param [in] input_file: path of file | |||
/// @return long: File length. If the file length fails to be obtained, the value -1 is returned. | |||
/// | |||
GE_FUNC_VISIBILITY extern long GetFileLength(const std::string &input_file); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Reads all data from a binary file. | |||
/// @param [in] file_name path of file | |||
/// @param [out] buffer Output memory address, which needs to be released by the caller. | |||
/// @param [out] length Output memory size | |||
/// @return false fail | |||
/// @return true success | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *const file_name, char **buffer, int32_t &length); | |||
GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vector<char> &buffer); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Recursively Creating a Directory | |||
/// @param [in] directory_path Path, which can be a multi-level directory. | |||
/// @return 0 success | |||
/// @return -1 fail | |||
/// | |||
GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Obtains the current time string. | |||
/// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555 | |||
/// | |||
GE_FUNC_VISIBILITY std::string CurrentTimeInStr(); | |||
constexpr int32_t OM_PROTO_VERSION = 2; | |||
/// | |||
/// @ingroup domi_common | |||
@@ -294,7 +236,7 @@ template <typename T> | |||
GE_FUNC_VISIBILITY std::string ToString(std::vector<T> &v) { | |||
std::stringstream ss; | |||
ss << "["; | |||
for (T x : v) { | |||
for (const T x : v) { | |||
ss << x; | |||
ss << ", "; | |||
} | |||
@@ -314,7 +256,7 @@ template <typename T> | |||
GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) { | |||
std::stringstream ss; | |||
ss << "["; | |||
for (T x : rpd_field) { | |||
for (const T x : rpd_field) { | |||
ss << x; | |||
ss << ", "; | |||
} | |||
@@ -343,6 +285,63 @@ GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedPtrField | |||
return str_ret; | |||
} | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Reads the proto structure from an array. | |||
/// @param [in] data proto data to be read | |||
/// @param [in] size proto data size | |||
/// @param [out] proto Memory for storing the proto file | |||
/// @return true success | |||
/// @return false fail | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *const data, const int32_t size, | |||
google::protobuf::Message *const proto); | |||
/// | |||
/// @ingroup domi_proto | |||
/// @brief Reads the proto file in the text format. | |||
/// @param [in] file path of proto file | |||
/// @param [out] message Memory for storing the proto file | |||
/// @return true success | |||
/// @return false fail | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadProtoFromText(const char_t *const file, google::protobuf::Message *const message); | |||
/// | |||
/// @ingroup: domi_common | |||
/// @brief: get length of file | |||
/// @param [in] input_file: path of file | |||
/// @return int64_t: File length. If the file length fails to be obtained, the value -1 is returned. | |||
/// | |||
GE_FUNC_VISIBILITY extern int64_t GetFileLength(const std::string &input_file); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Reads all data from a binary file. | |||
/// @param [in] file_name path of file | |||
/// @param [out] buffer Output memory address, which needs to be released by the caller. | |||
/// @param [out] length Output memory size | |||
/// @return false fail | |||
/// @return true success | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char_t *const file_name, char_t **const buffer, int32_t &length); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Recursively Creating a Directory | |||
/// @param [in] directory_path Path, which can be a multi-level directory. | |||
/// @return 0 success | |||
/// @return -1 fail | |||
/// | |||
GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Obtains the current time string. | |||
/// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555 | |||
/// | |||
GE_FUNC_VISIBILITY std::string CurrentTimeInStr(); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Obtains the absolute time (timestamp) of the current system. | |||
@@ -366,7 +365,7 @@ GE_FUNC_VISIBILITY uint32_t GetCurrentSecondTimestap(); | |||
/// @param [in] b | |||
/// @return false: true: The result is within the normal int64 range. | |||
/// | |||
GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); | |||
GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(const int64_t a, const int64_t b); | |||
/// | |||
/// @ingroup domi_common | |||
@@ -374,7 +373,7 @@ GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); | |||
/// @param [in] path of input file | |||
/// @param [out] Absolute path of a file. If the absolute path cannot be obtained, an empty string is returned | |||
/// | |||
GE_FUNC_VISIBILITY std::string RealPath(const char *path); | |||
GE_FUNC_VISIBILITY std::string RealPath(const char_t *path); | |||
/// | |||
/// @ingroup domi_common | |||
@@ -401,17 +400,9 @@ GE_FUNC_VISIBILITY bool CheckOutputPathValid(const std::string &file_path, const | |||
/// @param [in] str file path | |||
/// @param [out] result | |||
/// | |||
GE_FUNC_VISIBILITY bool ValidateStr(const std::string &str, const std::string &mode); | |||
GE_FUNC_VISIBILITY bool ValidateStr(const std::string &file_path, const std::string &mode); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Check path invalid | |||
/// @param [in] path, path to be checked | |||
/// @param [in] length, length of path | |||
/// @return 0 success | |||
/// @return -1 fail | |||
/// | |||
GE_FUNC_VISIBILITY Status CheckPath(const char *path, size_t length); | |||
GE_FUNC_VISIBILITY Status ConvertToInt32(const std::string &str, int32_t &val); | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#endif // AIR_INC_FRAMEWORK_COMMON_UTIL_H_ |
@@ -26,11 +26,11 @@ | |||
#include "graph/types.h" | |||
namespace ge { | |||
enum PriorityEnum { | |||
enum class PriorityEnum { | |||
COST_0 = 0, | |||
COST_1, | |||
COST_2, | |||
COST_3, | |||
COST_1 = 1, | |||
COST_2 = 2, | |||
COST_3 = 3, | |||
COST_9 = 9, | |||
COST_10 = 10, | |||
}; | |||
@@ -38,7 +38,7 @@ enum PriorityEnum { | |||
struct DNNEngineAttribute { | |||
std::string engine_name; | |||
std::vector<std::string> mem_type; | |||
uint32_t compute_cost; | |||
PriorityEnum compute_cost; | |||
enum RuntimeType runtime_type; // HOST, DEVICE | |||
// If engine input format must be specific, set this attribute, else set FORMAT_RESERVED | |||
Format engine_input_format; | |||
@@ -53,10 +53,11 @@ class GE_FUNC_VISIBILITY DNNEngine { | |||
engine_attribute_ = attrs; | |||
} | |||
virtual ~DNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options) { | |||
Status Initialize(const std::map<std::string, std::string> &options) const { | |||
(void)options; | |||
return SUCCESS; | |||
} | |||
Status Finalize() { | |||
Status Finalize() const { | |||
return SUCCESS; | |||
} | |||
void GetAttributes(DNNEngineAttribute &attr) const { | |||
@@ -32,18 +32,19 @@ | |||
namespace ge { | |||
class SingleOp; | |||
class DynamicSingleOp; | |||
class GeRootModel; | |||
struct RunModelData { | |||
uint32_t index; // Data index | |||
uint32_t modelId; | |||
std::vector<DataBuffer> blobs; // All input/output data buffer | |||
uint32_t timestamp; // Data creation time | |||
uint32_t timeout; // Processing timeout | |||
uint64_t request_id = 0; // Request ID | |||
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 | |||
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 | |||
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 | |||
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty | |||
std::vector<DataBuffer> blobs; // All input/output data buffer | |||
uint32_t timestamp; // Data creation time | |||
uint32_t timeout; // Processing timeout | |||
uint64_t request_id = 0UL; // Request ID | |||
uint64_t dynamic_batch_size = 0UL; // Dynamic batch size scene, set dynamic size, not supported by default:0 | |||
uint64_t dynamic_image_height = 0UL; // Dynamic image size scene, set image height, not supported by default:0 | |||
uint64_t dynamic_image_width = 0UL; // Dynamic image size scene, set image width, not supported by default:0 | |||
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty | |||
}; | |||
class GE_FUNC_VISIBILITY GeExecutor { | |||
@@ -69,11 +70,11 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// | |||
static Status FinalizeEx(); | |||
Status UnloadModel(uint32_t modelId); | |||
Status UnloadModel(const uint32_t model_id); | |||
// Get input and output descriptor | |||
Status GetModelDescInfo(uint32_t model_id, std::vector<TensorDesc> &input_desc, std::vector<TensorDesc> &output_desc, | |||
bool new_model_desc = false); | |||
Status GetModelDescInfo(const uint32_t model_id, std::vector<TensorDesc> &input_desc, | |||
std::vector<TensorDesc> &output_desc, const bool new_model_desc = false); | |||
/// | |||
/// @ingroup ge | |||
@@ -84,7 +85,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario | |||
/// @return execute result | |||
/// | |||
Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size); | |||
Status SetDynamicBatchSize(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, | |||
const uint64_t batch_size); | |||
/// | |||
/// @ingroup ge | |||
@@ -96,8 +98,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario | |||
/// @return execute result | |||
/// | |||
Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, | |||
uint64_t image_width); | |||
Status SetDynamicImageSize(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, | |||
const uint64_t image_height, const uint64_t image_width); | |||
/// | |||
/// @ingroup ge | |||
@@ -109,7 +111,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [in] dynamic_dims: array of dynamic dimensions | |||
/// @return execute result | |||
/// | |||
Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
Status SetDynamicDims(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, | |||
const std::vector<uint64_t> &dynamic_dims); | |||
/// | |||
@@ -120,7 +122,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] cur_dynamic_dims: current dynamic dims | |||
/// @return execute result | |||
/// | |||
Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims, | |||
Status GetCurDynamicDims(const uint32_t model_id, const std::vector<uint64_t> &dynamic_dims, | |||
std::vector<uint64_t> &cur_dynamic_dims); | |||
/// | |||
@@ -131,7 +133,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] dynamic_type | |||
/// @return execute result | |||
/// | |||
Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type); | |||
Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
int32_t &dynamic_type); | |||
/// | |||
/// @ingroup ge | |||
@@ -140,7 +143,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] batch_info | |||
/// @return execute result | |||
/// | |||
Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
Status GetCombinedDynamicDims(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
/// | |||
/// @ingroup ge | |||
@@ -149,7 +152,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] user_designate_shape_order | |||
/// @return execute result | |||
/// | |||
Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order); | |||
Status GetUserDesignateShapeOrder(const uint32_t model_id, std::vector<std::string> &user_designate_shape_order); | |||
Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
@@ -163,18 +166,18 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [in] aippParms: kAippDynamicPara by user in dynamic aipp | |||
/// @return execute result | |||
/// | |||
Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
Status SetDynamicAippData(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, | |||
const std::vector<kAippDynamicBatchPara> &aipp_batch_para, | |||
const kAippDynamicPara &aippParms); | |||
const kAippDynamicPara &aipp_parms); | |||
Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||
Status GetAIPPInfo(const uint32_t model_id, const uint32_t index, AippConfigInfo &aipp_info); | |||
Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, | |||
Status GetOpAttr(const uint32_t model_id, const std::string &op_name, const std::string &attr_name, | |||
std::string &attr_value); | |||
Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info); | |||
Status GetModelAttr(const uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info); | |||
Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | |||
Status GetAippType(const uint32_t model_id, const uint32_t index, InputAippType &type, size_t &aipp_index); | |||
Status CommandHandle(const Command &command); | |||
@@ -188,7 +191,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @return SUCCESS | |||
/// @return FAILED | |||
/// | |||
Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size); | |||
Status GetMaxUsedMemory(const uint32_t model_id, uint32_t &max_size); | |||
/// | |||
/// @ingroup ge | |||
@@ -210,8 +213,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] uint32_t &model_id: Corresponding identification after model loading | |||
/// @return SUCCESS handle successfully / others handle failed | |||
/// | |||
Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, | |||
void *weight_ptr, size_t weight_size); | |||
Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *const dev_ptr, const size_t mem_size, | |||
void *const weight_ptr, const size_t weight_size); | |||
/// | |||
/// @ingroup ge | |||
@@ -225,6 +228,18 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids, | |||
const std::vector<uint32_t> &output_queue_ids); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Load task list from ModelData with queue. | |||
/// @param [out] model_id: model id allocate from manager. | |||
/// @param [in] root_model: Instance of GeRootModel. | |||
/// @param [in] input_queue_ids: input queue ids create from user. | |||
/// @param [in] output_queue_ids: input queue ids create from user. | |||
/// @return: 0 for success / others for fail | |||
/// | |||
Status LoadModelWithQ(uint32_t &model_id, const std::shared_ptr<GeRootModel> &root_model, | |||
const std::vector<uint32_t> &input_queue_ids, const std::vector<uint32_t> &output_queue_ids); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Synchronous execution of offline model(Do not create thread) | |||
@@ -235,8 +250,17 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] domi::OutputData *output_data: Model output data | |||
/// @return SUCCESS handle successfully / others handle failed | |||
/// | |||
Status ExecModel(uint32_t model_id, void *stream, const RunModelData &input_data, RunModelData &output_data, | |||
bool async_mode = false); | |||
Status ExecModel(const uint32_t model_id, void *const stream, const RunModelData &input_data, | |||
RunModelData &output_data, const bool async_mode = false); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Load task list from root_model without input queue or output queue. | |||
/// @param [out] model_id: model id allocate from manager. | |||
/// @param [in] root_model: Instance of GeRootModel. | |||
/// @return: 0 for success / others for fail | |||
/// | |||
Status LoadModelWithoutQ(uint32_t &model_id, const std::shared_ptr<GeRootModel> &root_model) const; | |||
/// | |||
/// @ingroup ge | |||
@@ -250,9 +274,9 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data | |||
/// @return SUCCESS handle successfully / others handle failed | |||
/// | |||
Status ExecModel(uint32_t model_id, void *stream, const RunModelData &run_input_data, | |||
Status ExecModel(const uint32_t model_id, void *const stream, const RunModelData &run_input_data, | |||
const std::vector<GeTensorDesc> &input_desc, RunModelData &run_output_data, | |||
std::vector<GeTensorDesc> &output_desc, bool async_mode = false); | |||
std::vector<GeTensorDesc> &output_desc, const bool async_mode = false); | |||
/// | |||
/// @ingroup ge | |||
@@ -273,36 +297,38 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] size_t &weight_size Weight memory space size | |||
/// @return SUCCESS handle successfully / others handle failed | |||
/// | |||
Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size); | |||
Status GetMemAndWeightSize(const void *const model_data, const size_t model_size, size_t &mem_size, | |||
size_t &weight_size); | |||
static Status LoadSingleOp(const std::string &modelName, const ModelData &modelData, void *stream, | |||
SingleOp **single_op); | |||
static Status LoadSingleOp(const std::string &model_name, const ModelData &model_data, void *const stream, | |||
SingleOp **const single_op); | |||
static Status LoadSingleOpV2(const std::string &modelName, const ModelData &modelData, void *stream, | |||
SingleOp **single_op, const uint64_t model_id); | |||
static Status LoadSingleOpV2(const std::string &model_name, const ModelData &model_data, void *const stream, | |||
SingleOp **const single_op, const uint64_t model_id); | |||
static Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | |||
static Status ExecuteAsync(SingleOp *const executor, const std::vector<DataBuffer> &inputs, | |||
std::vector<DataBuffer> &outputs); | |||
static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &modelData, void *stream, | |||
DynamicSingleOp **single_op); | |||
static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &model_data, void *const stream, | |||
DynamicSingleOp **const single_op); | |||
static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &modelData, void *stream, | |||
DynamicSingleOp **single_op, const uint64_t model_id); | |||
static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &model_data, void *const stream, | |||
DynamicSingleOp **const single_op, const uint64_t model_id); | |||
static Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | |||
static Status ExecuteAsync(DynamicSingleOp *const executor, const std::vector<GeTensorDesc> &input_desc, | |||
const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, | |||
std::vector<DataBuffer> &outputs); | |||
static Status ReleaseSingleOpResource(void *stream); | |||
static Status ReleaseSingleOpResource(void *const stream); | |||
static Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); | |||
static Status GetDeviceIdByModelId(const uint32_t model_id, uint32_t &device_id); | |||
Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); | |||
Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||
Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||
std::vector<InputOutputDims> &output_dims); | |||
Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||
Status GetBatchInfoSize(const uint32_t model_id, size_t &shape_count); | |||
Status GetOrigInputInfo(const uint32_t model_id, const uint32_t index, OriginInputInfo &orig_input_info); | |||
Status GetAllAippInputOutputDims(const uint32_t model_id, const uint32_t index, | |||
std::vector<InputOutputDims> &input_dims, std::vector<InputOutputDims> &output_dims); | |||
Status GetOpDescInfo(const uint32_t device_id, const uint32_t stream_id, const uint32_t task_id, | |||
OpDescInfo &op_desc_info); | |||
private: | |||
static std::atomic_bool is_inited_; | |||
@@ -31,6 +31,8 @@ | |||
#include "framework/omg/omg_inner_types.h" | |||
namespace ge { | |||
const std::string kAttrSupportDynamicShape = "support_dynamicshape"; | |||
class GeRootModel; | |||
class GE_FUNC_VISIBILITY GeGenerator { | |||
public: | |||
@@ -103,8 +105,8 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
/// @param [in] graph_name: graph name. | |||
/// @param [out] graph: graph of single op. | |||
/// @return SUCCESS or FAILED | |||
Status BuildSingleOpGraph(OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, | |||
Graph &graph, std::vector<std::pair<std::string, std::string>> &inputs_name_type); | |||
Status BuildSingleOpGraph(const OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, | |||
Graph &graph, std::vector<std::pair<std::string, std::string>> &inputs_name_type) const; | |||
Status BuildOriginalGraphInfo(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
const std::vector<GeTensor> &outputs, const std::string &model_file_name, | |||
bool is_offline, int32_t compile_flag, GraphStage graph_stage, Graph &graph, | |||
@@ -116,20 +118,20 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
ge::ModelBufferData &model, bool is_offline = true); | |||
Status BuildSingleOp(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, const std::vector<GeTensor> &outputs, | |||
const std::string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | |||
ComputeGraphPtr &compute_graph, bool is_offline = true, int32_t compile_flag = 0, | |||
ComputeGraphPtr &comp_graph, bool is_offline = true, int32_t compile_flag = 0, | |||
GraphStage graph_stage = GraphStage::GRAPH_STAGE_RESERVED); | |||
bool CheckNoAicore(const ComputeGraphPtr &graph); | |||
void RemoveConst(const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs); | |||
Status CheckForSingleOp(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
const std::vector<GeTensor> &outputs); | |||
Status InferFormatForSingleOp(OpDescPtr &op_desc, Graph &graph); | |||
void RemoveConst(const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs) const; | |||
Status CheckForSingleOp(const OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
const std::vector<GeTensor> &outputs) const; | |||
Status InferFormatForSingleOp(const OpDescPtr &op_desc, const Graph &graph) const; | |||
using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; | |||
Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); | |||
Status CreateGeneralizedBuildAttrs(const GeRootModelPtr &ge_root_model, const std::vector<GeTensor> &inputs, | |||
const std::vector<GeTensor> &outputs, | |||
const std::vector<std::pair<std::string, std::string>> &inputs_name_type, | |||
std::vector<ge::NamedAttrs> &generalized_build_attrs); | |||
std::vector<ge::NamedAttrs> &generalized_build_attrs) const; | |||
class Impl; | |||
@@ -17,11 +17,7 @@ | |||
#ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_ | |||
#define INC_FRAMEWORK_MEMORY_MEMORY_API_H_ | |||
#include <string> | |||
#include <vector> | |||
#include "external/ge/ge_api_error_codes.h" | |||
#include "graph/types.h" | |||
#include "runtime/mem.h" | |||
namespace ge { | |||
@@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner { | |||
MemoryAssigner &operator=(const MemoryAssigner &) = delete; | |||
Status AssignMemory(bool is_loop_graph, std::map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size); | |||
Status AssignMemory(std::map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size); | |||
private: | |||
ge::ComputeGraphPtr compute_graph_; | |||
@@ -64,7 +64,7 @@ GE_FUNC_VISIBILITY Status InitDomiOmgContext(const std::string &input_shape, con | |||
GE_FUNC_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<std::string, std::string> &atc_params, | |||
const char *model_file, const char *weights_file, domi::FrameworkType type, | |||
const char *op_conf = nullptr, const char *target = nullptr, | |||
RunMode run_mode = GEN_OM_MODEL, bool is_dynamic_input = false); | |||
RunMode run_mode = RunMode::GEN_OM_MODEL, bool is_dynamic_input = false); | |||
/** | |||
* @ingroup domi_omg | |||
@@ -89,15 +89,15 @@ GE_FUNC_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const char | |||
GE_FUNC_VISIBILITY Status ConvertFwkModelToJson(domi::FrameworkType framework, const char *model_file, | |||
const char *json_file); | |||
GE_FUNC_VISIBILITY void GetGroupName(ge::proto::ModelDef &model); | |||
GE_FUNC_VISIBILITY void GetGroupName(ge::proto::ModelDef &model_def); | |||
GE_FUNC_VISIBILITY void FindParserSo(const std::string &path, std::vector<std::string> &fileList, | |||
GE_FUNC_VISIBILITY void FindParserSo(const std::string &path, std::vector<std::string> &file_list, | |||
std::string &caffe_parser_path); | |||
GE_FUNC_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); | |||
GE_FUNC_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, | |||
const std::string &output_format); | |||
const std::string &output); | |||
GE_FUNC_VISIBILITY Status GetOutputLeaf(ge::NodePtr node, | |||
std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info); | |||
@@ -31,12 +31,7 @@ | |||
using domi::DOMI_TENSOR_ND; | |||
using domi::DOMI_TENSOR_RESERVED; | |||
using domi::domiTensorFormat_t; | |||
using domi::FRAMEWORK_RESERVED; | |||
using domi::FrameworkType; | |||
using std::map; | |||
using std::string; | |||
using std::unordered_map; | |||
using std::vector; | |||
namespace ge { | |||
/** | |||
@@ -51,36 +46,13 @@ enum RunMode { | |||
DISPLAY_OM_INFO = 6 // display model info | |||
}; | |||
/// | |||
/// @ingroup domi_omg | |||
/// @brief high-precision mode | |||
/// | |||
enum HighPrecisionMode { | |||
// the FP16 high-precision function is disabled in common mode | |||
HIGH_PRECISION_DEFAULT = 0, | |||
// high-precision mode, enabling FP16 high-precision mode (Convolution/FullConnect/AvgPooling are involved) | |||
HIGH_PRECISION_FP16 = 1 | |||
}; | |||
/// | |||
/// @ingroup domi_omg | |||
/// @brief description buffer data | |||
/// | |||
struct OMGBufferData { | |||
void *data; | |||
uint32_t length; | |||
}; | |||
struct OmgContext { | |||
OmgContext() { | |||
format = DOMI_TENSOR_ND; | |||
} | |||
domiTensorFormat_t format; | |||
OmgContext() : format(domi::DOMI_TENSOR_ND) {} | |||
domi::domiTensorFormat_t format; | |||
// format of the input specified by the command line | |||
std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map; | |||
std::vector<domiTensorFormat_t> output_formats; | |||
std::unordered_map<std::string, domi::domiTensorFormat_t> input_nodes_format_map; | |||
std::vector<domi::domiTensorFormat_t> output_formats; | |||
// user-designate input dims | |||
std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims; | |||
@@ -107,9 +79,9 @@ struct OmgContext { | |||
// net data nodes tensor names(caffe or onnx) | |||
std::vector<std::string> data_tensor_names; | |||
// preferential format used by the entire network | |||
domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; | |||
domi::domiTensorFormat_t net_format = domi::DOMI_TENSOR_RESERVED; | |||
domi::FrameworkType type = domi::FRAMEWORK_RESERVED; | |||
RunMode run_mode = ONLY_PRE_CHECK; | |||
RunMode run_mode = RunMode::ONLY_PRE_CHECK; | |||
bool train_flag = false; | |||
std::string output_type; | |||
@@ -108,6 +108,8 @@ class GE_FUNC_VISIBILITY ModelParser { | |||
* @return Others failed | |||
*/ | |||
virtual domi::Status ToJson(const char *model_file, const char *json_file) { | |||
(void)model_file; | |||
(void)json_file; | |||
return domi::SUCCESS; | |||
} | |||
@@ -130,6 +132,8 @@ class GE_FUNC_VISIBILITY ModelParser { | |||
* @return Others failed | |||
*/ | |||
virtual domi::Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) { | |||
(void)serialized_proto; | |||
(void)graph; | |||
return UNSUPPORTED; | |||
} | |||
@@ -144,6 +148,9 @@ class GE_FUNC_VISIBILITY ModelParser { | |||
*/ | |||
virtual domi::Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback, | |||
ge::ComputeGraphPtr &graph) { | |||
(void)serialized_proto; | |||
(void)callback; | |||
(void)graph; | |||
return UNSUPPORTED; | |||
} | |||
}; | |||
@@ -50,7 +50,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual domi::Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0; | |||
virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::OpDescPtr &op_desc) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
@@ -60,7 +60,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual domi::Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0; | |||
virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::Operator &op_dest) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
@@ -70,7 +70,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual domi::Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0; | |||
virtual domi::Status ParseWeights(const google::protobuf::Message *op_src, ge::NodePtr &node) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
@@ -80,7 +80,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual domi::Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) { | |||
virtual domi::Status GetFormat(const google::protobuf::Message *op_src, domi::domiTensorFormat_t &format) { | |||
(void)op_src; | |||
// Indicates that the op does not provide a value for format | |||
format = domi::DOMI_TENSOR_RESERVED; | |||
@@ -24,13 +24,11 @@ | |||
#include "framework/omg/omg_inner_types.h" | |||
#include "framework/omg/parser/parser_types.h" | |||
using Status = domi::Status; | |||
namespace domi { | |||
class WeightsParser; | |||
class ModelParser; | |||
typedef std::shared_ptr<ModelParser> (*MODEL_PARSER_CREATOR_FUN)(void); | |||
using MODEL_PARSER_CREATOR_FUN = std::shared_ptr<ModelParser> (*)(void); | |||
// Create modelparser for different frameworks | |||
class GE_FUNC_VISIBILITY ModelParserFactory { | |||
@@ -82,7 +80,7 @@ class GE_FUNC_VISIBILITY ModelParserRegisterar { | |||
} \ | |||
ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser) | |||
typedef std::shared_ptr<WeightsParser> (*WEIGHTS_PARSER_CREATOR_FUN)(void); | |||
using WEIGHTS_PARSER_CREATOR_FUN = std::shared_ptr<WeightsParser> (*)(void); | |||
// Create weightsparser for different frameworks | |||
class GE_FUNC_VISIBILITY WeightsParserFactory { | |||
@@ -29,8 +29,8 @@ | |||
namespace ge { | |||
struct ParserContext { | |||
// format of the input specified by the command line | |||
std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map; | |||
std::vector<domiTensorFormat_t> output_formats; | |||
std::unordered_map<std::string, domi::domiTensorFormat_t> input_nodes_format_map; | |||
std::vector<domi::domiTensorFormat_t> output_formats; | |||
// user-designate input dims | |||
std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims; | |||
std::map<std::string, std::vector<int64_t>> input_dims; | |||
@@ -58,7 +58,7 @@ struct ParserContext { | |||
bool train_flag = false; | |||
domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; | |||
domi::FrameworkType type = domi::FRAMEWORK_RESERVED; | |||
RunMode run_mode = GEN_OM_MODEL; | |||
RunMode run_mode = RunMode::GEN_OM_MODEL; | |||
// save caffe custom proto path, used by caffe parse | |||
std::string custom_proto_path; | |||
// save caffe proto path, used by caffe parse | |||
@@ -19,8 +19,6 @@ | |||
#include <memory> | |||
#include <set> | |||
#include <string> | |||
#include <vector> | |||
#include "framework/common/debug/log.h" | |||
#include "framework/common/string_util.h" | |||
@@ -34,7 +32,7 @@ class GE_FUNC_VISIBILITY PlatformVersionManager { | |||
static Status GetPlatformVersion(std::string &ver) { | |||
ver = "1.11.z"; | |||
const std::vector<std::string> version_splits = StringUtils::Split(ver, '.'); | |||
GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;); | |||
GE_IF_BOOL_EXEC(version_splits.size() < 3U, GELOGW("Read platform version error!"); return FAILED;); | |||
GELOGI("Read current platform version: %s.", ver.c_str()); | |||
return SUCCESS; | |||
@@ -1 +1 @@ | |||
Subproject commit 1d99928bfcb02e45acc7db73e3ee57304ff1131a | |||
Subproject commit 0a2335712484f85cd44a0f2402eac6932b22b40a |
@@ -1,57 +1,57 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef AICPU_OP_TYPE_LIST_H_ | |||
#define AICPU_OP_TYPE_LIST_H_ | |||
extern "C" { | |||
enum OpKernelType { | |||
TF_KERNEL, | |||
CPU_KERNEL | |||
}; | |||
enum ReturnCode { | |||
OP_TYPE_NOT_SUPPORT, | |||
FORMAT_NOT_SUPPORT, | |||
DTYPE_NOT_SUPPORT | |||
}; | |||
#pragma pack(push, 1) | |||
//One byte alignment | |||
struct SysOpInfo { | |||
uint64_t opLen; | |||
uint64_t opType; | |||
OpKernelType kernelsType; | |||
}; | |||
struct SysOpCheckInfo { | |||
uint64_t opListNum; | |||
uint64_t offSetLen; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
struct SysOpCheckResp { | |||
uint64_t opListNum; | |||
bool isWithoutJson; | |||
uint64_t returnCodeList; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
#pragma pack(pop) | |||
} | |||
#endif // AICPU_OP_TYPE_LIST_H_ | |||
/** | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef AICPU_OP_TYPE_LIST_H_ | |||
#define AICPU_OP_TYPE_LIST_H_ | |||
extern "C" { | |||
enum OpKernelType { | |||
TF_KERNEL, | |||
CPU_KERNEL | |||
}; | |||
enum ReturnCode { | |||
OP_TYPE_NOT_SUPPORT, | |||
FORMAT_NOT_SUPPORT, | |||
DTYPE_NOT_SUPPORT | |||
}; | |||
#pragma pack(push, 1) | |||
// One byte alignment | |||
struct SysOpInfo { | |||
uint64_t opLen; | |||
uint64_t opType; | |||
OpKernelType kernelsType; | |||
}; | |||
struct SysOpCheckInfo { | |||
uint64_t opListNum; | |||
uint64_t offSetLen; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
struct SysOpCheckResp { | |||
uint64_t opListNum; | |||
bool isWithoutJson; | |||
uint64_t returnCodeList; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
#pragma pack(pop) | |||
} | |||
#endif // AICPU_OP_TYPE_LIST_H_ |
@@ -29,6 +29,53 @@ struct AicpuParamHead | |||
uint32_t extInfoLength; // extInfo struct Length | |||
uint64_t extInfoAddr; // extInfo address | |||
}; | |||
enum class AicpuConfigMsgType { | |||
AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */ | |||
AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */ | |||
AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */ | |||
}; | |||
enum class AicpuErrMsgType { | |||
ERR_MSG_TYPE_NULL = 0, | |||
ERR_MSG_TYPE_AICORE = 1, | |||
ERR_MSG_TYPE_AICPU = 2, | |||
}; | |||
typedef struct tagAicpuConfigMsg { | |||
uint8_t msgType; | |||
uint8_t reserved1; | |||
uint16_t bufLen; | |||
uint32_t offset; | |||
uint64_t bufAddr; | |||
uint32_t tsId; | |||
uint32_t reserved2; | |||
} AicpuConfigMsg; | |||
typedef struct tagAicoreErrMsgInfo { | |||
uint8_t errType; | |||
uint8_t version; | |||
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ | |||
uint32_t errorCode; | |||
uint32_t modelId; | |||
uint32_t taskId; | |||
uint32_t streamId; | |||
uint64_t transactionId; | |||
uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */ | |||
} AicoreErrMsgInfo; | |||
typedef struct tagAicpuErrMsgInfo { | |||
uint8_t errType; | |||
uint8_t version; | |||
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ | |||
uint32_t errorCode; | |||
uint32_t modelId; | |||
uint32_t streamId; | |||
uint64_t transactionId; | |||
char opName[64]; /* op name str */ | |||
char errDesc[128]; /* err msg desc info */ | |||
uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */ | |||
} AicpuErrMsgInfo; | |||
#pragma pack(pop) | |||
} // namespace aicpu | |||
@@ -13,7 +13,6 @@ | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef AICPU_ENGINE_H__ | |||
#define AICPU_ENGINE_H__ | |||
@@ -21,7 +21,7 @@ | |||
namespace aicpu { | |||
namespace FWKAdapter { | |||
using char_t = char; | |||
// API RETURN CODE | |||
enum FWKAdptAPIRetCode { | |||
FWK_ADPT_SUCCESS = 0, // success | |||
@@ -63,6 +63,8 @@ enum FWKTaskExtInfoType { | |||
FWK_ADPT_EXT_BITMAP, | |||
FWK_ADPT_EXT_TOPIC_TYPE, | |||
FWK_ADPT_EXT_ASYNCWAIT, | |||
FWK_ADPT_EXT_UNKNOWN_SHAPE_INPUT_INDEX, | |||
FWK_ADPT_EXT_UNKNOWN_SHAPE_OUTPUT_INDEX, | |||
FWK_ADPT_EXT_INVALID | |||
}; | |||
@@ -113,7 +115,7 @@ struct StrFWKKernel { | |||
typedef StrFWKKernel FWKOperateParam; | |||
// Extent info ShapeAndType | |||
const uint32_t kMaxShapeDims = 8; | |||
const uint32_t kMaxShapeDims = 8U; | |||
#pragma pack(push, 1) | |||
struct ShapeAndType { | |||
int32_t type; | |||
@@ -122,13 +124,13 @@ struct ShapeAndType { | |||
#pragma pack(pop) | |||
// Extend info structure for extInfoAddr | |||
const uint32_t kExtInfoHeadSize = 8; | |||
const uint32_t kExtInfoHeadSize = 8U; | |||
#pragma pack(push, 1) | |||
struct ExtInfo { | |||
int32_t infoType; // extend type | |||
uint32_t infoLen; // length for infoMsg | |||
char infoMsg[0]; // extend value | |||
char_t infoMsg[0]; // extend value | |||
}; | |||
#pragma pack(pop) | |||
@@ -143,9 +145,9 @@ struct ResultSummary { | |||
#pragma pack(push, 1) | |||
struct AsyncWait { | |||
uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait | |||
uint32_t waitId; // wait id, GE refresh | |||
uint32_t timeOut; // reserved | |||
uint8_t waitType; // wait type, FWk_ADPT_WAIT_TPYE_EVENT: event wait | |||
uint32_t waitId; // wait id, GE refresh | |||
uint32_t timeOut; // reserved | |||
uint64_t reserved; | |||
}; | |||
#pragma pack(pop) | |||
@@ -94,13 +94,13 @@ enum HcclEventType { | |||
HCCL_EVENT_RESERVED /**< reserved */ | |||
}; | |||
const u32 TAG_MAX_LEN = 127; // 脳卯麓贸碌脛tag 鲁陇露脠 | |||
const u32 TAG_MAX_LEN = 127; // 最大的tag 长度 | |||
using TagAttr = struct TagAttrDef { | |||
char name[TAG_MAX_LEN + 1]; // tag卤锚脢露 | |||
// tag卤锚脢露碌脛陆脫脢脮脢媒戮脻拢卢碌梅脫脙脮脽脢脟路帽禄谩脰梅露炉碌梅脫脙陆脫脢脮陆脫驴脷拢卢0 = 路帽, 1 = 禄谩(脭陇脕么拢卢脭脻虏禄脰搂鲁脰)隆拢 | |||
// 露脭脫脷activeRecv = 0拢卢碌卤陆脫脢脮虏脿脢脮碌陆脢媒戮脻禄貌脮脽路垄脣脥脟毛脟贸脢卤拢卢脰梅露炉脥篓脰陋碌梅脫脙脮脽隆拢 | |||
char name[TAG_MAX_LEN + 1]; // tag标识 | |||
// tag标识的接收数据,调用者是否会主动调用接收接口,0 = 否, 1 = 会(预留,暂不支持)。 | |||
// 对于activeRecv = 0,当接收侧收到数据或者发送请求时,主动通知调用者。 | |||
uint32_t activeRecv; | |||
uint32_t sendCredit; // 脜盲脰脙赂脙tag脭脢脨铆inflight碌脛send赂枚脢媒 | |||
uint32_t sendCredit; // 配置该tag允许inflight的send个数 | |||
uint32_t eventId; | |||
}; | |||
@@ -188,6 +188,15 @@ struct HcomGatherAllToAllVParams { | |||
const char *group; // not used now | |||
}; | |||
typedef enum workMode { | |||
HCCL_MODE_NORMAL = 0, // 不支持任何Probe any,仅支持精确的probe | |||
HCCL_MODE_ANY = 1 // 仅支持ANY_SOURCE + ANY_TAG的probe | |||
} WorkMode; | |||
typedef struct tagCommAttr { | |||
WorkMode mode; // 通信域内的probe工作模式 | |||
uint32_t deviceId = 0; | |||
} CommAttr; | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
@@ -126,72 +126,6 @@ extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, co | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); | |||
/** | |||
* @brief Initialize hcom executor. | |||
* | |||
* @param void | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecInitialize(); | |||
/** | |||
* @brief Finalize hcom executor. | |||
* | |||
* @param void | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecFinalize(); | |||
/** | |||
* @brief Put collective communication operation into hcom executor. | |||
* | |||
* @param opInfo information about collective communication operation. | |||
* @param callback callback after collective communication operation. | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback); | |||
/** | |||
* @brief Put remote access operation into hcom executor. | |||
* | |||
* @param remoteAccessType operation type (read or write). | |||
* @param addrInfos address information about collective communication operation. | |||
* @param callback callback after collective communication operation. | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, | |||
const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, | |||
std::function<void(HcclResult status)> callback); | |||
/** | |||
* @brief Put alltoallv communication operation into hcom executor. | |||
* | |||
* @param params information about alltoallv communication operation. | |||
* @param callback callback after collective communication operation. | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function<void(HcclResult status)> callback); | |||
/** | |||
* @brief Put agther alltoallv communication operation into hcom executor. | |||
* | |||
* @param params information about agther alltoallv communication operation. | |||
* @param callback callback after collective communication operation. | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params, | |||
std::function<void(HcclResult status)> callback); | |||
/** | |||
* @brief Register memories and init resources for remote access. | |||
* | |||
* @param addrList memory addresses for remote access. | |||
* @param count number of remote memory addresses. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
@@ -1,18 +1,12 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/* | |||
* @file mmpa_api.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
*/ | |||
#ifndef _MMPA_API_H_ | |||
#define _MMPA_API_H_ | |||
@@ -1,18 +1,12 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/* | |||
* @file mmpa_linux.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
*/ | |||
#ifndef MMPA_LINUX_MMPA_LINUX_H | |||
#define MMPA_LINUX_MMPA_LINUX_H | |||
@@ -79,6 +79,9 @@ typedef long LONG; | |||
#define MMPA_THREAD_SCHED_OTHER SCHED_OTHER | |||
#define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN | |||
#define MMPA_PATH_SEPARATOR_STR "/" | |||
#define MMPA_PATH_SEPARATOR_CHAR '/' | |||
#define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER | |||
#define MMPA_MAX_NI 19 | |||
@@ -1,83 +1,86 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef MMPA_TYPEDEF_WIN_H | |||
#define MMPA_TYPEDEF_WIN_H | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
extern "C" { | |||
#endif // __cpluscplus | |||
#endif // __cpluscplus | |||
#ifndef FALSE | |||
#define FALSE 0 | |||
#endif | |||
#ifndef TRUE | |||
#define TRUE 1 | |||
#endif | |||
#define EN_OK 0 | |||
#define EN_ERR 1 | |||
#define EN_ERROR (-1) | |||
#define EN_INVALID_PARAM (-2) | |||
#define EN_TIMEOUT (-3) | |||
#define HANDLE_INVALID_VALUE (-1) | |||
#define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||
#define MMPA_MEM_MAX_LEN (0x7fffffff) | |||
#define MMPA_PROCESS_ERROR (0x7fffffff) | |||
#define MMPA_ONE_THOUSAND 1000 | |||
#define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||
#define SUMMER_TIME_OR_NOT (-1) | |||
#define MMPA_ZERO 0 | |||
#define MMPA_VALUE_ONE 1 | |||
#define MMPA_SOCKET_MAIN_EDITION 2 | |||
#define MMPA_SOCKET_SECOND_EDITION 0 | |||
#define MMPA_PIPE_BUF_SIZE 1024 | |||
#define MMPA_MAX_SCANDIR_COUNT 1024 | |||
#define MAX_IOVEC_SIZE 32 | |||
#define MMPA_PIPE_COUNT 2 | |||
#define MMPA_THREADNAME_SIZE 16 | |||
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||
#define MMPA_MIN_OS_VERSION_SIZE 64 | |||
#define MMPA_MAX_NI 19 | |||
#define MMPA_MIDDLE_NI 5 | |||
#define MMPA_LOW_NI (-5) | |||
#define MMPA_MIN_NI (-20) | |||
#define MMPA_MAX_FILE 128 | |||
#define MMPA_MAX_THREAD_PIO 99 | |||
#define MMPA_MIDDLE_THREAD_PIO 66 | |||
#define MMPA_LOW_THREAD_PIO 33 | |||
#define MMPA_MIN_THREAD_PIO 1 | |||
#define MMPA_THREAD_SCHED_RR 0 | |||
#define MMPA_THREAD_SCHED_FIFO 0 | |||
#define MMPA_THREAD_SCHED_OTHER 0 | |||
#define MMPA_THREAD_MIN_STACK_SIZE 0 | |||
#define MM_MUTEX_INITIALIZER NULL | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
} | |||
#endif // __cpluscplus | |||
#endif // __cpluscplus | |||
#endif // _MMPA_TYPEDEF_WIN_H_ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef MMPA_TYPEDEF_WIN_H | |||
#define MMPA_TYPEDEF_WIN_H | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
extern "C" { | |||
#endif // __cpluscplus | |||
#endif // __cpluscplus | |||
#ifndef FALSE | |||
#define FALSE 0 | |||
#endif | |||
#ifndef TRUE | |||
#define TRUE 1 | |||
#endif | |||
#define EN_OK 0 | |||
#define EN_ERR 1 | |||
#define EN_ERROR (-1) | |||
#define EN_INVALID_PARAM (-2) | |||
#define EN_TIMEOUT (-3) | |||
#define HANDLE_INVALID_VALUE (-1) | |||
#define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||
#define MMPA_MEM_MAX_LEN (0x7fffffff) | |||
#define MMPA_PROCESS_ERROR (0x7fffffff) | |||
#define MMPA_ONE_THOUSAND 1000 | |||
#define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||
#define SUMMER_TIME_OR_NOT (-1) | |||
#define MMPA_ZERO 0 | |||
#define MMPA_VALUE_ONE 1 | |||
#define MMPA_SOCKET_MAIN_EDITION 2 | |||
#define MMPA_SOCKET_SECOND_EDITION 0 | |||
#define MMPA_PIPE_BUF_SIZE 1024 | |||
#define MMPA_MAX_SCANDIR_COUNT 1024 | |||
#define MAX_IOVEC_SIZE 32 | |||
#define MMPA_PIPE_COUNT 2 | |||
#define MMPA_THREADNAME_SIZE 16 | |||
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||
#define MMPA_MIN_OS_VERSION_SIZE 64 | |||
#define MMPA_MAX_NI 19 | |||
#define MMPA_MIDDLE_NI 5 | |||
#define MMPA_LOW_NI (-5) | |||
#define MMPA_MIN_NI (-20) | |||
#define MMPA_MAX_FILE 128 | |||
#define MMPA_PATH_SEPARATOR_STR "\\" | |||
#define MMPA_PATH_SEPARATOR_CHAR '\\' | |||
#define MMPA_MAX_THREAD_PIO 99 | |||
#define MMPA_MIDDLE_THREAD_PIO 66 | |||
#define MMPA_LOW_THREAD_PIO 33 | |||
#define MMPA_MIN_THREAD_PIO 1 | |||
#define MMPA_THREAD_SCHED_RR 0 | |||
#define MMPA_THREAD_SCHED_FIFO 0 | |||
#define MMPA_THREAD_SCHED_OTHER 0 | |||
#define MMPA_THREAD_MIN_STACK_SIZE 0 | |||
#define MM_MUTEX_INITIALIZER NULL | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
} | |||
#endif // __cpluscplus | |||
#endif // __cpluscplus | |||
#endif // _MMPA_TYPEDEF_WIN_H_ |
@@ -1,18 +1,12 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/* | |||
* @file mmpa_win.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
*/ | |||
#ifndef MMPA_WIN_MMPA_WIN_H | |||
#define MMPA_WIN_MMPA_WIN_H | |||
@@ -0,0 +1,65 @@ | |||
approvers: | |||
- gegenhua | |||
- qiaohairong | |||
reviewers: | |||
- chuqingxi | |||
- wang-jintang | |||
- luanma_bl | |||
- chen-kang30 | |||
- li-xulong | |||
- Allan_Yu | |||
- minshen | |||
- pan-jixing | |||
- yl_wang | |||
- lijie176 | |||
- mabing726 | |||
- miao-fangzheng | |||
- huang-qiang002 | |||
- su-yueming | |||
- chenpeng-hw | |||
- wang_jianle | |||
- luanma_bl | |||
- LDLD0524 | |||
- wywismygod2020 | |||
- lipeiyang3699 | |||
- koala-zhang | |||
- zhu-jingjing | |||
- zhaozhihui5 | |||
- simbaliuxx | |||
- lyxyz | |||
- zhou-qilong | |||
- block0219 | |||
- hanfuwei | |||
- xchu42 | |||
- sheng-nan | |||
- yangjing88 | |||
- alexlak | |||
- xig514 | |||
- jellylj | |||
- brightlyking | |||
- liuzhenyuhw | |||
- djh602 | |||
- wangjiangben_hw | |||
- li1jie | |||
- clinglai | |||
- liujun2014 | |||
- soupkey | |||
- wu-shengji | |||
- cimeng | |||
- ccl_ligang | |||
- xiaozhedeng | |||
- granpad7 | |||
- tc1qaz | |||
- Ronnie_zheng | |||
- xiexianhu | |||
- zhouyujoe | |||
- zhaoping12 | |||
- tanshengshun | |||
- fanqirui | |||
- xu-binglin | |||
- yangyang016 | |||
- zhangzhongzt | |||
- gegenhua | |||
- qiaohairong | |||
options: | |||
no_parent_owners: true |
@@ -745,6 +745,28 @@ REG_OP(UnsqueezeV2) | |||
.ATTR(axis, ListInt, {}) | |||
.OP_END_FACTORY_REG(UnsqueezeV2) | |||
/** | |||
*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape | |||
is changed, but the data is not changed. \n | |||
*@par Inputs: | |||
*x: A tensor. | |||
*axes: A list of int64, which indicates the dimensions to be inserted. \n | |||
*@par Outputs: | |||
*y: Reshape tensor with same data as input. \n | |||
*@par Third-party framework compatibility | |||
*Compatible with the Onnx operator Unsqueeze in V13. \n | |||
*/ | |||
REG_OP(UnsqueezeV3) | |||
.INPUT(x, TensorType::ALL()) | |||
.INPUT(axes, ListInt) | |||
.OUTPUT(y, TensorType::ALL()) | |||
.OP_END_FACTORY_REG(UnsqueezeV3) | |||
/** | |||
*@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. \n | |||
@@ -821,6 +843,28 @@ REG_OP(SqueezeV2) | |||
.ATTR(axis, ListInt, {}) | |||
.OP_END_FACTORY_REG(SqueezeV2) | |||
/** | |||
*@brief Removes dimensions of size 1 from the shape of a tensor according to axes. \n | |||
*@par Inputs: | |||
*x: A tensor. | |||
*axes: An optional list of int64. If not specified, squeezes all dimensions of | |||
size 1. If specified, only squeezes the dimensions listed. It is an error to | |||
squeeze a dimension that is not 1. \n | |||
*@par Outputs: | |||
*y: Reshape tensor with same data as input. \n | |||
*@par Third-party framework compatibility | |||
*Compatible with the onnx operator Squeeze in V13. \n | |||
*/ | |||
REG_OP(SqueezeV3) | |||
.INPUT(x, TensorType::ALL()) | |||
.OPTIONAL_INPUT(axes, ListInt) | |||
.OUTPUT(y, TensorType::ALL()) | |||
.OP_END_FACTORY_REG(SqueezeV3) | |||
/** | |||
*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n | |||
@@ -1273,7 +1317,7 @@ REG_OP(SortV2) | |||
* @par Inputs: | |||
* One inputs, including: | |||
* @li x: A Tensor. Must be one of the following types: | |||
* float16, float32, int32, int8 ,uint8. \n | |||
* float16, float32, int32, int8, uint8, bool. \n | |||
* @li shape: A Tensor to specify the shape that the input tensor expanded to. \n | |||
* @par Outputs: | |||
@@ -1284,9 +1328,9 @@ REG_OP(SortV2) | |||
*/ | |||
REG_OP(Expand) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32,DT_INT64, DT_INT8, DT_UINT8, DT_BOOL})) | |||
.INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32,DT_INT64, DT_INT8, DT_UINT8, DT_BOOL})) | |||
.OP_END_FACTORY_REG(Expand) | |||
/** | |||
@@ -1342,13 +1386,37 @@ REG_OP(NonZeroWithValue) | |||
.ATTR(dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(NonZeroWithValue) | |||
/** | |||
*@Returns a tensor with updated shape from NonZeroWithValue. \n | |||
*@par Inputs: | |||
*value: A Tensor. The output of NonZeroWithValue. \n | |||
*index: A Tensor. The output of NonZeroWithValue. \n | |||
*count: A Tensor. The type is INT32, means count for non_zero ele in input. \n | |||
* out_value: A Tensor. Has the same type as "value" . \n | |||
* out_index: A Tensor. Has the same type as "index". \n | |||
*/ | |||
REG_OP(NonZeroWithValueShape) | |||
.INPUT(value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, | |||
DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) | |||
.INPUT(index, TensorType({DT_INT32})) | |||
.INPUT(count, TensorType({DT_INT32})) | |||
.OUTPUT(out_value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, | |||
DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) | |||
.OUTPUT(out_index, TensorType({DT_INT32})) | |||
.OP_END_FACTORY_REG(NonZeroWithValueShape) | |||
/** | |||
* @brief Expand the input tensor to a compatible shape. \n | |||
* @par Inputs: | |||
* One inputs, including: | |||
* x: A Tensor. Must be one of the following types: | |||
* float16, float32, int32, int8 ,uint8. \n | |||
* float16, float32, int32, int8, uint8, bool. \n | |||
* @par Attributes: | |||
* shape: A required listInt to specify the shape that the input tensor expanded to. \n | |||
@@ -1362,8 +1430,8 @@ REG_OP(NonZeroWithValue) | |||
*/ | |||
REG_OP(ExpandD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL})) | |||
.REQUIRED_ATTR(shape, ListInt) | |||
.OP_END_FACTORY_REG(ExpandD) | |||
@@ -1404,6 +1472,43 @@ REG_OP(UpdateTensorDesc) | |||
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) | |||
.REQUIRED_ATTR(shape, ListInt) | |||
.OP_END_FACTORY_REG(UpdateTensorDesc) | |||
/** | |||
*@brief Queue data for other operators. \n | |||
*@par Attributes: | |||
*index: Index of the input tensor. The data type must be int32 or int64. | |||
Assume that net has three data nodes, one should be set 0, another should | |||
be set 1, and the left should be set 2. \n | |||
*queue_name: queue name | |||
*output_types: types of outputs data | |||
*output_shapes: shapes of outputs data | |||
*@par Outputs: | |||
*y: A DT_UINT8 tensor. \n | |||
*/ | |||
REG_OP(QueueData) | |||
.OUTPUT(y, TensorType({DT_UINT8})) | |||
.ATTR(index, Int, 0) | |||
.ATTR(queue_name, String, "") | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||
.OP_END_FACTORY_REG(QueueData) | |||
/** | |||
* @brief Ensures that the tensor's shape matches the expected shape. \n | |||
* @par Inputs: | |||
* x: A Tensor. \n | |||
* @par Attributes: | |||
* shape: The shape that needs to be checked \n | |||
* @par Outputs: | |||
* y: A tensor. \n | |||
*/ | |||
REG_OP(EnsureShape) | |||
.INPUT(input, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \ | |||
DT_FLOAT,DT_DOUBLE})) | |||
.OUTPUT(output, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \ | |||
DT_FLOAT,DT_DOUBLE})) | |||
.REQUIRED_ATTR(shape, ListInt) | |||
.OP_END_FACTORY_REG(EnsureShape) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ |
@@ -146,7 +146,7 @@ REG_OP(CTCBeamSearchDecoder) | |||
*@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size, | |||
and C = number of classes (including blank). | |||
It represent the logarithmized probabilities of the outputs. | |||
*@li targets: Tensor of size (N, S), where S= max target length. | |||
*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length. | |||
It represent the target sequences. | |||
*@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs. | |||
*@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets. | |||
@@ -159,11 +159,12 @@ REG_OP(CTCBeamSearchDecoder) | |||
*@li blank : Blank label. Default 0. | |||
*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'. | |||
*@li zero_infinity : Whether to zero infinite losses and the associated gradients. | |||
*@li label_max : The max length of targets. | |||
*@par Third-party framework compatibility | |||
* Compatible with Pytorch CTCLoss operator. | |||
*@par Restrictions: | |||
*@attention Constraints: | |||
*The limit of Label’s length is 1K. | |||
*/ | |||
REG_OP(CTCLossV2) | |||
@@ -176,6 +177,7 @@ REG_OP(CTCLossV2) | |||
.ATTR(blank, Int, 0) | |||
.ATTR(reduction, String, "mean") | |||
.ATTR(zero_infinity, Bool, false) | |||
.ATTR(label_max, Int, 0) | |||
.OP_END_FACTORY_REG(CTCLossV2) | |||
/** | |||
@@ -186,7 +188,7 @@ REG_OP(CTCLossV2) | |||
*@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size, | |||
and C = number of classes (including blank). | |||
It represent the logarithmized probabilities of the outputs. | |||
*@li targets: Tensor of size (N, S), where S= max target length. | |||
*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length. | |||
It represent the target sequences. | |||
*@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs. | |||
*@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets. | |||
@@ -200,11 +202,12 @@ REG_OP(CTCLossV2) | |||
*@li blank : Blank label. Default 0. | |||
*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'. | |||
*@li zero_infinity : Whether to zero infinite losses and the associated gradients. | |||
*@li label_max : The max length of targets. | |||
*@par Third-party framework compatibility | |||
* Compatible with Pytorch CTCLoss operator. | |||
*@par Restrictions: | |||
*@attention Constraints: | |||
*The limit of Label’s length is 1K. | |||
*/ | |||
REG_OP(CTCLossV2Grad) | |||
@@ -219,6 +222,7 @@ REG_OP(CTCLossV2Grad) | |||
.ATTR(blank, Int, 0) | |||
.ATTR(reduction, String, "mean") | |||
.ATTR(zero_infinity, Bool, false) | |||
.ATTR(label_max, Int, 0) | |||
.OP_END_FACTORY_REG(CTCLossV2Grad) | |||
} // namespace ge | |||
@@ -2398,6 +2398,32 @@ REG_OP(DynamicGetNext) | |||
.ATTR(_getnext_inputs_shape_range, String, "") | |||
.OP_END_FACTORY_REG(DynamicGetNext) | |||
/** | |||
* @brief DynamicGetNextV2, dynamic get next data | |||
* @par Inputs: | |||
*x: the iterator, all types are available | |||
* @par Outputs: | |||
* y: the data in iterator, all types are available | |||
* @par Attributes: | |||
* output_types: types of all outputs | |||
* output_shapes: shapes of all outputs | |||
*_dynamic_graph_execute_mode: dynamic graph execution mode, | |||
value is one of lazy_recompile and dynamic_execute | |||
*_getnext_inputs_shape_range: shape ranges of outputs, | |||
it works where _dynamic_graph_execute_mode is dynamic_execute | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(DynamicGetNextV2) | |||
.DYNAMIC_OUTPUT(y, TensorType::ALL()) | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(channel_name, String, "") | |||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||
.ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile") | |||
.ATTR(_getnext_inputs_shape_range, String, "") | |||
.OP_END_FACTORY_REG(DynamicGetNextV2) | |||
/** | |||
*@brief AdpGetNext | |||
*@par Outputs: | |||
@@ -2433,5 +2459,24 @@ REG_OP(GetNextV2) | |||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||
.ATTR(channel_name, String, "") | |||
.OP_END_FACTORY_REG(GetNextV2) | |||
/** | |||
*@brief GetNextFromQueue | |||
*@par Inputs: | |||
*x: the data, only support uint8 | |||
*@par Outputs: | |||
*y: the data in iterator, all types are available | |||
*@par Attributes: | |||
*output_types: types of all outputs | |||
*output_shapes: shapes of all outputs | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GetNextFromQueue) | |||
.INPUT(x, TensorType({DT_UINT8})) | |||
.DYNAMIC_OUTPUT(y, TensorType::ALL()) | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||
.OP_END_FACTORY_REG(GetNextFromQueue) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -24,6 +24,87 @@ | |||
#include "graph/operator_reg.h" | |||
namespace ge { | |||
/** | |||
* @brief Calculate TabulateFusion. \n | |||
* | |||
* @par Inputs: | |||
* Five inputs, including: | |||
* @li table: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li table_info: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li em_x: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li em: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Outputs: | |||
* descriptor: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Attributes: | |||
* Three attributes, including: | |||
* @li last_layer_size: int value. | |||
* @li split_count: int value. | |||
* @li split_index: int value. \n | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(TabulateFusion) | |||
.INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(table_info, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(em_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(em, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(descriptor, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.REQUIRED_ATTR(last_layer_size, Int) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(TabulateFusion) | |||
/** | |||
* @brief Calculate ProdEnvMatA. \n | |||
* | |||
* @par Inputs: | |||
* @li coord: A Tensor. Must be one of the following types: float32, float64. | |||
* @li type: A Tensor. Must be one of the following types: int32. | |||
* @li natoms: A Tensor. Must be one of the following types: int32. | |||
* @li box: A Tensor. Must be one of the following types: float32, float64. | |||
* @li mesh: A Tensor. Must be one of the following types: int32. | |||
* @li davg: A Tensor. Must be one of the following types: float32, float64. | |||
* @li dstd: A Tensor. Must be one of the following types: float32, float64. | |||
* | |||
* @par Outputs: | |||
* descrpt: A Tensor. Must be one of the following types: float32, float64. | |||
* descrpt_deriv: A Tensor. Must be one of the following types: float32, float64. | |||
* rij: A Tensor. Must be one of the following types: float32, float64. | |||
* nlist: A Tensor. Must be one of the following types: int32. \n | |||
* | |||
* @par Attributes: | |||
* @li rcut_a: A Float. | |||
* @li rcut_r: A Float. | |||
* @li rcut_r_smth: A Float. | |||
* @li sel_a: A ListInt. | |||
* @li split_count: A Int. | |||
* @li split_index: A Int.\n | |||
* | |||
*/ | |||
REG_OP(ProdEnvMatA) | |||
.INPUT(coord, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(type, TensorType({DT_INT32})) | |||
.INPUT(natoms, TensorType({DT_INT32})) | |||
.INPUT(box, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(mesh, TensorType({DT_INT32})) | |||
.INPUT(davg, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(dstd, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(descrpt, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(descrpt_deriv, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(rij, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(nlist, TensorType({DT_INT32})) | |||
.ATTR(rcut_a, Float, 1.0) | |||
.ATTR(rcut_r, Float, 1.0) | |||
.ATTR(rcut_r_smth, Float, 1.0) | |||
.ATTR(sel_a, ListInt, {}) | |||
.ATTR(sel_r, ListInt, {}) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(ProdEnvMatA) | |||
/** | |||
* @brief Calculate ProdForceSeA. \n | |||
* | |||
@@ -53,7 +134,80 @@ REG_OP(ProdForceSeA) | |||
.OUTPUT(atom_force, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.REQUIRED_ATTR(n_a_sel, Int) | |||
.REQUIRED_ATTR(n_r_sel, Int) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(ProdForceSeA) | |||
/** | |||
* @brief Calculate ProdVirialSeA. \n | |||
* | |||
* @par Inputs: | |||
* Five inputs, including: | |||
* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li rij: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li nlist: A Tensor. dtype is int32. | |||
* @li natoms: A Tensor. dtype is int32. \n | |||
* | |||
* @par Outputs: | |||
* Two outputs, including: | |||
* @li virial: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Attributes: | |||
* Two attributes, including: | |||
* @li n_a_sel: Int value. | |||
* @li n_r_sel: Int value. | |||
* @li split_count: Int value. | |||
* @li split_index: Int value. \n | |||
*/ | |||
REG_OP(ProdVirialSeA) | |||
.INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(nlist, TensorType({DT_INT32})) | |||
.INPUT(natoms, TensorType({DT_INT32})) | |||
.OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.REQUIRED_ATTR(n_a_sel, Int) | |||
.REQUIRED_ATTR(n_r_sel, Int) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(ProdVirialSeA) | |||
/** | |||
* @brief Calculate TabulateFusionGrad. \n | |||
* | |||
* @par Inputs: | |||
* Six inputs, including: | |||
* @li table: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li table_info: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li em_x: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li em: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li dy: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li descriptor: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Outputs: | |||
* @li dy_dem_x: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li dy_dem: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Attributes: | |||
* Two attributes, including: | |||
* @li split_count: A Scalar. | |||
* @li split_index: A Scalar. \n | |||
*/ | |||
REG_OP(TabulateFusionGrad) | |||
.INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(table_info, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(em_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(em, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(descriptor, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(dy_dem_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(dy_dem, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(TabulateFusionGrad) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_ |
@@ -331,7 +331,7 @@ REG_OP(Sub) | |||
*@par Inputs: | |||
*One input, including: \n | |||
*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n | |||
*x: A Tensor. Must be one of the following types: float16, float32, double, int8, int16, int32, int64. \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x". \n | |||
@@ -340,8 +340,10 @@ REG_OP(Sub) | |||
*Compatible with the TensorFlow operator Abs. | |||
*/ | |||
REG_OP(Abs) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, | |||
DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, | |||
DT_INT32, DT_INT64})) | |||
.OP_END_FACTORY_REG(Abs) | |||
/** | |||
@@ -3821,6 +3823,10 @@ REG_OP(CosineSimilarity) | |||
* @li max_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
* @li global_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
* @li weight_decay: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
* @li step_size: A Optional Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
* @par Attributes: | |||
* @li adam_mode: An optional string. Defaults to "adam". \n | |||
*@par Outputs: | |||
*three inputs, including: | |||
@@ -3840,9 +3846,11 @@ REG_OP(ApplyAdamV2) | |||
.INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OPTIONAL_INPUT(step_size, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.ATTR(adam_mode, String, "adam") | |||
.OP_END_FACTORY_REG(ApplyAdamV2) | |||
} // namespace ge | |||
@@ -132,7 +132,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n | |||
*@li x:A Tensor. Must be one of the following types:uint8, uint16, int8, | |||
int16, int32, int64, float16, float, double. A 4-D tensor of shape | |||
[batch, image_height, image_width, depth]. The format must be NHWC. | |||
*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | |||
*@li boxes: A Tensor. Must be one of the following types: float16, float. A 2-D tensor of shape [num_boxes, 4]. | |||
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with | |||
int32 values in [0, batch). | |||
*@li crop_size: A Tensor of type int32. A 1-D tensor of 2 elements, crop_size | |||
@@ -146,7 +146,7 @@ extrapolation, when applicable. | |||
NearestNeighbor . \n | |||
*@par Outputs: | |||
*y:A Tensor of type float. The format must be NHWC. \n | |||
*y: A Tensor. Must be one of the following types: float16, float. The format must be NHWC. \n | |||
*@attention Constraints: | |||
*Input images must be a 4-D tensor . \n | |||
@@ -158,10 +158,10 @@ NearestNeighbor . \n | |||
REG_OP(CropAndResize) | |||
.INPUT(x, TensorType({DT_UINT8, DT_UINT16, DT_INT8, \ | |||
DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(boxes, TensorType({DT_FLOAT})) | |||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(box_index, TensorType({DT_INT32})) | |||
.INPUT(crop_size, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(extrapolation_value, Float, 0) | |||
.ATTR(method, String, "bilinear") | |||
.OP_END_FACTORY_REG(CropAndResize) | |||
@@ -175,7 +175,7 @@ REG_OP(CropAndResize) | |||
*Input images must be a 5HD tensor. Inputs include: | |||
*@li x:A Tensor. Must be one of the following types:float16, float. A 5HD tensor of shape | |||
* [batch, C1, image_height, image_width, C0]. | |||
*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | |||
*@li boxes: A Tensor. Must be one of the following types: float16, float. A 2-D tensor of shape [num_boxes, 4]. | |||
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n | |||
*@par Attributes: | |||
@@ -184,7 +184,7 @@ REG_OP(CropAndResize) | |||
*@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n | |||
*@par Outputs: | |||
*y:A Tensor of type float . \n | |||
*y: A Tensor. Must be one of the following types: float16, float. \n | |||
*@attention Constraints: | |||
*Input images must be a 5HD tensor . \n | |||
@@ -197,9 +197,9 @@ REG_OP(CropAndResize) | |||
*/ | |||
REG_OP(CropAndResizeD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(boxes, TensorType({DT_FLOAT})) | |||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(box_index, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(crop_size, ListInt) | |||
.ATTR(extrapolation_value, Float, 0) | |||
.ATTR(method, String, "bilinear") | |||
@@ -888,10 +888,10 @@ Defaults to false . \n | |||
*@li half_pixel_centers: An optional bool. Defaults to False . \n | |||
*@par Outputs: | |||
*y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||
*y: A Tensor with the same type and format as input "images" . \n | |||
*@par Third-party framework compatibility | |||
*Compatible with tensorflow ResizeNearestNeighborV2 operator. | |||
*Compatible with tensorflow ResizeNearestNeighbor operator. | |||
*/ | |||
REG_OP(ResizeNearestNeighborV2) | |||
@@ -378,7 +378,7 @@ to each component of an element of this dataset. | |||
REG_OP(GetNext) | |||
.DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, | |||
DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL})) | |||
.ATTR(output_types, ListInt, {}) | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(output_shapes, ListListInt, {}) | |||
.ATTR(output_num, Int, 1) | |||
.ATTR(channel_name, String, "") | |||
@@ -213,9 +213,9 @@ REG_OP(GEMM) | |||
*/ | |||
REG_OP(BatchMatMul) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.ATTR(adj_x1, Bool, false) | |||
.ATTR(adj_x2, Bool, false) | |||
.OP_END_FACTORY_REG(BatchMatMul) | |||
@@ -246,11 +246,11 @@ REG_OP(BatchMatMul) | |||
*/ | |||
REG_OP(BatchMatMulV2) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.ATTR(adj_x1, Bool, false) | |||
.ATTR(adj_x2, Bool, false) | |||
.ATTR(offset_x, Int, 0) | |||
@@ -505,17 +505,17 @@ REG_OP(ScatterElements) | |||
* Three inputs, including: | |||
*@li var: An ND Tensor . | |||
*Must be one of the following types: float16, float32, int32, int8, uint8 | |||
*@li indices: An ND Tensor of type int32 or int64 | |||
*Must be one of the following types: float16, float, int32, int8, uint8 | |||
*@li indices: An ND Tensor . \n | |||
*@li updates: An Tensor. format:NCHW, NHWC . | |||
*Must be one of the following types: int32 or int64 | |||
*@li updates: An ND Tensor . | |||
*Must be one of the following types: float16, float32, int32, int8, uint8 | |||
*Must be one of the following types: float16, float, int32, int8, uint8 | |||
*@par Attributes: | |||
* use_locking: An optional bool. Defaults to "False". If "True", the operation | |||
* will be protected by a lock . \n | |||
*use_locking: An optional bool. Defaults to "False". If "True", | |||
* the operation will be protected by a lock . \n | |||
*@par Outputs: | |||
*var: A Tensor. Has the same type and format as input "var" . \n | |||
@@ -792,13 +792,13 @@ REG_OP(DiagPart) | |||
* Four inputs, including: | |||
*@li x: A Tensor of type float16, int8. | |||
*@li w: A weight matrix of type float16, int8. | |||
*@li b: A Tensor of type float16, int32, float32. | |||
*@li offset_w: A Tensor of type int8 . \n | |||
*@li b: An optional Tensor of type float16, int32, float32. | |||
*@li offset_w: An optional Tensor of type int8. Reserved. Only None Supported. \n | |||
*@par Attributes: | |||
*@li num_output: Reserved. | |||
*@li num_output: Required. An int, output neuron number. Reserved. | |||
*@li transpose: A bool, specifying weight whether to transpose input w, either "true" or "false". Defaults to "false". | |||
*@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. | |||
*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. | |||
* The product of the subsequent dimensions starting form first dimension or the second dimension is "K". | |||
*@li offset_x: An optional integer for quantized FullyConnection. | |||
*The negative offset added to the input image for int8 type. Ensure offset_x within the | |||
@@ -814,11 +814,11 @@ REG_OP(DiagPart) | |||
* Yes | |||
*/ | |||
REG_OP(FullyConnection) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) | |||
.INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) | |||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) | |||
.INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) | |||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) | |||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) | |||
.REQUIRED_ATTR(num_output, Int) | |||
.ATTR(transpose, Bool, false) | |||
.ATTR(axis, Int, 1) | |||
@@ -1360,6 +1360,45 @@ REG_OP(FillDiagonal) | |||
.ATTR(wrap, Bool, false) | |||
.OP_END_FACTORY_REG(FillDiagonal) | |||
/** | |||
*@brief: Returns the sum of the elements of the diagonal of the input 2-D matrix. \n | |||
*@par Inputs: | |||
*x: A Tensor. Must be one of the following types: | |||
* float16, float. \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Trace. | |||
*/ | |||
REG_OP(Trace) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(Trace) | |||
/** | |||
*@brief Computes the generalized inverse of any matrix. \n | |||
*@par Inputs: | |||
* @li x: input matrix. Must be one of the following types: | |||
* double, float. \n | |||
*@par Attributes: | |||
* @li rcond: An optional float >= 0 or inf. Defaults to 1e-15. \n | |||
*@par Outputs: | |||
* y: A Tensor with the same type and shape of x's transpose. \n | |||
*/ | |||
REG_OP(Pinverse) | |||
.INPUT(x, TensorType({ DT_FLOAT, DT_DOUBLE })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT, DT_DOUBLE })) | |||
.ATTR(rcond, Float, 1e-15) | |||
.OP_END_FACTORY_REG(Pinverse) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ |
@@ -142,6 +142,74 @@ REG_OP(BatchNorm) | |||
.ATTR(is_training, Bool, true) | |||
.OP_END_FACTORY_REG(BatchNorm) | |||
/** | |||
* @brief After the mean and reciprocal of standard deviation(invert_std) are separately calculated on each device, | |||
* the mean and reciprocal of standard deviation(invert_std) data on each device are normalized, | |||
* a total mean and reciprocal of standard deviation(invert_std) are returned, and running_var are updated. | |||
* @par Inputs: | |||
* include: | |||
* @li mean_all: A Tensor. The mean of each device. Must be one of the following types: float16, float32. | |||
* @li invert_std_all: A Tensor. Reciprocal of the variances of each device. Must be one of the following types: float16, float32. | |||
* @li count_all: A Tensor. Number of data for each device. Must be one of the following types: float16, float32. | |||
* @li mean_broadcast: A Tensor. The overall average and broadcast. Must be one of the following types: float16, float32. | |||
* @li count_sum: A Tensor. General statistics. Must be one of the following types: float16, float32. | |||
* @li running_var: A Tensor. Runtime variance. Must be one of the following types: float16, float32. \n | |||
* @par Attributes: | |||
* Two Attributes, including: | |||
* @li momentum: An optional float. Defaults to 0.1. \n | |||
* @li epsilon: An optional float. Defaults to 0.001. \n | |||
* @par Outputs: | |||
* include: | |||
* @li invert_std: A Tensor. It's inverse of total variance. | |||
* @li running_var_update: A Tensor. It's moving variance of each device after the update. \n | |||
* @par Third-party framework compatibility | |||
* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate | |||
* compatible with the Pytorch operator BatchNormGatherStatsWithCounts. | |||
*/ | |||
REG_OP(SyncBatchNormGatherStatsWithCounts) | |||
.INPUT(mean_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(invert_std_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(mean_broadcast, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(running_var, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(invert_std, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(running_var_update, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.ATTR(momentum, Float, 0.1) | |||
.ATTR(epsilon, Float, 0.001) | |||
.OP_END_FACTORY_REG(SyncBatchNormGatherStatsWithCounts) | |||
/** | |||
* @brief update running_mean. | |||
* @par Inputs: | |||
* include: | |||
* @li mean: A Tensor. The mean of each device. Must be one of the following types: float16, float32. | |||
* @li running_mean: A Tensor. Runtime Mean. Must be one of the following types: float16, float32. \n | |||
* @par Attributes: | |||
* One Attribute, including: | |||
* @li momentum: An optional float. Defaults to 0.1. \n | |||
* @par Outputs: | |||
* include: | |||
* @li running_mean_update: A Tensor. It's moving mean of each device after the update. \n | |||
* @par Third-party framework compatibility | |||
* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate | |||
* compatible with the Pytorch operator BatchNormGatherStatsWithCounts. | |||
*/ | |||
REG_OP(SyncBNTrainingUpdate) | |||
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(running_mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(running_mean_update, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.ATTR(momentum, Float, 0.1) | |||
.OP_END_FACTORY_REG(SyncBNTrainingUpdate) | |||
/** | |||
*@brief part of SyncBatchNormBackward . \n | |||
@@ -134,9 +134,9 @@ REG_OP(DepthwiseConv2DBackpropFilter) | |||
* instead. | |||
*/ | |||
REG_OP(DepthwiseConv2DBackpropFilterD) | |||
.INPUT(input, TensorType({float16})) | |||
.INPUT(out_backprop, TensorType({float16})) | |||
.OUTPUT(filter_grad, TensorType({float32})) | |||
.INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) | |||
.INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) | |||
.OUTPUT(filter_grad, TensorType({DT_FLOAT32})) | |||
.REQUIRED_ATTR(filter_size, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||
@@ -764,7 +764,7 @@ REG_OP(Conv2DBackpropFilterD) | |||
| | float32 | float32 | float32 | float32 |\n | |||
| | int8 | int8 | int32 | int32 |\n | |||
| Format | NCHW | NCHW | ND | NCHW |\n | |||
| | NHWC | HWCN | | NHWC |\n | |||
| | NHWC | HWCN | ND | NHWC |\n | |||
*\n | |||
* For float32 type, the actual calculation on the chip is based on | |||
* float16. | |||
@@ -1650,5 +1650,43 @@ REG_OP(Dilation) | |||
.ATTR(padding_value, Float, 0.0) | |||
.OP_END_FACTORY_REG(Dilation) | |||
/** | |||
*@brief Computes the post-cube processing output with the expected input | |||
*@par Inputs: | |||
* Ten inputs: | |||
* x1: A Tensor of type float16, bfloat16, float32, int32 | |||
* x2: A Tensor of type float16, int8, int4 | |||
* quant_scale_0: A Tensor of type uint64 | |||
* relu_weight_0: A Tensor of type float32 | |||
* clip_value_0: A Tensor of type float16, int8, int4 | |||
* quant_scale_1: A Tensor of type uint64 | |||
* relu_weight_1: A Tensor of type float32 | |||
* clip_value_1: A Tensor of type float16 | |||
* anti_quant_scale: A Tensor of type float16 | |||
* anti_quant_offset: A Tensor of type int8, int4 | |||
*@par Attributes: | |||
* @li fusion_op_list: A list of String. | |||
* @li unit_list: A list of String | |||
* @li eltwise_mode: An optional string from "ADD", "SUB" and "". | |||
*@par Outputs: | |||
* output: A Tensor. A Tensor of type float16, bfloat16, float32, int32, int8, int4. | |||
*/ | |||
REG_OP(FixPipe) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT, DT_INT32})) | |||
.OPTIONAL_INPUT(x2, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) | |||
.OPTIONAL_INPUT(quant_scale_0, TensorType({DT_UINT64})) | |||
.OPTIONAL_INPUT(relu_weight_0, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(clip_value_0, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) | |||
.OPTIONAL_INPUT(quant_scale_1, TensorType({DT_UINT64})) | |||
.OPTIONAL_INPUT(relu_weight_1, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(clip_value_1, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(anti_quant_scale, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(anti_quant_offset, TensorType({DT_INT8, DT_INT4})) | |||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT, DT_INT32, DT_INT8, DT_INT4})) | |||
.REQUIRED_ATTR(fusion_op_list, ListString) | |||
.REQUIRED_ATTR(unit_list, ListString) | |||
.ATTR(eltwise_mode, String, "") | |||
.OP_END_FACTORY_REG(FixPipe) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ |
@@ -1179,6 +1179,8 @@ REG_OP(SPP) | |||
* the index of the input feature map, "x1", "y1", "x2", or "y2" must be | |||
* greater than or equal to "0.0". | |||
* roi_max_num must be less than or equal to 6000 and must be divided by 16. | |||
* The input data of the rois cannot exceed the width and height range of the x, | |||
* otherwise, the accuracy of the output result may not be as expected. | |||
*@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying | |||
* the number of ROIs per batch . \n | |||
@@ -2076,7 +2078,7 @@ REG_OP(GIoUGrad) | |||
* trans: An optional attr, true for 'xyxyt', false for 'xywht'. | |||
*@par Outputs: | |||
* overlaps: A 3D Tensor of type float16 or float32 with shape [B, N, K]. | |||
* overlaps: A 3D Tensor of type float32 with shape [B, N, K]. | |||
*@attention Constraints: | |||
* In each batch, the invalid box cannot appear before the valid box. | |||
@@ -2087,6 +2089,100 @@ REG_OP(RotatedOverlaps) | |||
.OUTPUT(overlaps, TensorType({DT_FLOAT})) | |||
.ATTR(trans, Bool, false) | |||
.OP_END_FACTORY_REG(RotatedOverlaps) | |||
/** | |||
*@brief RotatedIou . \n | |||
*@par Inputs: | |||
*@li boxes : data of grad increment, a 3D Tensor of type float32 with | |||
* shape (B, 5, N). "N" indicates the number of boxes, and the value | |||
* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. | |||
*@li query_boxes: Bounding boxes, a 3D Tensor of type float32 with | |||
* shape (B, 5, K). "K" indicates the number of boxes, and the value | |||
* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. | |||
*@par Attributes: | |||
*@li trans: An optional attr, true for 'xyxyt', false for 'xywht'. | |||
*@li mode: An optional attr, a character string with the value range of ['iou', 'iof'], | |||
* only support 'iou' now. | |||
*@li is_cross: Cross calculation when it is True, and one-to-one calculation when it is False. | |||
*@li v_threshold: An optional attr, provide condition relaxation for intersection calculation. | |||
*@li e_threshold: An optional attr, provide condition relaxation for intersection calculation. | |||
*@par Outputs: | |||
* iou: A 3D Tensor of float32 with shape [B, N, K]. | |||
*@attention Constraints: | |||
* In each batch, the invalid box cannot appear before the valid box. | |||
*/ | |||
REG_OP(RotatedIou) | |||
.INPUT(boxes, TensorType({DT_FLOAT})) | |||
.INPUT(query_boxes, TensorType({DT_FLOAT})) | |||
.OUTPUT(iou, TensorType({DT_FLOAT})) | |||
.ATTR(trans, Bool, false) | |||
.ATTR(mode, String, "iou") | |||
.ATTR(is_cross, Bool, true) | |||
.ATTR(v_threshold, Float, 0) | |||
.ATTR(e_threshold, Float, 0) | |||
.OP_END_FACTORY_REG(RotatedIou) | |||
/** | |||
*@brief RotatedBoxEncode. \n | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
* "B" indicates the number of batch size | |||
* "N" indicates the number of bounding boxes, and the value "5" refers to | |||
* "x0", "x1", "y0", "y1" and "angle". | |||
*@li gt_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
* "B" indicates the number of batch size | |||
* "N" indicates the number of bounding boxes, and the value "5" refers to | |||
* "x0", "x1", "y0", "y1" and "angle". \n | |||
*@par Attributes: | |||
*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle", | |||
* defaults to [1.0, 1.0, 1.0, 1.0, 1.0]. | |||
*@par Outputs: | |||
*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N), | |||
* specifying the variations between all anchor boxes and ground truth boxes. | |||
*/ | |||
REG_OP(RotatedBoxEncode) | |||
.INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(gt_box, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0}) | |||
.OP_END_FACTORY_REG(RotatedBoxEncode) | |||
/** | |||
*@brief RotatedBoxDecode. \n | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
* "B" indicates the number of batch size | |||
* "N" indicates the number of bounding boxes, and the value "5" refers to | |||
* "x0", "x1", "y0", "y1" and "angle". | |||
*@li deltas: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
* "B" indicates the number of batch size | |||
* "N" indicates the number of bounding boxes, and the value "5" refers to | |||
* "x0", "x1", "y0", "y1" and "angle". \n | |||
*@par Attributes: | |||
*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle", | |||
* defaults to [1.0, 1.0, 1.0, 1.0, 1.0]. | |||
*@par Outputs: | |||
*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N), | |||
* specifying the boxes decoded from "anchor_box" and "deltas". | |||
*/ | |||
REG_OP(RotatedBoxDecode) | |||
.INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0}) | |||
.OP_END_FACTORY_REG(RotatedBoxDecode) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | |||
@@ -1487,25 +1487,51 @@ REG_OP(Roll) | |||
.OP_END_FACTORY_REG(Roll) | |||
/** | |||
*@brief Calculate the loss. Creates a criterion that optimizes a two-class classification | |||
logistic loss between input_x and input_y (containing 1 or -1). \n | |||
* @brief Roll the tensor along the given dimension(s). | |||
*@par Inputs: | |||
*Tow inputs, including: | |||
* @par Inputs: | |||
* One input, including: | |||
* x: A tensor | |||
* @par Attributes: | |||
* @li shift: The number of places by which the elements of the tensor are shifted. \n | |||
* @li axes: Axis along which to roll. \n | |||
* @par Outputs: | |||
* y: A Tensor with the same type and shape of x's. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Roll. \n | |||
*/ | |||
REG_OP(RollV2) | |||
.INPUT(input, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \ | |||
DT_FLOAT,DT_DOUBLE})) | |||
.INPUT(shift, TensorType({DT_INT32,DT_INT64})) | |||
.INPUT(axes, TensorType({DT_INT32,DT_INT64})) | |||
.OUTPUT(output, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \ | |||
DT_FLOAT,DT_DOUBLE})) | |||
.OP_END_FACTORY_REG(RollV2) | |||
/** | |||
* @brief Calculate the loss. Creates a criterion that optimizes a two-class classification | |||
* logistic loss between input_x and input_y (containing 1 or -1). \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li input_x: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @li input_y: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
*@par Attributes: | |||
*reduction: An optional string.Defaults to "mean". \n | |||
* @par Attributes: | |||
* reduction: An optional string. Defaults to "mean". \n | |||
*@par Outputs: | |||
*output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n | |||
* @par Outputs: | |||
* output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n | |||
* while reduction == "sum" or "mean", A Tensor with the same type of input_x , shape of which is (1,) | |||
*@par Third-party framework compatibility | |||
*Compatible with the Pytorch operator SoftMarginLoss. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator SoftMarginLoss. \n | |||
*/ | |||
REG_OP(SoftMarginLoss) | |||
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
@@ -1624,18 +1650,18 @@ REG_OP(MultilabelMarginLoss) | |||
.OP_END_FACTORY_REG(MultilabelMarginLoss) | |||
/** | |||
*@brief Performs batch normalization . \n | |||
*@par Inputs: | |||
* @brief Performs batch normalization . \n | |||
* @par Inputs: | |||
* Two inputs | |||
*@li input_x: A Tensor. Support float32. shape (n, c, d). | |||
*@li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n | |||
*@par Attributes: | |||
*@li normalize_type: Str. Support "per_feature" or "all_features". | |||
*@li epsilon: An optional float32, specifying the small value added to | |||
variance to avoid dividing by zero. Defaults to "0.00001" . \n | |||
*@par Outputs: | |||
* @li input_x: A Tensor. Support float32. shape (n, c, d). | |||
* @li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n | |||
* @par Attributes: | |||
* @li normalize_type: Str. Support "per_feature" or "all_features". | |||
* @li epsilon: An optional float32, specifying the small value added to | |||
* variance to avoid dividing by zero. Defaults to "0.00001" . \n | |||
* @par Outputs: | |||
* One output | |||
*@li output_y: A Tensor for the normalized "x".Support float32. shape (n, c, d).\n | |||
* @li output_y: A Tensor for the normalized "x".Support float32. shape (n, c, d).\n | |||
*/ | |||
REG_OP(NormalizeBatch) | |||
.INPUT(input_x, TensorType({ DT_FLOAT })) | |||
@@ -1644,6 +1670,36 @@ REG_OP(NormalizeBatch) | |||
.REQUIRED_ATTR(normalize_type, String) | |||
.ATTR(epsilon, Float, 0.00001) | |||
.OP_END_FACTORY_REG(NormalizeBatch) | |||
/** | |||
*@brief GroupNorm and Relu operator | |||
* calculating: x, gamma, beta | |||
* y = relu(gamma*((x - mean) / np.sqrt(variance + 0.001)) + beta) | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li x: A Tensor. Must be one of the following types: float16, float32. | |||
* @li gamma: A Tensor. Must be one of the following types: float16, float32. | |||
* @li beta: A Tensor. Must be one of the following types: float16, float32 . \n | |||
* @par Attributes: | |||
* @li num_groups: A required attribute, the type is int32. | |||
* @li eps: An optional attribute, the type is float32. Defaults to 0.00001. \n | |||
* @par Outputs: | |||
* One output, including: | |||
* @li y: A Tensor. Must be one of the following types: float16, float32. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GroupNormRelu) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.REQUIRED_ATTR(num_groups, Int) | |||
.ATTR(eps, Float, 0.00001) | |||
.OP_END_FACTORY_REG(GroupNormRelu) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ |
@@ -1747,7 +1747,8 @@ included in the sample.\n | |||
*@par Third-party framework compatibility | |||
*Compatible with the Pytorch operator SubSample. | |||
*@par Restrictions: | |||
*@attention Constraints: | |||
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly. | |||
*/ | |||
REG_OP(SubSample) | |||
@@ -1776,7 +1777,8 @@ included in the sample.\n | |||
*@par Third-party framework compatibility | |||
*Compatible with the Pytorch operator SubSampleLabels. | |||
*@par Restrictions: | |||
*@attention Constraints: | |||
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly. | |||
*/ | |||
REG_OP(SubSampleLabels) | |||
@@ -25,7 +25,8 @@ | |||
namespace ge { | |||
/** | |||
*@brief Computes the for the gelu of "x" . \n | |||
*@brief The GELU activation function is x*Φ(x), | |||
* where Φ(x) the standard Gaussian cumulative distribution function. \n | |||
*@par Inputs: | |||
*One input, including: | |||
@@ -144,7 +145,7 @@ REG_OP(GeluGrad) | |||
.OP_END_FACTORY_REG(GeluGrad) | |||
/** | |||
*@brief Computes the for the fast_gelu of "x" . \n | |||
*@brief The FastGelu activation function is x*e^(0.851*x)*(x-|x|)/(1+e^(-1.702|x|)). \n | |||
*@par Inputs: | |||
*One input, including: | |||
@@ -159,7 +160,23 @@ REG_OP(FastGelu) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(FastGelu) | |||
/** | |||
*@brief The FastGeluV2 activation function is x*(sgn(x)*[(a/2)*(clip(|x|,max=-b)+b)^2+0.5]+0.5), | |||
* where sgn(x) function is (x+0.000000000001)/|(x+0.000000000001)|. \n | |||
*@par Inputs: | |||
*One input, including: | |||
*x: A Tensor. Must be one of the following types: float16, float32 | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x". | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator FastGeluV2 | |||
*/ | |||
REG_OP(FastGeluV2) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(FastGeluV2) | |||
/** | |||
*@brief Computes the gradient for the fast_gelu of "x" . \n | |||
@@ -623,9 +640,7 @@ REG_OP(Elu) | |||
*x: A float16, float32, for the input data type . \n | |||
*@par Attributes: | |||
*@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
*@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
*@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n | |||
*li alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
*@par Outputs: | |||
*y: A float16, float32, for the normalized result . \n | |||
@@ -641,9 +656,7 @@ REG_OP(Elu) | |||
REG_OP(Celu) | |||
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.ATTR(alpha1, Float, 1.0) | |||
.ATTR(alpha2, Float, 1.0) | |||
.ATTR(alpha3, Float, 1.0) | |||
.ATTR(alpha, Float, 1.0) | |||
.OP_END_FACTORY_REG(Celu) | |||
/** | |||
@@ -117,6 +117,33 @@ REG_OP(NPUGetFloatStatus) | |||
.INPUT(addr, TensorType{DT_FLOAT}) | |||
.OUTPUT(data, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(NPUGetFloatStatus) | |||
/** | |||
*@brief Set the value of global workspace to 0. \n | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(NPUClearFloatStatusV2) | |||
.OP_END_FACTORY_REG(NPUClearFloatStatusV2) | |||
/** | |||
*@brief Set the value of global workspace to 0. \n | |||
*@par Inputs: | |||
*addr: A nested structure of Tensors of type float32 . \n | |||
*@par Outputs: | |||
*data: A Tensor of type float32. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(NPUGetFloatStatusV2) | |||
.DYNAMIC_INPUT(addr, TensorType{DT_FLOAT}) | |||
.OUTPUT(data, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(NPUGetFloatStatusV2) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ |
@@ -81,6 +81,7 @@ REG_OP(OCRRecognitionPreHandle) | |||
.OUTPUT(imgs, TensorType({DT_UINT8})) | |||
.OUTPUT(imgs_relation, TensorType({DT_INT32})) | |||
.OUTPUT(imgs_lang, TensorType({DT_INT32})) | |||
.OUTPUT(imgs_piece_fillers, TensorType({DT_INT32})) | |||
.ATTR(batch_size, Int, 8) | |||
.ATTR(data_format, String, "NHWC") | |||
.ATTR(pad_mode, String, "REPLICATE") | |||
@@ -59,6 +59,65 @@ REG_OP(Multinomial) | |||
.ATTR(seed2, Int, 0) | |||
.OP_END_FACTORY_REG(Multinomial) | |||
/** | |||
*@brief Creates a multinomial distribution. \n | |||
*@par Inputs: | |||
*Inputs include: | |||
* @li q: A Tensor. Must be one of the following types: float, double. | |||
1-D Tensor with shape [num_classes]. | |||
* @li j: A Tensor. Must be one of the following types: int64. | |||
1-D Tensor with shape [num_classes]. | |||
* @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice . \n | |||
*@par Attributes: | |||
*@li output_dtype: An optional type from: int32, int64. Defaults to int64. | |||
*@li seed: An optional int. Defaults to 0. | |||
*@li seed2: An optional int. Defaults to 0. \n | |||
*@par Outputs: | |||
*y: A Tensor of type int32 or int64. \n | |||
*@attention Constraints: | |||
*The implementation for MultinomialAliasDraw on Ascend uses AICPU, with bad performance. | |||
*@par Third-party framework compatibility | |||
*@li compatible with torch _multinomial_alias_draw operator. | |||
*/ | |||
REG_OP(MultinomialAliasDraw) | |||
.INPUT(q, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(j, TensorType({DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_INT64})) | |||
.REQUIRED_ATTR(num_samples, Int) | |||
.ATTR(seed, Int, 0) | |||
.OP_END_FACTORY_REG(MultinomialAliasDraw) | |||
/** | |||
*@brief Prepares for MultinomialAliasDraw to create a multinomial distribution. \n | |||
*@par Inputs: | |||
*Inputs include: | |||
* @li probs: A Tensor. Must be one of the following types: float, double. | |||
1-D Tensor with shape [num_classes]. \n | |||
*@par Outputs: | |||
*j: A Tensor. Must be one of the following types: int64. | |||
1-D Tensor with shape [num_classes]. | |||
*q: A Tensor. Must be one of the following types: float, double. | |||
1-D Tensor with shape [num_classes]. \n | |||
*@attention Constraints: | |||
*The implementation for MultinomialAliasSetup on Ascend uses AICPU, with bad performance. | |||
*@par Third-party framework compatibility | |||
*@li compatible with torch _multinomial_alias_setup operator. | |||
*/ | |||
REG_OP(MultinomialAliasSetup) | |||
.INPUT(probs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(j, TensorType({DT_INT64})) | |||
.OUTPUT(q, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OP_END_FACTORY_REG(MultinomialAliasSetup) | |||
/** | |||
*@brief Outputs random values from a normal distribution . \n | |||
@@ -173,6 +232,27 @@ REG_OP(Randperm) | |||
.ATTR(dtype, Type, DT_INT64) | |||
.OP_END_FACTORY_REG(Randperm) | |||
/** | |||
*@brief Fills a tensor with elements drawn from the poisson distribution. \n | |||
*@par Inputs: | |||
*x: A Tensor. Must be one of the following types: float16, float. \n | |||
*@par Attributes: | |||
*@li seed: An optional int. Defaults to 0. \n | |||
*@par Outputs: | |||
*y: A Tensor list with same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Poisson. | |||
*/ | |||
REG_OP(Poisson) | |||
.INPUT(x, TensorType({ DT_FLOAT16,DT_FLOAT })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT16,DT_FLOAT })) | |||
.ATTR(seed, Int, 0) | |||
.OP_END_FACTORY_REG(Poisson) | |||
/** | |||
*@brief Outputs random values from the Poisson distribution(s) described by rate . \n | |||
@@ -446,6 +526,34 @@ REG_OP(DropOutGenMaskV3) | |||
.ATTR(seed2, Int, 0) | |||
.OP_END_FACTORY_REG(DropOutGenMaskV3) | |||
/** | |||
*@brief Generate stateless random bit mask for dropout . \n | |||
*@par Inputs: | |||
include: | |||
*@li shape:The shape of the output tensor. | |||
*@li prob:0-D. Number of bit 1 . \n | |||
*@li seed:If either seed or seed2 are set to be non-zero, the random number | |||
*generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||
*@li seed1:A second seed to avoid seed collision . \n | |||
*@par Outputs: | |||
*y:Output (1-D) random number using uint data format . \n | |||
*@attention Constraints: | |||
*The output is aligned with 128 bits | |||
*@see StatelessDropOutGenMask() | |||
*/ | |||
REG_OP(StatelessDropOutGenMask) | |||
.INPUT(shape, TensorType({ DT_INT32, DT_INT64 })) | |||
.INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||
.INPUT(seed, TensorType({ DT_INT32, DT_INT64 })) | |||
.INPUT(seed1, TensorType({ DT_INT32, DT_INT64 })) | |||
.OUTPUT(y, TensorType({ DT_UINT8 })) | |||
.OP_END_FACTORY_REG(StatelessDropOutGenMask) | |||
/** | |||
*@brief Generates values in an interval . \n | |||
@@ -698,11 +806,62 @@ REG_OP(Uniform) | |||
*@attention Constraints: | |||
* Compatible with the Caffe operator ContinuationIndicator. | |||
*/ | |||
REG_OP(ContinuationIndicator) | |||
.REQUIRED_ATTR(time_step, Int) | |||
.REQUIRED_ATTR(batch_size, Int) | |||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(ContinuationIndicator) | |||
/** | |||
*@brief Outputs random values from the Exponential distribution(s) described by rate . \n | |||
*@par Inputs: | |||
*Inputs include: | |||
* @li x: A Tensor. Must be one of the following types: half, float32, float64. \n | |||
*@par Attributes: | |||
*@li lambda: An optional float. Defaults to 1. | |||
*@li seed: An optional int. Defaults to 0. The random number generator is seeded by the given seed. | |||
Otherwise, it is seeded by a random seed. \n | |||
*@par Outputs: | |||
*y: A Tensor of type dtype float16, float, double. \n | |||
*@attention Constraints: | |||
*The implementation for Exponential on Ascend uses AICPU, with bad performance. | |||
*@par Third-party framework compatibility | |||
*@li compatible with tensorflow Exponential operator. | |||
*/ | |||
REG_OP(Exponential) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(lambda, Float, 1) | |||
.ATTR(seed, Int, 0) | |||
.OP_END_FACTORY_REG(Exponential) | |||
/** | |||
*@brief Fills a tensor with elements drawn from the geometric distribution. \n | |||
*@par Inputs: | |||
*x: A Tensor. Must be one of the following types: float16, float. \n | |||
*@par Attributes: | |||
*@li p: The probability of experimental success in Bernoulli's experiment. | |||
*@li seed: An optional int. Defaults to 0. \n | |||
*@par Outputs: | |||
*y: A Tensor list with same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Geometric. | |||
*/ | |||
REG_OP(Geometric) | |||
.INPUT(x, TensorType({ DT_FLOAT16,DT_FLOAT })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT16,DT_FLOAT })) | |||
.REQUIRED_ATTR(p, Float) | |||
.ATTR(seed, Int, 0) | |||
.OP_END_FACTORY_REG(Geometric) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ |
@@ -0,0 +1,139 @@ | |||
/** | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/*! | |||
* \file randomdsa_ops.h | |||
* \brief | |||
*/ | |||
#ifndef OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H_ | |||
#define OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H_ | |||
#include <vector> | |||
#include "graph/operator_reg.h" | |||
#include "graph/operator.h" | |||
namespace ge { | |||
/** | |||
* @brief Generate DSA random bit mask for dropout. \n | |||
* @par Inputs: | |||
include: | |||
* @li count:The shape of the input tensor. | |||
* @li seed:If seed is set to be non-zero, the random number | |||
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed | |||
* @li dropout:0-D. Number of bit 1 . \n | |||
* @par Attributes: | |||
* @li random_algorithm:The default value is "Philox". \n | |||
* @par Outputs: | |||
* out:Output (1-D) random number using uint data format . \n | |||
* @see DSAGenBitMask() | |||
*/ | |||
REG_OP(DSAGenBitMask) | |||
.INPUT(count, TensorType({DT_INT64})) | |||
.INPUT(seed, TensorType({DT_UINT64})) | |||
.INPUT(dropout, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||
.OUTPUT(out, TensorType({DT_UINT8})) | |||
.ATTR(random_algorithm, String, "Philox") | |||
.OP_END_FACTORY_REG(DSAGenBitMask) | |||
/** | |||
* @brief Generate DSA truncatenormal data in random. \n | |||
* @par Inputs: | |||
include: | |||
* @li count: The shape of the input tensor. | |||
* @li seed: If seed is set to be non-zero, the random number | |||
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed | |||
* @li mean: A Tensor. Must be one of the following types: float16, float32, double | |||
* @li stdev: A Tensor. Must be one of the following types: float16, float32, double. \n | |||
* @par Attributes: | |||
* @li random_algorithm:The default value is "Philox". \n | |||
* @par Outputs: | |||
* out:Output (1-D) random number using float and bf data format . \n | |||
* @see DSARandomTruncatedNormal() | |||
*/ | |||
REG_OP(DSARandomTruncatedNormal) | |||
.INPUT(count, TensorType({DT_INT64})) | |||
.INPUT(seed, TensorType({DT_UINT64})) | |||
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||
.INPUT(stdev, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||
.OUTPUT(out, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) | |||
.ATTR(random_algorithm, String, "Philox") | |||
.OP_END_FACTORY_REG(DSARandomTruncatedNormal) | |||
/** | |||
* @brief Generate DSA normal data in random. \n | |||
* @par Inputs: | |||
include: | |||
* @li count: The shape of the input tensor. | |||
* @li seed: If seed is set to be non-zero, the random number | |||
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed | |||
* @li mean: A Tensor. Must be one of the following types: float16, float32, double | |||
* @li stdev: A Tensor. Must be one of the following types: float16, float32, double. \n | |||
* @par Attributes: | |||
* @li random_algorithm:The default value is "Philox". \n | |||
* @par Outputs: | |||
* out:Output (1-D) random number using float and bf data format . \n | |||
* @see DSARandomNormal() | |||
*/ | |||
REG_OP(DSARandomNormal) | |||
.INPUT(count, TensorType({DT_INT64})) | |||
.INPUT(seed, TensorType({DT_UINT64})) | |||
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||
.INPUT(stdev, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||
.OUTPUT(out, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) | |||
.ATTR(random_algorithm, String, "Philox") | |||
.OP_END_FACTORY_REG(DSARandomNormal) | |||
/** | |||
* @brief Generate DSA uniform data in random. \n | |||
* @par Inputs: | |||
include: | |||
* @li count: The shape of the input tensor. | |||
* @li seed: If seed is set to be non-zero, the random number | |||
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed | |||
* @li low: A Tensor. Must be one of the following types: int, float, bf | |||
* @li high: A Tensor. Must be one of the following types: int, float, bf. \n | |||
* @par Attributes: | |||
* @li random_algorithm:The default value is "Philox". \n | |||
* @par Outputs: | |||
* out:Output (1-D) random number using float int and bf data format . \n | |||
* @see DSARandomUniform() | |||
*/ | |||
REG_OP(DSARandomUniform) | |||
.INPUT(count, TensorType({DT_INT64})) | |||
.INPUT(seed, TensorType({DT_UINT64})) | |||
.INPUT(low, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.INPUT(high, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.OUTPUT(out, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.ATTR(random_algorithm, String, "Philox") | |||
.OP_END_FACTORY_REG(DSARandomUniform) | |||
} | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H |
@@ -515,6 +515,34 @@ REG_OP(ReduceSumD) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceSumD) | |||
/** | |||
*@brief Calculate the total mean based on the mean of each device . \n | |||
*@par Inputs: | |||
* Three inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li count: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li count_sum: A Tensor. Must be one of the following types: float16, float32 . \n | |||
*@par Attributes: | |||
*@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. | |||
*@li keepdims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
*@par Outputs: | |||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator Sum. | |||
*/ | |||
REG_OP(ReduceMeanWithCount) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.REQUIRED_ATTR(axes, ListInt) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceMeanWithCount) | |||
/** | |||
*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n | |||
@@ -1326,6 +1354,101 @@ REG_OP(ReduceMeanVariance) | |||
.ATTR(axes, ListInt, {}) | |||
.ATTR(keep_dims, Bool, true) | |||
.OP_END_FACTORY_REG(ReduceMeanVariance) | |||
/** | |||
* @brief Calculates the standard deviation or the variance of Tensors with the average value. | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A Tensor. Must be one of the following types: float16, float32. \n | |||
* @li mean: A Tensor. It's the mean of X. Has the same shape and type as "x" \n | |||
* @par Attributes: | |||
* Four Attributes, including: | |||
* @li dim: An listint. \n | |||
* @li if_std: An optional bool. Defaults to "False" | |||
* If "True", Calculate the standard deviation | |||
* If "False", Calculate the variance | |||
* @li unbiased: An optional bool. Defaults to "True". | |||
* If "True", Use Bessel Correction. | |||
* If "False", Do not use Bessel Correction. \n | |||
* @li keepdim: An optional bool. Defaults to "False". | |||
* If "True", Keep the original tensor dimension. | |||
* If "False", Do not keep the original tensor dimension. \n | |||
* @par Outputs: | |||
* @li output_var: A Tensor. It's the standard deviation or the variance of X. Has the same type as "x". | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Var_mean. | |||
*/ | |||
REG_OP(ReduceStdV2Update) | |||
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.INPUT(mean, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.OUTPUT(output_var, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.REQUIRED_ATTR(dim, ListInt) | |||
.ATTR(if_std, Bool, false) | |||
.ATTR(unbiased, Bool, true) | |||
.ATTR(keepdim, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceStdV2Update) | |||
/** | |||
*@brief Computes the log and sum and exp of elements across dimensions of a tensor. | |||
* Reduces "x" along the dimensions given in "axes". | |||
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each | |||
* entry in "axes". If "keep_dims" is true, the reduced dimensions | |||
* are retained with length 1. | |||
* | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: | |||
* float32, float16, int32, int64, uint32, uint64, double | |||
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n | |||
* | |||
*@par Attributes: | |||
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
* | |||
*@par Outputs: | |||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||
* | |||
*@par Third-party framework compatibility | |||
* Compatible with the Onnx operator ReduceLogSumExp. | |||
*/ | |||
REG_OP(ReduceLogSumExp) | |||
.INPUT(x, TensorType::NumberType()) | |||
.INPUT(axes, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::NumberType()) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceLogSumExp) | |||
/** | |||
*@brief Computes the log and sum of elements across dimensions of a tensor. | |||
* Reduces "x" along the dimensions given in "axes". | |||
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each | |||
* entry in "axes". If "keep_dims" is true, the reduced dimensions | |||
* are retained with length 1. | |||
* | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: | |||
* float32, float16, int32, int64, uint32, uint64, double | |||
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n | |||
* | |||
*@par Attributes: | |||
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
* | |||
*@par Outputs: | |||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||
* | |||
*@par Third-party framework compatibility | |||
* Compatible with the Onnx operator ReduceLogSum. | |||
*/ | |||
REG_OP(ReduceLogSum) | |||
.INPUT(x, TensorType::NumberType()) | |||
.INPUT(axes, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::NumberType()) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceLogSum) | |||
} //namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ |
@@ -1280,6 +1280,44 @@ REG_OP(EmbeddingBag) | |||
.ATTR(sparse, Bool, false) | |||
.ATTR(include_last_offset, Bool, false) | |||
.OP_END_FACTORY_REG(EmbeddingBag) | |||
/** | |||
* @brief:LSTMP calculation | |||
* @par Inputs: | |||
* eight inputs: | |||
* @li x:A required Tensor(seq, batch, dim). Must be one of the following types: float16, float32. | |||
* @li real_mask:A optional Tensor(seq, batch). Must be one of the following types: float16, float32. | |||
* @li init_h:A optional Tensor(batch, state). Must be one of the following types: float16, float32. | |||
* @li init_c:A optional Tensor(batch, hidden). Must be one of the following types: float16, float32. | |||
* @li wx:A required Tensor(4*hidden, dim). Must be one of the following types: float16, float32. | |||
* @li wr:A required Tensor(4*hidden, state). Must be one of the following types: float16, float32. | |||
* @li bias:A optional Tensor(hidden). Must be one of the following types: float16, float32. The format must be ND. | |||
* @li project: A optional Tensor. Must be one of the following types: float16, float32. | |||
* | |||
* @par Outputs: | |||
*three outputs: | |||
*@li y:A Tensor. Must be one of the following types: float16, float32. | |||
*@li output_h:A Tensor. Must be one of the following types: float16, float32. | |||
*@li output_c:A Tensor. Must be one of the following types: float16, float32. | |||
* | |||
*@par Attributes: | |||
*time_major:An bool identifying the time major in the op. Default to false. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(LSTMP) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(wx, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(wr, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(time_major, Bool, false) | |||
.OP_END_FACTORY_REG(LSTMP) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ |
@@ -259,13 +259,39 @@ REG_OP(GatherV2D) | |||
*@par Third-party framework compatibility | |||
*Compatible with the PyTorch operator Gather. | |||
*/ | |||
REG_OP(GatherElements) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32, | |||
DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64})) | |||
.INPUT(index, TensorType({DT_INT32,DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32, | |||
DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64})) | |||
.ATTR(dim, Int, 0) | |||
.OP_END_FACTORY_REG(GatherElements) | |||
/** | |||
*@Gathers values along an axis specified by dim . \n | |||
*@par Inputs: | |||
*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, | |||
* int64, uint16, float16, uint32, uint64, bool. | |||
*@li dim: A Tensor. Must be one of the following types: int32, int64. | |||
*@li index: A Tensor. Must be one of the following types: int32, int64 . \n | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
*Compatible with the PyTorch operator Gather. | |||
*/ | |||
REG_OP(GatherD) | |||
.INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32 | |||
DT_INT64, DT_UINT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(dim, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(index, TensorType({DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||
.ATTR(dim, Int, 0) | |||
.OP_END_FACTORY_REG(GatherElements) | |||
.OP_END_FACTORY_REG(GatherD) | |||
/** | |||
*@brief Extracts a strided slice of a tensor. Roughly speaking, this op | |||
@@ -360,9 +386,9 @@ REG_OP(StridedSlice) | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead. | |||
*/ | |||
REG_OP(StridedSliceD) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_UINT8, DT_INT8, | |||
DT_BOOL})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_UINT8, DT_INT8, | |||
DT_BOOL})) | |||
.REQUIRED_ATTR(begin, ListInt) | |||
.REQUIRED_ATTR(end, ListInt) | |||
@@ -700,6 +726,27 @@ REG_OP(SegmentMax) | |||
.OUTPUT(y, TensorType::RealNumberType()) | |||
.OP_END_FACTORY_REG(SegmentMax) | |||
/** | |||
*@brief Computes the sum along segments of a tensor . \n | |||
*@par Inputs: | |||
*Two inputs, including: | |||
* @li x: A Tensor of type NumberType. | |||
* @li segment_ids: A Tensor of type IndexNumberType, whose shape is a prefix | |||
* of "x.shape". | |||
*@par Outputs: | |||
*y: A Tensor of type NumberType . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator SegmentSum. | |||
*/ | |||
REG_OP(SegmentSum) | |||
.INPUT(x, TensorType::NumberType()) | |||
.INPUT(segment_ids, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::NumberType()) | |||
.OP_END_FACTORY_REG(SegmentSum) | |||
/** | |||
*@brief: Computes the maximum along segments of a tensor. | |||
*Computes a tensor such that output[i]=(data[i]) where max is over j | |||
@@ -929,6 +976,49 @@ REG_OP(TopKD) | |||
* @brief Finds values and indices of the "k" largest elements for the last | |||
* dimension . \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A 1D or higher tensor of type BasicType, with the last dimension | |||
* at least "k". | |||
* @li k: A 0D Tensor of type int32. | |||
* Number of top elements to look for along the last dimension (along each row | |||
* for matrices) . | |||
* @li assist_seq: A 1D tensor of type float16. | |||
* with size of 2N, which "N" is the last dimension. | |||
* The first N numbers is indices, and the next N numbers is deviation of casting | |||
* int32 to float16. \n | |||
* @par Attributes: | |||
* @li sorted: An optional bool. Defaults to true. | |||
* If true, the resulting "k" elements will be sorted by the values in descending | |||
* order. | |||
* @li dim: An optional int. Defaults to -1. For reserved use. | |||
* @li largest: An optional bool. Defaults to true. For reserved use. \n | |||
* @par Outputs: | |||
* @li values: A Tensor, specifying the sorted data. Has the same type as | |||
* "input". | |||
* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n | |||
* @see TopK() | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the TensorFlow operator TopKV2. | |||
*/ | |||
REG_OP(TopKV2D) | |||
.INPUT(x, TensorType::RealNumberType()) | |||
.INPUT(k, TensorType({DT_INT32})) | |||
.INPUT(assist_seq, TensorType({DT_FLOAT16})) | |||
.OUTPUT(values, TensorType::RealNumberType()) | |||
.OUTPUT(indices, TensorType({DT_INT32})) | |||
.ATTR(sorted, Bool, true) | |||
.ATTR(dim, Int, -1) | |||
.ATTR(largest, Bool, true) | |||
.OP_END_FACTORY_REG(TopKV2D) | |||
/** | |||
* @brief Finds values and indices of the "k" largest elements for the last | |||
* dimension . \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A 1D or higher tensor of type BasicType, with the last dimension | |||
@@ -2340,7 +2430,7 @@ REG_OP(AddRowRanges) | |||
*@par Outputs: | |||
*y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D) | |||
* @par Restrictions: | |||
*@attention Constraints: | |||
* Warning: input shape's length must not be bigger than 1024 * 1024 * 1024. | |||
*/ | |||
REG_OP(MaskedFillRange) | |||
@@ -2442,6 +2532,34 @@ REG_OP(StridedSliceV3) | |||
.OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::BasicType()) | |||
.OP_END_FACTORY_REG(StridedSliceV3) | |||
/** | |||
*@brief MovingSumWithSigmoid. | |||
*@par Inputs: | |||
*Four inputs, including: | |||
* @li alpha: A Tensor. Must be one of the following types: float32, float16. | |||
* @li energy: A Tensor. Must be one of the following types: float32, float16. | |||
* @li beam_size: A Tensor of type int32. | |||
* @li frame_size: A Tensor of type int32. \n | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as "alpha". \n | |||
* | |||
* @par Attributes: | |||
* window_size: A int. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(MovingSumWithSigmoid) | |||
.INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(energy, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(beam_size, TensorType({DT_INT32})) | |||
.INPUT(frame_size, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(window_size, Int) | |||
.OP_END_FACTORY_REG(MovingSumWithSigmoid) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ |
@@ -381,6 +381,30 @@ REG_OP(ConcatOffsetD) | |||
.REQUIRED_ATTR(concat_dim, Int) | |||
.REQUIRED_ATTR(N, Int) | |||
.OP_END_FACTORY_REG(ConcatOffsetD) | |||
/** | |||
*@brief Compute combinations of length of the given tensor. \n | |||
*@par Inputs: | |||
*x: A list of 1D Tensor objects. \n | |||
*@par Attributes: | |||
*@li r: An optional int indicates number of elements to combine. Defaults to 2. | |||
*@li with_replacement: An optional bool indicates whether to allow duplication | |||
*in combination. Defaults to "False". \n | |||
*@par Outputs: | |||
*y: A Tensor list with same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
*@ Compatible with the Pytorch operator Combinations. | |||
*/ | |||
REG_OP(Combinations) | |||
.INPUT(x, TensorType::ALL()) | |||
.OUTPUT(y, TensorType::ALL()) | |||
.ATTR(r, Int, 2) | |||
.ATTR(with_replacement, Bool, false) | |||
.OP_END_FACTORY_REG(Combinations) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -154,43 +154,6 @@ REG_OP(CalcBucketsLimitAndOffset) | |||
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | |||
.REQUIRED_ATTR(total_limit, Int) | |||
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | |||
/** | |||
* @brief Calculate ProdVirialSeA. \n | |||
* | |||
* @par Inputs: | |||
* Five inputs, including: | |||
* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li rij: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li nlist: A Tensor. dtype is int32. | |||
* @li natoms: A Tensor. dtype is int32. \n | |||
* | |||
* @par Outputs: | |||
* Two outputs, including: | |||
* @li virial: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Attributes: | |||
* Two attributes, including: | |||
* @li n_a_sel: A Scalar. | |||
* @li n_r_sel: A Scalar. \n | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(ProdVirialSeA) | |||
.INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(nlist, TensorType({DT_INT32})) | |||
.INPUT(natoms, TensorType({DT_INT32})) | |||
.OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.REQUIRED_ATTR(n_a_sel, Int) | |||
.REQUIRED_ATTR(n_r_sel, Int) | |||
.ATTR(nall, Int, 28328) | |||
.OP_END_FACTORY_REG(ProdVirialSeA) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ |
@@ -98,11 +98,11 @@ typedef struct rtExceptionInfo { | |||
uint32_t tid; | |||
uint32_t deviceid; | |||
uint32_t retcode; | |||
} rtExceptionInfo; | |||
} rtExceptionInfo_t; | |||
typedef void (*rtErrorCallback)(rtExceptionType); | |||
typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); | |||
typedef void (*rtTaskFailCallback)(rtExceptionInfo_t *exceptionInfo); | |||
typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); | |||
@@ -429,6 +429,15 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *lbl, rtModel_t mdl, rtStream_t st | |||
* @return RT_ERROR_INVALID_VALUE for input null ptr | |||
*/ | |||
RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); | |||
/** | |||
* @ingroup dvrt_base | |||
* @brief get max model num | |||
* @param [out] max model num | |||
* @param [in] null | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtGetMaxModelNum(uint32_t *maxModelCount); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
@@ -22,7 +22,8 @@ typedef enum tagRtArchType { | |||
ARCH_BEGIN = 0, | |||
ARCH_V100 = ARCH_BEGIN, | |||
ARCH_V200 = 1, | |||
ARCH_END = 2, | |||
ARCH_V300 = 2, | |||
ARCH_END = 3, | |||
} rtArchType_t; | |||
typedef enum tagRtChipType { | |||
@@ -34,7 +35,8 @@ typedef enum tagRtChipType { | |||
CHIP_DC = 4, | |||
CHIP_CLOUD_V2 = 5, | |||
CHIP_NO_DEVICE = 6, | |||
CHIP_END = 7, | |||
CHIP_MINI_V3 = 7, | |||
CHIP_END = 8, | |||
} rtChipType_t; | |||
typedef enum tagRtAicpuScheType { | |||
@@ -74,7 +76,8 @@ typedef enum tagRtPlatformType { | |||
PLATFORM_DC = 5, | |||
PLATFORM_CLOUD_V2 = 6, | |||
PLATFORM_LHISI_SD3403 = 7, | |||
PLATFORM_END = 8, | |||
PLATFORM_MINI_V3 = 8, | |||
PLATFORM_END = 9, | |||
} rtPlatformType_t; | |||
typedef enum tagRtCubeFracMKNFp16 { | |||
@@ -140,6 +143,12 @@ typedef enum tagRTTaskTimeoutType { | |||
RT_TIMEOUT_TYPE_OP_EXECUTE, | |||
} rtTaskTimeoutType_t; | |||
typedef enum tagRtFloatOverflowMode { | |||
RT_OVERFLOW_MODE_SATURATION = 0, | |||
RT_OVERFLOW_MODE_INFNAN, | |||
RT_OVERFLOW_MODE_UNDEF, | |||
} rtFloatOverflowMode_t; | |||
/** | |||
* @ingroup | |||
* @brief get AI core count | |||
@@ -180,6 +189,15 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate | |||
*/ | |||
RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); | |||
/** | |||
* @ingroup | |||
* @brief get float overflow mode | |||
* @param [out] floatOverflowMode | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtGetFloatOverflowMode(rtFloatOverflowMode_t * const floatOverflowMode); | |||
/** | |||
* @ingroup | |||
* @brief get l2 buffer Info,virtual baseaddr,Size | |||
@@ -140,7 +140,7 @@ RTS_API rtError_t rtSetGroup(int32_t groupId); | |||
* @param [in] groupid count | |||
* @return RT_ERROR_NONE for ok, errno for failed | |||
*/ | |||
RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t count); | |||
RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t cnt); | |||
/** | |||
* @ingroup | |||
@@ -94,11 +94,11 @@ typedef enum tagGetDevMsgType { | |||
/** | |||
* @ingroup dvrt_dev | |||
* @brief get total device number. | |||
* @param [in|out] count the device number | |||
* @param [in|out] cnt the device number | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtGetDeviceCount(int32_t *count); | |||
RTS_API rtError_t rtGetDeviceCount(int32_t *cnt); | |||
/** | |||
* @ingroup dvrt_dev | |||
* @brief get device ids | |||
@@ -338,7 +338,7 @@ RTS_API rtError_t rtSetTSDevice(uint32_t tsId); | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_DRV_ERR for can not get run mode | |||
*/ | |||
RTS_API rtError_t rtGetRunMode(rtRunMode *mode); | |||
RTS_API rtError_t rtGetRunMode(rtRunMode *runMode); | |||
/** | |||
* @ingroup dvrt_dev | |||
@@ -23,11 +23,11 @@ typedef enum dvfsProfileMode { | |||
/** | |||
* @ingroup dvrt_dvfsprofile | |||
* @brief Set the performance mode of the device | |||
* @param [in] mode dvfsProfileMode | |||
* @param [in] profMode dvfsProfileMode | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode mode); | |||
RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode profMode); | |||
/** | |||
* @ingroup dvrt_dvfsprofile | |||
@@ -19,6 +19,11 @@ typedef enum rtEventWaitStatus { | |||
EVENT_STATUS_MAX = 2, | |||
} rtEventWaitStatus_t; | |||
typedef enum rtEventStatus { | |||
RT_EVENT_INIT = 0, | |||
RT_EVENT_RECORDED = 1, | |||
} rtEventStatus_t; | |||
/** | |||
* @ingroup event_flags | |||
* @brief event op bit flags | |||
@@ -115,6 +120,16 @@ RTS_API rtError_t rtEventQuery(rtEvent_t evt); | |||
*/ | |||
RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t evt, rtEventWaitStatus_t *status); | |||
/** | |||
* @ingroup dvrt_event | |||
* @brief Queries an event's status | |||
* @param [in] evt event to query | |||
* @param [in out] rtEventStatus_t status | |||
* @return RT_EVENT_RECORDED for recorded | |||
* @return RT_EVENT_INIT for not recorded | |||
*/ | |||
RTS_API rtError_t rtEventQueryStatus(rtEvent_t evt, rtEventStatus_t *status); | |||
/** | |||
* @ingroup dvrt_event | |||
* @brief computes the elapsed time between events. | |||
@@ -287,13 +287,13 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle); | |||
* @param [in] binHandle device binary handle | |||
* @param [in] stubFunc stub function | |||
* @param [in] stubName stub function name | |||
* @param [in] devFunc device function description. symbol name or address | |||
* offset, depending binary type. | |||
* @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, | |||
* depending static shape or dynmaic shape. | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, | |||
const void *devFunc, uint32_t funcMode); | |||
const void *kernelInfoExt, uint32_t funcMode); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -354,7 +354,8 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||
* @ingroup rt_kernel | |||
* @brief launch kernel with handle to device | |||
* @param [in] hdl program | |||
* @param [in] devFunc device function description. | |||
* @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, | |||
* depending static shape or dynmaic shape. | |||
* @param [in] blockDim block dimentions | |||
* @param [in] args argments address for kernel function | |||
* @param [in] argsSize argements size | |||
@@ -364,7 +365,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *devFunc, uint32_t blockDim, | |||
RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *kernelInfoExt, uint32_t blockDim, | |||
void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_, | |||
const void *kernelInfo); | |||
@@ -497,6 +498,28 @@ RTS_API rtError_t rtDumpAddrSet(rtModel_t mdl, void *addr, uint32_t dumpSize, ui | |||
*/ | |||
RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length); | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief launch npu get float status task | |||
* @param [in] outputAddr pointer to op output addr | |||
* @param [in] outputSize op output size | |||
* @param [in] checkMode check mode | |||
* @param [in] stm associated stream | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtNpuGetFloatStatus(void *outputAddr, uint64_t outputSize, uint32_t checkMode, rtStream_t stm); | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief launch npu clear float status task | |||
* @param [in] checkMode check mode | |||
* @param [in] stm associated stream | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtNpuClearFloatStatus(uint32_t checkMode, rtStream_t stm); | |||
#ifndef __CLANG_CCE_RUNTIME_H__ | |||
#define __CLANG_CCE_RUNTIME_H__ | |||
/** | |||
@@ -519,13 +542,13 @@ RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStre | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief setup argment for next rtLaunch in current thread | |||
* @param [in] arg argment address for kernel function | |||
* @param [in] args argment address for kernel function | |||
* @param [in] size argment size | |||
* @param [in] offset argment table offset | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtSetupArgument(const void *arg, uint32_t size, uint32_t offset); | |||
RTS_API rtError_t rtSetupArgument(const void *args, uint32_t size, uint32_t offset); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -544,11 +567,11 @@ RTS_API rtError_t rtLaunch(const void *stubFunc); | |||
* @param [in] ptr host memory | |||
* @param [in] size host memory size | |||
* @param [in] flag reserved. set to 0 | |||
* @param [out] arg returned arg. used for next kernel's arg. | |||
* @param [out] args returned arg. used for next kernel's arg. | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg); | |||
RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **args); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -675,7 +698,8 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD | |||
* @ingroup rt_kernel | |||
* @brief launch kernel with handle and tiling data to device | |||
* @param [in] hdl program | |||
* @param [in] devFunc device function description. | |||
* @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, | |||
* depending static shape or dynmaic shape. | |||
* @param [in] blockDim block dimentions | |||
* @param [in] argsInfo argments info address for kernel function | |||
* @param [in] smDesc shared memory description | |||
@@ -684,7 +708,7 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *hdl, const void *devFunc, uint32_t blockDim, | |||
RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *hdl, const void *kernelInfoExt, uint32_t blockDim, | |||
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, const void* kernelInfo); | |||
#if defined(__cplusplus) | |||
@@ -341,6 +341,20 @@ RTS_API rtError_t rtInvalidCache(void *base, size_t len); | |||
*/ | |||
RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief host task memcpy | |||
* @param [in] dst destination address pointer | |||
* @param [in] destMax length of destination address memory | |||
* @param [in] src source address pointer | |||
* @param [in] cnt the number of byte to copy | |||
* @param [in] kind memcpy type | |||
* @param [in] stm task stream | |||
* @return RT_ERROR_NONE for ok, errno for failed | |||
*/ | |||
RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, const void * const src, | |||
const uint64_t cnt, rtMemcpyKind_t kind, rtStream_t stm); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief asynchronized memcpy | |||
@@ -424,6 +438,16 @@ RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize); | |||
*/ | |||
RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief Specifies how memory is use | |||
* @param [in] devPtr memory pointer | |||
* @param [in] count memory count | |||
* @param [in] advise reserved, set to 1 | |||
* @return RT_ERROR_NONE for ok | |||
* @return others for error | |||
*/ | |||
RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t count, uint32_t advise); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief set memory with uint32_t value | |||
@@ -28,6 +28,16 @@ extern "C" { | |||
*/ | |||
RTS_API rtError_t rtSetTaskTag(const char_t *taskTag); | |||
/** | |||
* @brief set aicpu device attribute. | |||
* it is used for aicpu device to be aware of enviroment config | |||
* @param [in] key attrubute key. | |||
* @param [in] val attrubute value. | |||
* @return RT_ERROR_NONE for ok | |||
* @return other failed | |||
*/ | |||
RTS_API rtError_t rtSetAicpuAttr(const char_t *key, const char_t *val); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
@@ -25,7 +25,8 @@ typedef struct tagFftsPlusTaskInfo { | |||
#pragma pack(pop) | |||
RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *hdl, const void *devFunc, void **addr, uint32_t *prefetchCnt); | |||
RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *hdl, const void *kernelInfoExt, void **addr, | |||
uint32_t *prefetchCnt); | |||
RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stm); | |||
@@ -184,6 +184,13 @@ typedef enum rtGroupType { | |||
RT_GRP_TYPE_BIND_DP_CPU_EXCLUSIVE /* Bound to a AICPU, intra-group threads are mutex awakened */ | |||
} rtGroupType_t; | |||
typedef struct tagInitFlowGwInfo { | |||
const char_t *groupName; | |||
uint64_t schedPolicy; | |||
uint64_t reschedInterval; | |||
char_t rsv[128]; | |||
} rtInitFlowGwInfo_t; | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief init queue schedule | |||
@@ -193,6 +200,15 @@ typedef enum rtGroupType { | |||
*/ | |||
RTS_API rtError_t rtMemQueueInitQS(int32_t devId, const char_t *grpName); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief init flow gateway | |||
* @param [in] devId the logical device id | |||
* @param [in] initInfo Initialization parameters | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueInitFlowGw(int32_t devId, const rtInitFlowGwInfo_t * const initInfo); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief create mbuf queue | |||
@@ -222,24 +238,24 @@ RTS_API rtError_t rtMemQueueInit(int32_t devId); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief enqueu mbuf | |||
* @brief enqueue memBuf | |||
* @param [in] devId the logical device id | |||
* @param [in] qid queue id | |||
* @param [in] mbuf enqueue mbuf | |||
* @param [in] memBuf enqueue memBuf | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *mbuf); | |||
RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *memBuf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief enqueu mbuf | |||
* @brief dequeue memBuf | |||
* @param [in] devId the logical device id | |||
* @param [in] qid queue id | |||
* @param [out] mbuf dequeue mbuf | |||
* @param [out] memBuf dequeue memBuf | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **mbuf); | |||
RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **memBuf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
@@ -350,47 +366,56 @@ RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief alloc buff | |||
* @param [out] buff: buff addr alloced | |||
* @param [out] memBuf: buff addr alloced | |||
* @param [in] size: The amount of memory space requested | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *mbuf, uint64_t size); | |||
RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *memBuf, uint64_t size); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief free buff | |||
* @param [in] buff: buff addr to be freed | |||
* @param [in] memBuf: buff addr to be freed | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufFree(rtMbufPtr_t mbuf); | |||
RTS_API rtError_t rtMbufFree(rtMbufPtr_t memBuf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief set Data len of Mbuf | |||
* @param [in] memBuf: Mbuf addr | |||
* @param [in] len: data len | |||
* @return RT_ERROR_NONE for success, others for fail | |||
*/ | |||
RTS_API rtError_t rtMbufSetDataLen(rtMbufPtr_t memBuf, uint64_t len); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief get Data addr of Mbuf | |||
* @param [in] mbuf: Mbuf addr | |||
* @param [in] memBuf: Mbuf addr | |||
* @param [out] buf: Mbuf data addr | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t mbuf, void **buf); | |||
RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t memBuf, void **buf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief get total Buffer size of Mbuf | |||
* @param [in] mbuf: Mbuf addr | |||
* @param [in] memBuf: Mbuf addr | |||
* @param [out] totalSize: total buffer size of Mbuf | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t mbuf, uint64_t *totalSize); | |||
RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t memBuf, uint64_t *totalSize); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief Get the address and length of its user_data from the specified Mbuf | |||
* @param [in] mbuf: Mbuf addr | |||
* @param [in] memBuf: Mbuf addr | |||
* @param [out] priv: address of its user_data | |||
* @param [out] size: length of its user_data | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufGetPrivInfo (rtMbufPtr_t mbuf, void **priv, uint64_t *size); | |||
RTS_API rtError_t rtMbufGetPrivInfo(rtMbufPtr_t memBuf, void **priv, uint64_t *size); | |||
// mem group | |||
typedef struct { | |||
@@ -573,6 +598,14 @@ RTS_API rtError_t rtQueueSubF2NFEvent(int32_t devId, uint32_t qId, uint32_t grou | |||
*/ | |||
RTS_API rtError_t rtQueueSubscribe(int32_t devId, uint32_t qId, uint32_t groupId, int32_t type); | |||
/** | |||
* @ingroup rtBufEventTrigger | |||
* @brief buf event trigger | |||
* @param [in] name, group name | |||
* @return 0 for success, others for fail | |||
*/ | |||
RTS_API rtError_t rtBufEventTrigger(const char_t *name); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
@@ -44,6 +44,11 @@ typedef enum tagModelTaskType { | |||
RT_MODEL_TASK_PROFILER_TRACE_EX, | |||
RT_MODEL_TASK_FFTS_TASK, | |||
RT_MODEL_TASK_FFTS_PLUS_TASK, | |||
RT_MODEL_TASK_DSA_TASK, | |||
RT_MODEL_TASK_CMO, | |||
RT_MODEL_TASK_BARRIER, | |||
RT_MODEL_TASK_NPU_GET_FLOAT_STATUS, | |||
RT_MODEL_TASK_NPU_CLEAR_FLOAT_STATUS, | |||
} rtModelTaskType_t; | |||
typedef enum tagModelStreamType { | |||
@@ -115,9 +120,9 @@ typedef struct tagKernelTaskInfo { | |||
uint16_t argsCount; | |||
uint16_t argsSize; | |||
uint16_t reserved; | |||
char_t *stubFunc; | |||
const char_t *stubFunc; | |||
uint8_t *smDesc; | |||
uint8_t *args; | |||
const uint8_t *args; | |||
uint16_t *argsOffset; | |||
} rtKernelTaskInfo_t; | |||
@@ -126,17 +131,17 @@ typedef struct tagAllKernelTaskInfo { | |||
uint16_t argsCount; | |||
uint16_t argsSize; | |||
uint16_t reserved; | |||
void *devfunc; | |||
const void *kernelInfoExt; | |||
void *handle; | |||
uint8_t *smDesc; | |||
uint8_t *args; | |||
const uint8_t *args; | |||
uint16_t *argsOffset; | |||
} rtAllKernelTaskInfo_t; | |||
typedef struct tagKernelTaskInfoEx { | |||
uint32_t flags; | |||
uint32_t argsSize; | |||
void *args; | |||
const void *args; | |||
uint32_t reserved[6]; | |||
} rtKernelTaskInfoEx_t; | |||
@@ -198,9 +203,9 @@ typedef struct tagProfilerTraceExTaskInfo { | |||
} rtProfilerTraceEx_t; | |||
typedef struct tagrtMemcpyAsyncTaskInfo { | |||
void *dst; | |||
const void *dst; | |||
uint64_t destMax; | |||
void *src; | |||
const void *src; | |||
uint64_t count; | |||
uint32_t kind; | |||
uint32_t reserved; | |||
@@ -212,9 +217,9 @@ typedef struct tagrtNotifyTaskInfo { | |||
} rtNotifyTaskInfo_t; | |||
typedef struct tagrtReduceAsyncTaskInfo { | |||
void *dst; | |||
const void *dst; | |||
uint64_t destMax; | |||
void *src; | |||
const void *src; | |||
uint64_t count; | |||
uint32_t kind; | |||
uint32_t type; | |||
@@ -481,6 +486,16 @@ RTS_API rtError_t rtDebugRegister(rtModel_t mdl, uint32_t flag, const void *addr | |||
*/ | |||
RTS_API rtError_t rtDebugUnRegister(rtModel_t mdl); | |||
/** | |||
* @ingroup rt_model | |||
* @brief set model group id | |||
* @param [in] mdl model | |||
* @param [in] schGrpId groupId (0,4) 0:default invalid value 1-4 valid value Maximum support 4 groups | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtModelSetSchGroupId(rtModel_t mdl, const int16_t schGrpId); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
@@ -7,7 +7,7 @@ | |||
#define CCE_RUNTIME_RT_STARS_H | |||
#include "base.h" | |||
#include "rt_stars_define.h" | |||
#if defined(__cplusplus) | |||
extern "C" { | |||
#endif | |||
@@ -80,6 +80,25 @@ RTS_API rtError_t rtCdqEnQueue(const char_t *queName, uint32_t cdqeIndex, void * | |||
RTS_API rtError_t rtCdqEnQueuePtrMode(const char_t *queName, uint32_t cdqeIndex, const void *ptrAddr, | |||
rtStream_t stm); | |||
/** | |||
* @ingroup rt_stars | |||
* @brief launch common cmo task on the stream. | |||
* @param [in] taskInfo cmo task info | |||
* @param [in] stm launch task on the stream | |||
* @param [in] flag flag | |||
* @return RT_ERROR_NONE for ok, others failed | |||
*/ | |||
RTS_API rtError_t rtCmoTaskLaunch(rtCmoTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); | |||
/** | |||
* @ingroup rt_stars | |||
* @brief launch barrier cmo task on the stream. | |||
* @param [in] taskInfo barrier task info | |||
* @param [in] stm launch task on the stream | |||
* @param [in] flag flag | |||
* @return RT_ERROR_NONE for ok, others failed | |||
*/ | |||
RTS_API rtError_t rtBarrierTaskLaunch(rtBarrierTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); | |||
#if defined(__cplusplus) | |||
} | |||
@@ -32,6 +32,37 @@ typedef struct tagStarsSqeHeader { | |||
uint16_t taskId; | |||
} rtStarsSqeHeader_t; | |||
typedef struct tagStarsDsaSqe { | |||
// 0-7 bytes | |||
rtStarsSqeHeader_t sqeHeader; | |||
// 8-11 bytes | |||
uint32_t start : 1; | |||
uint32_t functionType : 3; | |||
uint32_t dataType : 3; | |||
uint32_t algoType : 3; | |||
uint32_t paramVldBitmap : 5; | |||
uint32_t paramAddrValBitmap : 7; | |||
uint32_t reserved0 : 10; | |||
// 12-15 bytes | |||
uint16_t sqeIndex; | |||
uint8_t kernelCredit; | |||
uint8_t reserved1; | |||
// 16-31 bytes | |||
uint32_t dsaCfgResultAddrLow; | |||
uint32_t dsaCfgResultAddrHigh; | |||
uint32_t dsaCfgStateAddrLow; | |||
uint32_t dsaCfgStateAddrHigh; | |||
// 32-47 bytes | |||
uint32_t dsaCfgParamAddrLow; | |||
uint32_t dsaCfgParamAddrHigh; | |||
uint32_t dsaCfgSeedLow; | |||
uint32_t dsaCfgSeedHigh; | |||
// 48-63 bytes | |||
uint32_t dsaCfgNumberLow; | |||
uint32_t dsaCfgNumberHigh; | |||
uint32_t reserved2[2]; | |||
} rtStarsDsaSqe_t; | |||
// ffts+ type | |||
typedef enum tagFftsPlusType { | |||
RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved | |||
@@ -83,6 +114,33 @@ typedef struct tagFftsPlusSqe { | |||
uint32_t reserved16[4]; | |||
} rtFftsPlusSqe_t; | |||
typedef struct tagCmoTaskInfo { | |||
uint8_t qos; | |||
uint8_t partId; | |||
uint8_t pmg; | |||
uint8_t reserved; | |||
uint16_t cmoType; | |||
uint16_t opCode; | |||
uint16_t numInner; | |||
uint16_t numOuter; | |||
uint32_t logicId; | |||
uint32_t lengthInner; | |||
uint64_t sourceAddr; | |||
uint32_t striderOuter; | |||
uint32_t striderInner; | |||
} rtCmoTaskInfo_t; | |||
typedef struct tagBarrierCmoInfo { | |||
uint16_t cmoType; // 0 is barrier, 1 is invalid, Prefetch is 2, Write_back is 3, FE/GE only use invalid type. | |||
uint32_t logicId; | |||
} rtBarrierCmoInfo_t; | |||
#define RT_CMO_MAX_BARRIER_NUM 6U // 6U is max support | |||
typedef struct tagBarrierTaskInfo { | |||
uint8_t logicIdNum; | |||
rtBarrierCmoInfo_t cmoInfo[RT_CMO_MAX_BARRIER_NUM]; | |||
} rtBarrierTaskInfo_t; | |||
#pragma pack(pop) | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
@@ -1,21 +1,14 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
* @file data_common.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. | |||
* | |||
* This program is used to data structure | |||
*/ | |||
#ifndef HOST_INNER_INC_DATA_COMMON_H_ | |||
#define HOST_INNER_INC_DATA_COMMON_H_ | |||
#include <string> | |||
namespace tdt { | |||
#ifndef TDT_DATA_TYPE | |||
@@ -1,18 +1,10 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
* @file index_transform.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2018-2019. All Rights Reserved. | |||
* | |||
* This program is used to get logical device id by phy device id. | |||
*/ | |||
#ifndef INC_TDT_INDEX_TRANSFORM_H | |||
#define INC_TDT_INDEX_TRANSFORM_H | |||
@@ -1,18 +1,10 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/** | |||
* @file status.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. | |||
* | |||
* This program is used to describe status | |||
*/ | |||
#ifndef INC_TDT_STATUS_H_ | |||
#define INC_TDT_STATUS_H_ | |||
@@ -1,18 +1,10 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
* @file tdt_host_interface.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. | |||
* | |||
* This program is used to host server | |||
*/ | |||
#ifndef HOST_INNER_INC_TDT_HOST_INTERFACE_H_ | |||
#define HOST_INNER_INC_TDT_HOST_INTERFACE_H_ | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright (c) Hisilicon Technologies Co., Ltd. 2018-2021. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -14,17 +14,22 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H_ | |||
#define TDT_HOST_INNER_INC_TSD_CLIENT_H_ | |||
#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H | |||
#define TDT_HOST_INNER_INC_TSD_CLIENT_H | |||
#include <condition_variable> | |||
#include <map> | |||
#include <memory> | |||
#include <mutex> | |||
#include "tdt/status.h" | |||
#include "tdt/data_common.h" | |||
#include "tsd/status.h" | |||
#include "toolchain/prof_callback.h" | |||
#ifdef WIN_TSD | |||
#define TDT_LIB_EXPORT __declspec(dllexport) | |||
#else | |||
#define TDT_LIB_EXPORT __attribute__((visibility("default"))) | |||
#endif | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
@@ -50,7 +55,51 @@ extern "C" { | |||
* @li tsd_client.h: Header file where the interface declaration is located. | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
*/ | |||
TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); | |||
TDT_LIB_EXPORT uint32_t TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); | |||
/** | |||
* @ingroup Open | |||
* @brief Used for the Framework process to communicate with the TSDDaemon process in 1981, | |||
* and notify TSD to complete the initialization of other processes | |||
* | |||
* @par Function | |||
* Used for the Framework process to communicate with the TSDDaemon process, | |||
* and notify TSD to complete the initialization of other processes | |||
* | |||
* @param logicDeviceId [IN] type #unsigned int. Logic device ID | |||
* @param rankSize [IN] type #unsigned int. The rankSize of the training. | |||
* The default value is 1. When rankSize is greater than 1, | |||
* HCCP will be pulled to perform set communication related operations. | |||
* @param deviceMode [IN] type unsigned int. The device running mode of aicpuSd, | |||
* it include chipMode and DieMode | |||
* @retval TDT_OK Success | |||
* @retval OtherValues Failure | |||
* | |||
* @par Dependency | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
*/ | |||
TDT_LIB_EXPORT uint32_t TsdOpenEx(const uint32_t logicDeviceId, const uint32_t rankSize, const uint32_t deviceMode); | |||
/** | |||
* @ingroup InitialQs | |||
* @brief Used for the Framework process to communicate with the TSDDaemon process, | |||
* and notify TSD to complete the initialization of QS processes | |||
* | |||
* @par Function | |||
* Used for the Framework process to communicate with the TSDDaemon process, | |||
* and notify TSD to complete the initialization of other processes | |||
* | |||
* @param logicDeviceId [IN] type #unsigned int. Logic device ID | |||
* @param groupName [IN] type #char pointer. qs group name send by host process | |||
* @retval TDT_OK Success | |||
* @retval OtherValues Failure | |||
* | |||
* @par Dependency | |||
* @li libtsdclient.so: Library to which the interface belongs. | |||
* @li tsd_client.h: Header file where the interface declaration is located. | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
*/ | |||
TDT_LIB_EXPORT uint32_t TsdInitQs(const uint32_t logicDeviceId, const char_t * const groupName = nullptr); | |||
/** | |||
* @ingroup Close | |||
@@ -64,11 +113,12 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t | |||
* @retval OtherValues Failure | |||
* | |||
* @par Dependency | |||
* @li libtsdclient.so: Library to which the interface belongs. | |||
* @li tsd_client.h: Header file where the interface declaration is located. | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
*/ | |||
TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); | |||
TDT_LIB_EXPORT uint32_t TsdClose(const uint32_t logicDeviceId); | |||
/** | |||
* @ingroup UpdateProfilingMode | |||
@@ -86,7 +136,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); | |||
* @li tsd_client.h: Header file where the interface declaration is located. | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
*/ | |||
TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); | |||
TDT_LIB_EXPORT uint32_t UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); | |||
/** | |||
* @ingroup TsdSetMsprofReporterCallback | |||
@@ -105,9 +155,22 @@ TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, con | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
* @li prof_callback.h: Headerfile where 'MsprofReporterCallback' defined | |||
*/ | |||
TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); | |||
TDT_LIB_EXPORT uint32_t TsdSetMsprofReporterCallback(const MsprofReporterCallback callback); | |||
/** | |||
* @ingroup TsdSetAttr | |||
* @brief used to set tsd attr | |||
* | |||
* @par key | |||
* key set for tsd attr,now only support RunMode | |||
* | |||
* @par value | |||
* value set to run correspond mode, PROCESS_MODE or THREAD_MODE | |||
* @retval TDT_OK Success | |||
* @retval OtherValues Failure | |||
*/ | |||
TDT_LIB_EXPORT uint32_t TsdSetAttr(const char * const attrKey, const char * const attrValue); | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H_ | |||
#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H |
@@ -1,17 +1,8 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||
* Description: handle perf data | |||
* Author: xp | |||
* Create: 2019-10-13 | |||
*/ | |||
#ifndef MSPROFILER_API_PROF_ACL_API_H_ | |||
@@ -25,6 +16,8 @@ | |||
#define PROF_L2CACHE 0x00000010ULL | |||
#define PROF_HCCL_TRACE 0x00000020ULL | |||
#define PROF_TRAINING_TRACE 0x00000040ULL | |||
#define PROF_MSPROFTX 0x00000080ULL | |||
#define PROF_RUNTIME_API 0x00000100ULL | |||
// system profilinig switch | |||
#define PROF_CPU 0x00010000ULL | |||
@@ -36,17 +29,18 @@ | |||
#define PROF_AIVECTORCORE_SAMPLE 0x00400000ULL | |||
#define PROF_MODEL_EXECUTE 0x0000001000000ULL | |||
#define PROF_RUNTIME_API 0x0000002000000ULL | |||
#define PROF_RUNTIME_TRACE 0x0000004000000ULL | |||
#define PROF_SCHEDULE_TIMELINE 0x0000008000000ULL | |||
#define PROF_SCHEDULE_TRACE 0x0000010000000ULL | |||
#define PROF_AIVECTORCORE_METRICS 0x0000020000000ULL | |||
#define PROF_SUBTASK_TIME 0x0000040000000ULL | |||
#define PROF_TASK_TRACE 0x0000005000062ULL | |||
#define PROF_OP_DETAIL 0x0000080000000ULL | |||
#define PROF_MODEL_LOAD 0x8000000000000000ULL | |||
#define PROF_TASK_TRACE (PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | \ | |||
PROF_HCCL_TRACE | PROF_TASK_TIME) | |||
// DataTypeConfig MASK | |||
#define PROF_ACL_API_MASK 0x00000001ULL | |||
#define PROF_TASK_TIME_MASK 0x00000002ULL | |||
@@ -55,6 +49,8 @@ | |||
#define PROF_L2CACHE_MASK 0x00000010ULL | |||
#define PROF_HCCL_TRACE_MASK 0x00000020ULL | |||
#define PROF_TRAINING_TRACE_MASK 0x00000040ULL | |||
#define PROF_MSPROFTX_MASK 0x00000080ULL | |||
#define PROF_RUNTIME_API_MASK 0x00000100ULL | |||
// system profilinig mask | |||
#define PROF_CPU_MASK 0x00010000ULL | |||
@@ -66,12 +62,12 @@ | |||
#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000ULL | |||
#define PROF_MODEL_EXECUTE_MASK 0x0000001000000ULL | |||
#define PROF_RUNTIME_API_MASK 0x0000002000000ULL | |||
#define PROF_RUNTIME_TRACE_MASK 0x0000004000000ULL | |||
#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000ULL | |||
#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000ULL | |||
#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000ULL | |||
#define PROF_SUBTASK_TIME_MASK 0x0000040000000ULL | |||
#define PROF_OP_DETAIL_MASK 0x0000080000000ULL | |||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL | |||
@@ -104,7 +100,7 @@ extern "C" { | |||
MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); | |||
typedef int32_t Status; | |||
typedef int32_t Status; | |||
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig; | |||
/// | |||
/// @ingroup AscendCL | |||
@@ -135,6 +131,33 @@ MSVP_PROF_API Status aclgrphProfGraphUnSubscribe(const uint32_t graphId); | |||
* @retval 0 for failed | |||
*/ | |||
MSVP_PROF_API size_t aclprofGetGraphId(const void *opInfo, size_t opInfoLen, uint32_t index); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief set stamp pay load | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
MSVP_PROF_API int aclprofSetStampPayload(void *stamp, const int32_t type, void *value); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief set category and name | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
MSVP_PROF_API int aclprofSetCategoryName(uint32_t category, const char *categoryName); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief set category to stamp | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
MSVP_PROF_API int aclprofSetStampCategory(void *stamp, uint32_t category); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
@@ -1,17 +1,8 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||
* Description: handle perf data | |||
* Author: xp | |||
* Create: 2019-10-13 | |||
*/ | |||
#ifndef MSPROFILER_PROF_CALLBACK_H_ | |||
@@ -76,7 +67,8 @@ enum MsprofReporterModuleId { | |||
MSPROF_MODULE_HCCL, // HCCL | |||
MSPROF_MODULE_ACL, // AclModule | |||
MSPROF_MODULE_FRAMEWORK, // Framework | |||
MSPROF_MODULE_RUNTIME // runtime | |||
MSPROF_MODULE_RUNTIME, // runtime | |||
MSPROF_MODULE_MSPROF // msprofTx | |||
}; | |||
/** | |||
@@ -119,7 +111,7 @@ struct MsprofGeOptions { | |||
*/ | |||
enum MsprofCtrlCallbackType { | |||
MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env | |||
MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json | |||
MSPROF_CTRL_INIT_ACL_JSON,   // start profiling with acl.json | |||
MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options | |||
MSPROF_CTRL_FINALIZE, // stop profiling | |||
MSPROF_CTRL_INIT_DYNA = 0xFF, // start profiling for dynamic profiling | |||
@@ -0,0 +1,449 @@ | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||
* Description: handle perf data | |||
* Author: Huawei Technologies Co., Ltd. | |||
* Create: 2019-10-13 | |||
*/ | |||
#ifndef MSPROFILER_PROF_COMMON_H_ | |||
#define MSPROFILER_PROF_COMMON_H_ | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
#include <stdint.h> | |||
#define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a | |||
enum MsprofDataTag { | |||
MSPROF_ACL_DATA_TAG = 0, //acl data tag, range: 0~19 | |||
MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, //ge data tag, range: 20~39 | |||
MSPROF_GE_DATA_TAG_FUSION = 21, | |||
MSPROF_GE_DATA_TAG_INFER = 22, | |||
MSPROF_GE_DATA_TAG_TASK = 23, | |||
MSPROF_GE_DATA_TAG_TENSOR = 24, | |||
MSPROF_GE_DATA_TAG_STEP = 25, | |||
MSPROF_GE_DATA_TAG_ID_MAP = 26, | |||
MSPROF_GE_DATA_TAG_HOST_SCH = 27, | |||
MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59 | |||
MSPROF_RUNTIME_DATA_TAG_TRACK = 41, | |||
MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79 | |||
MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99 | |||
MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119 | |||
MSPROF_MSPROFTX_DATA_TAG = 120, //msproftx data tag, range: 120~139 | |||
MSPROF_DATA_TAG_MAX = 65536, //data tag value type is uint16_t | |||
}; | |||
/** | |||
* @brief struct of mixed data | |||
*/ | |||
#define MSPROF_MIX_DATA_RESERVE_BYTES 7 | |||
#define MSPROF_MIX_DATA_STRING_LEN 120 | |||
enum MsprofMixDataType { | |||
MSPROF_MIX_DATA_HASH_ID = 0, | |||
MSPROF_MIX_DATA_STRING, | |||
}; | |||
struct MsprofMixData { | |||
uint8_t type; // MsprofMixDataType | |||
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; | |||
union { | |||
uint64_t hashId; | |||
char dataStr[MSPROF_MIX_DATA_STRING_LEN]; | |||
} data; | |||
}; | |||
/** | |||
* @brief profiling command info | |||
*/ | |||
#define MSPROF_MAX_DEV_NUM 64 | |||
struct MsprofCommandHandle { | |||
uint64_t profSwitch; | |||
uint64_t profSwitchHi; | |||
uint32_t devNums; | |||
uint32_t devIdList[MSPROF_MAX_DEV_NUM]; | |||
uint32_t modelId; | |||
uint32_t type; | |||
}; | |||
/** | |||
* @brief struct of data reported by acl | |||
*/ | |||
#define MSPROF_ACL_DATA_RESERVE_BYTES 32 | |||
#define MSPROF_ACL_API_NAME_LEN 64 | |||
enum MsprofAclApiType { | |||
MSPROF_ACL_API_TYPE_OP = 1, | |||
MSPROF_ACL_API_TYPE_MODEL, | |||
MSPROF_ACL_API_TYPE_RUNTIME, | |||
MSPROF_ACL_API_TYPE_OTHERS, | |||
}; | |||
struct MsprofAclProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_ACL_DATA_TAG; | |||
uint32_t apiType; // enum MsprofAclApiType | |||
uint64_t beginTime; | |||
uint64_t endTime; | |||
uint32_t processId; | |||
uint32_t threadId; | |||
char apiName[MSPROF_ACL_API_NAME_LEN]; | |||
uint8_t reserve[MSPROF_ACL_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by GE | |||
*/ | |||
#define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104 | |||
struct MsprofGeProfModelLoadData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD; | |||
uint32_t modelId; | |||
MsprofMixData modelName; | |||
uint64_t startTime; | |||
uint64_t endTime; | |||
uint8_t reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8 | |||
#define MSPROF_GE_FUSION_OP_NUM 8 | |||
struct MsprofGeProfFusionData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION; | |||
uint32_t modelId; | |||
MsprofMixData fusionName; | |||
uint64_t inputMemSize; | |||
uint64_t outputMemSize; | |||
uint64_t weightMemSize; | |||
uint64_t workspaceMemSize; | |||
uint64_t totalMemSize; | |||
uint64_t fusionOpNum; | |||
uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM]; | |||
uint8_t reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64 | |||
struct MsprofGeProfInferData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER; | |||
uint32_t modelId; | |||
MsprofMixData modelName; | |||
uint32_t requestId; | |||
uint32_t threadId; | |||
uint64_t inputDataStartTime; | |||
uint64_t inputDataEndTime; | |||
uint64_t inferStartTime; | |||
uint64_t inferEndTime; | |||
uint64_t outputDataStartTime; | |||
uint64_t outputDataEndTime; | |||
uint8_t reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_TASK_DATA_RESERVE_BYTES 16 | |||
#define MSPROF_GE_OP_TYPE_LEN 56 | |||
enum MsprofGeTaskType { | |||
MSPROF_GE_TASK_TYPE_AI_CORE = 0, | |||
MSPROF_GE_TASK_TYPE_AI_CPU, | |||
MSPROF_GE_TASK_TYPE_AIV, | |||
}; | |||
enum MsprofGeShapeType { | |||
MSPROF_GE_SHAPE_TYPE_STATIC = 0, | |||
MSPROF_GE_SHAPE_TYPE_DYNAMIC, | |||
}; | |||
struct MsprofGeOpType { | |||
uint8_t type; // MsprofMixDataType | |||
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; | |||
union { | |||
uint64_t hashId; | |||
char dataStr[MSPROF_GE_OP_TYPE_LEN]; | |||
} data; | |||
}; | |||
struct MsprofGeProfTaskData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK; | |||
uint32_t taskType; // MsprofGeTaskType | |||
MsprofMixData opName; | |||
MsprofGeOpType opType; | |||
uint64_t curIterNum; | |||
uint64_t timeStamp; | |||
uint32_t shapeType; // MsprofGeShapeType | |||
uint32_t blockDims; | |||
uint32_t modelId; | |||
uint32_t streamId; | |||
uint32_t taskId; | |||
uint32_t threadId; | |||
uint8_t reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8 | |||
#define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8 | |||
#define MSPROF_GE_TENSOR_DATA_NUM 5 | |||
enum MsprofGeTensorType { | |||
MSPROF_GE_TENSOR_TYPE_INPUT = 0, | |||
MSPROF_GE_TENSOR_TYPE_OUTPUT, | |||
}; | |||
struct MsprofGeTensorData { | |||
uint32_t tensorType; // MsprofGeTensorType | |||
uint32_t format; | |||
uint32_t dataType; | |||
uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN]; | |||
}; | |||
struct MsprofGeProfTensorData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR; | |||
uint32_t modelId; | |||
uint64_t curIterNum; | |||
uint32_t streamId; | |||
uint32_t taskId; | |||
uint32_t tensorNum; | |||
MsprofGeTensorData tensorData[MSPROF_GE_TENSOR_DATA_NUM]; | |||
uint8_t reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27 | |||
enum MsprofGeStepTag { | |||
MSPROF_GE_STEP_TAG_BEGIN = 0, | |||
MSPROF_GE_STEP_TAG_END, | |||
}; | |||
struct MsprofGeProfStepData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP; | |||
uint32_t modelId; | |||
uint32_t streamId; | |||
uint32_t taskId; | |||
uint64_t timeStamp; | |||
uint64_t curIterNum; | |||
uint32_t threadId; | |||
uint8_t tag; // MsprofGeStepTag | |||
uint8_t reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6 | |||
struct MsprofGeProfIdMapData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP; | |||
uint32_t graphId; | |||
uint32_t modelId; | |||
uint32_t sessionId; | |||
uint64_t timeStamp; | |||
uint16_t mode; | |||
uint8_t reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24 | |||
struct MsprofGeProfHostSchData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH; | |||
uint32_t threadId; // record in start event | |||
uint64_t element; | |||
uint64_t event; | |||
uint64_t startTime; // record in start event | |||
uint64_t endTime; // record in end event | |||
uint8_t reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by RunTime | |||
*/ | |||
#define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106 | |||
#define MSPROF_RUNTIME_TASK_ID_NUM 10 | |||
#define MSPROF_RUNTIME_API_NAME_LEN 64 | |||
struct MsprofRuntimeProfApiData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API; | |||
uint32_t threadId; | |||
uint64_t entryTime; | |||
uint64_t exitTime; | |||
uint64_t dataSize; | |||
uint8_t apiName[MSPROF_RUNTIME_API_NAME_LEN]; | |||
uint32_t retCode; | |||
uint32_t streamId; | |||
uint32_t taskNum; | |||
uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM]; | |||
uint16_t memcpyDirection; | |||
uint8_t reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10 | |||
#define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32 | |||
struct MsprofRuntimeProfTrackData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK; | |||
uint32_t threadId; | |||
uint64_t timeStamp; | |||
char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN]; | |||
uint32_t taskId; | |||
uint16_t streamId; | |||
uint8_t reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by RunTime | |||
*/ | |||
#define MSPROF_AICPU_DATA_RESERVE_BYTES 9 | |||
struct MsprofAicpuProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_AICPU_DATA_TAG; | |||
uint16_t streamId; | |||
uint16_t taskId; | |||
uint64_t runStartTime; | |||
uint64_t runStartTick; | |||
uint64_t computeStartTime; | |||
uint64_t memcpyStartTime; | |||
uint64_t memcpyEndTime; | |||
uint64_t runEndTime; | |||
uint64_t runEndTick; | |||
uint32_t threadId; | |||
uint32_t deviceId; | |||
uint64_t submitTick; | |||
uint64_t scheduleTick; | |||
uint64_t tickBeforeRun; | |||
uint64_t tickAfterRun; | |||
uint32_t kernelType; | |||
uint32_t dispatchTime; | |||
uint32_t totalTime; | |||
uint16_t fftsThreadId; | |||
uint8_t version; | |||
uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by DP | |||
*/ | |||
#define MSPROF_DP_DATA_RESERVE_BYTES 16 | |||
#define MSPROF_DP_DATA_ACTION_LEN 16 | |||
#define MSPROF_DP_DATA_SOURCE_LEN 64 | |||
struct MsprofDpProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_DP_DATA_TAG; | |||
uint32_t rsv; // Ensure 8-byte alignment | |||
uint64_t timeStamp; | |||
char action[MSPROF_DP_DATA_ACTION_LEN]; | |||
char source[MSPROF_DP_DATA_SOURCE_LEN]; | |||
uint64_t index; | |||
uint64_t size; | |||
uint8_t reserve[MSPROF_DP_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by HCCL | |||
*/ | |||
#pragma pack(4) | |||
struct MsprofHcclProfNotify { | |||
uint32_t taskID; | |||
uint64_t notifyID; | |||
uint32_t stage; | |||
uint32_t remoteRank; | |||
uint32_t transportType; | |||
uint32_t role; // role {0: dst, 1:src} | |||
double durationEstimated; | |||
}; | |||
/**
 * @brief HCCL profiling payload for a reduce task (packed to 4 bytes by the
 *        surrounding #pragma pack(4); layout is part of the binary protocol).
 */
struct MsprofHcclProfReduce {
    uint32_t taskID;         // task identifier of the reduce operation
    uint64_t src;            // source buffer address
    uint64_t dst;            // destination buffer address
    uint64_t size;           // number of bytes reduced
    uint32_t op;             // {0: sum, 1: mul, 2: max, 3: min}
    uint32_t dataType;       // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64}
    uint32_t linkType;       // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
    uint32_t remoteRank;     // peer rank involved in the reduce
    uint32_t transportType;  // transport type {0: SDMA, 1: RDMA, 2:LOCAL} -- NOTE(review): RDMA/Memcpy structs list the opposite 0/1 mapping; confirm which is authoritative
    uint32_t role;           // role {0: dst, 1:src}
    double durationEstimated;  // estimated duration (time unit not specified here -- confirm)
};
/**
 * @brief HCCL profiling payload for an RDMA task (packed to 4 bytes by the
 *        surrounding #pragma pack(4); layout is part of the binary protocol).
 */
struct MsprofHcclProfRDMA {
    uint32_t taskID;         // task identifier of the RDMA operation
    uint64_t src;            // source buffer address
    uint64_t dst;            // destination buffer address
    uint64_t size;           // number of bytes transferred
    uint64_t notifyID;       // notify primitive paired with this transfer
    uint32_t linkType;       // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
    uint32_t remoteRank;     // peer rank of the transfer
    uint32_t transportType;  // transport type {0: RDMA, 1:SDMA, 2:LOCAL} -- NOTE(review): MsprofHcclProfReduce lists the opposite 0/1 mapping; confirm which is authoritative
    uint32_t role;           // role {0: dst, 1:src}
    uint32_t type;           // RDMA type {0: RDMASendNotify, 1:RDMASendPayload}
    double durationEstimated;  // estimated duration (time unit not specified here -- confirm)
};
/**
 * @brief HCCL profiling payload for a memcpy task (packed to 4 bytes by the
 *        surrounding #pragma pack(4); layout is part of the binary protocol).
 */
struct MsprofHcclProfMemcpy {
    uint32_t taskID;         // task identifier of the memcpy operation
    uint64_t src;            // source buffer address
    uint64_t dst;            // destination buffer address
    uint64_t size;           // number of bytes copied
    uint64_t notifyID;       // notify primitive paired with this copy
    uint32_t linkType;       // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
    uint32_t remoteRank;     // peer rank of the copy
    uint32_t transportType;  // transport type {0: RDMA, 1:SDMA, 2:LOCAL} -- NOTE(review): MsprofHcclProfReduce lists the opposite 0/1 mapping; confirm which is authoritative
    uint32_t role;           // role {0: dst, 1:src}
    double durationEstimated;  // estimated duration (time unit not specified here -- confirm)
};
/**
 * @brief HCCL profiling payload describing the rank topology of a stage step.
 */
struct MsprofHcclProfStageStep {
    uint32_t rank;      // rank id of the reporting process
    uint32_t rankSize;  // total number of ranks in the communication group
};
/**
 * @brief HCCL profiling payload carrying collective-operation identification flags.
 */
struct MsprofHcclProfFlag {
    uint64_t cclTag;        // tag identifying the collective operation (presumably a hashed/encoded string -- confirm)
    uint64_t groupName;     // communication group identifier (presumably a hashed/encoded string -- confirm)
    uint32_t localRank;     // rank id of the reporting process within the group
    uint32_t workFlowMode;  // workflow mode of the collective (value meanings not defined here)
};
/** | |||
* @name MsprofHcclProfData | |||
* @brief struct of data reported by hccl | |||
*/ | |||
struct MsprofHcclProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_HCCL_DATA_TAG; | |||
uint32_t planeID; | |||
uint32_t deviceID; | |||
uint32_t streamID; | |||
double ts; | |||
char name[16]; | |||
union { | |||
MsprofHcclProfNotify notify; | |||
MsprofHcclProfReduce reduce; | |||
MsprofHcclProfStageStep stageStep; | |||
MsprofHcclProfMemcpy forMemcpy; | |||
MsprofHcclProfRDMA RDMA; | |||
MsprofHcclProfFlag flag; | |||
} args; | |||
}; | |||
#pragma pack() | |||
/**
 * @name MsprofStampInfo
 * @brief Record reported by msproftx for user-defined markers/ranges
 *        (start/end timestamps plus an optional typed payload and message).
 *        Binary wire/ABI format: member order, types and reserve padding
 *        must not change. Note: unlike the HCCL structs above, this one is
 *        outside the #pragma pack(4) region, so natural alignment applies.
 */
struct MsprofStampInfo {
    uint16_t magicNumber;  // common msprof record magic (set by reporter, no default here)
    uint16_t dataTag;      // identifies this record type to the parser
    uint32_t processId;    // reporting process id
    uint32_t threadId;     // reporting thread id
    uint32_t category;     // marker category
    uint32_t eventType;    // event type of the marker (value meanings not defined here)
    int32_t payloadType;   // selects which PayloadValue member is valid
    union PayloadValue     // payload info for marker; interpretation governed by payloadType
    {
        uint64_t ullValue;
        int64_t llValue;
        double dValue;
        uint32_t uiValue[2];
        int32_t iValue[2];
        float fValue[2];
    } payload;
    uint64_t startTime;    // range start timestamp (unit/epoch not defined here -- confirm)
    uint64_t endTime;      // range end timestamp; presumably equals startTime for point markers -- confirm
    int32_t messageType;   // how `message` should be interpreted (value meanings not defined here)
    char message[128];     // user-supplied marker text, fixed 128-byte buffer
    uint8_t reserve0[4];   // reserved; keeps record size/alignment stable
    uint8_t reserve1[72];  // reserved for future fields
};
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // MSPROFILER_PROF_COMMON_H_ |