| @@ -45,6 +45,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid | |||||
| static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | ||||
| static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | ||||
| static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | ||||
| static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout | |||||
| static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | ||||
| static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | ||||
| @@ -61,8 +61,7 @@ typedef enum { | |||||
| * @brief handle to HCCL communicator | * @brief handle to HCCL communicator | ||||
| */ | */ | ||||
| typedef void *HcclComm; | typedef void *HcclComm; | ||||
| typedef void *HcclMessage; | |||||
| typedef void *HcclRequest; | |||||
| /** | /** | ||||
| * @brief HCCL Reduction operation | * @brief HCCL Reduction operation | ||||
| */ | */ | ||||
| @@ -88,14 +87,6 @@ typedef enum { | |||||
| HCCL_DATA_TYPE_RESERVED /**< reserved */ | HCCL_DATA_TYPE_RESERVED /**< reserved */ | ||||
| } HcclDataType; | } HcclDataType; | ||||
| typedef struct { | |||||
| int srcRank; // rank_id of the sender of the received/probed msg/envelope; defined by the MPI standard, callers may access it | |||||
| int tag; // tag of the received/probed msg/envelope; defined by the MPI standard, callers may access it | |||||
| int error; // error code of the receive/probe, 0: no error, others: error during transmission; defined by the MPI standard, callers may access it | |||||
| int cancelled; // implementation-specific; callers are advised not to access it | |||||
| int count; // size of the received/probed payload; implementation-specific, callers are advised not to access it | |||||
| } HcclStatus; | |||||
| const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | ||||
| /** | /** | ||||
| * @brief HCCL root info | * @brief HCCL root info | ||||
| @@ -104,7 +95,6 @@ typedef struct HcclRootInfoDef { | |||||
| char internal[HCCL_ROOT_INFO_BYTES]; | char internal[HCCL_ROOT_INFO_BYTES]; | ||||
| } HcclRootInfo; | } HcclRootInfo; | ||||
| #define HCCL_REQUEST_NULL NULL | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif // __cplusplus | #endif // __cplusplus | ||||
| @@ -45,6 +45,7 @@ static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid | |||||
| static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | ||||
| static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | ||||
| static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | ||||
| static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout | |||||
| static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | ||||
| static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | ||||
| @@ -72,6 +72,14 @@ enum { | |||||
| kSelectBranch, | kSelectBranch, | ||||
| kExecuteSubGraph, | kExecuteSubGraph, | ||||
| kInitSubGraphExecutor, | kInitSubGraphExecutor, | ||||
| // fuzz compile | |||||
| kSelectBin, | |||||
| kFindCompileCache, | |||||
| kAddCompileCache, | |||||
| kFuzzCompileOp, | |||||
| kCalcRuningParam, | |||||
| kGenTask, | |||||
| kRegisterBin, | |||||
| // Add new definitions here | // Add new definitions here | ||||
| kProfilingIndexEnd | kProfilingIndexEnd | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit e0efffc740a79d49ba0553478b51d9d3481771cb | |||||
| Subproject commit 22309b14838a763d41dccd636fec567dae3720fd | |||||
| @@ -197,6 +197,20 @@ typedef struct tagCommAttr { | |||||
| WorkMode mode; // 通信域内的probe工作模式 | WorkMode mode; // 通信域内的probe工作模式 | ||||
| uint32_t deviceId = 0; | uint32_t deviceId = 0; | ||||
| } CommAttr; | } CommAttr; | ||||
| typedef void* HcclMessage; | |||||
| typedef void* HcclRequest; | |||||
| typedef struct { | |||||
| int srcRank; // rank_id of the sender of the received/probed msg/envelope; defined by the MPI standard, callers may access it | |||||
| int tag; // tag of the received/probed msg/envelope; defined by the MPI standard, callers may access it | |||||
| int error; // error code of the receive/probe, 0: no error, others: error during transmission; defined by the MPI standard, callers may access it | |||||
| int cancelled; // implementation-specific; callers are advised not to access it | |||||
| int count; // size of the received/probed payload; implementation-specific, callers are advised not to access it | |||||
| } HcclStatus; | |||||
| #define HCCL_REQUEST_NULL NULL | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif // __cplusplus | #endif // __cplusplus | ||||
| @@ -3488,7 +3488,7 @@ REG_OP(Addcmul) | |||||
| REG_OP(AxpyV2) | REG_OP(AxpyV2) | ||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | ||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | ||||
| .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | ||||
| .OP_END_FACTORY_REG(AxpyV2) | .OP_END_FACTORY_REG(AxpyV2) | ||||
| @@ -1737,17 +1737,17 @@ round_prefer_ceil, floor, ceil. Only used by nearest interpolation. | |||||
| */ | */ | ||||
| REG_OP(Resize) | REG_OP(Resize) | ||||
| .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, | |||||
| DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(scales, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(sizes, TensorType({DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, | |||||
| DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(x, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32, | |||||
| DT_INT64,DT_FLOAT16,DT_FLOAT,DT_DOUBLE})) | |||||
| .OPTIONAL_INPUT(roi, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE})) | |||||
| .OPTIONAL_INPUT(scales, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(sizes, TensorType({DT_INT64,DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32, | |||||
| DT_INT64,DT_FLOAT16,DT_FLOAT,DT_DOUBLE})) | |||||
| .ATTR(coordinate_transformation_mode, String, "half_pixel") | .ATTR(coordinate_transformation_mode, String, "half_pixel") | ||||
| .ATTR(cubic_coeff_a, Float, -0.75) | .ATTR(cubic_coeff_a, Float, -0.75) | ||||
| .ATTR(exclude_outside, Int, 0) | .ATTR(exclude_outside, Int, 0) | ||||
| .ATTR(extrapolation_value, Float, 0) | |||||
| .ATTR(extrapolation_value, Float, 0.0) | |||||
| .ATTR(mode, String, "nearest") | .ATTR(mode, String, "nearest") | ||||
| .ATTR(nearest_mode, String, "round_prefer_floor") | .ATTR(nearest_mode, String, "round_prefer_floor") | ||||
| .OP_END_FACTORY_REG(Resize) | .OP_END_FACTORY_REG(Resize) | ||||
| @@ -1801,7 +1801,7 @@ REG_OP(SoftmaxCrossEntropyLoss) | |||||
| * @par Restrictions: | * @par Restrictions: | ||||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
| */ | */ | ||||
| REG_OP(AxpyWithSoftmaxAndDropoutdomask) | |||||
| REG_OP(AxpyWithSoftmaxAndDropOutDoMask) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16})) | .INPUT(x1, TensorType({DT_FLOAT16})) | ||||
| .INPUT(x2, TensorType({DT_FLOAT16})) | .INPUT(x2, TensorType({DT_FLOAT16})) | ||||
| .INPUT(mask, TensorType({DT_UINT8})) | .INPUT(mask, TensorType({DT_UINT8})) | ||||
| @@ -1810,6 +1810,6 @@ REG_OP(AxpyWithSoftmaxAndDropoutdomask) | |||||
| .REQUIRED_ATTR(alpha, Float) | .REQUIRED_ATTR(alpha, Float) | ||||
| .REQUIRED_ATTR(input_keep_prob, Float) | .REQUIRED_ATTR(input_keep_prob, Float) | ||||
| .ATTR(axis, ListInt, {-1}) | .ATTR(axis, ListInt, {-1}) | ||||
| .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropoutdomask) | |||||
| .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropOutDoMask) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | ||||
| @@ -116,6 +116,7 @@ REG_OP(FusedBatchNormV2) | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * One output, including: | * One output, including: | ||||
| * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. | * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. | ||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
| */ | */ | ||||
| REG_OP(SegmentSort) | REG_OP(SegmentSort) | ||||
| @@ -137,6 +138,7 @@ REG_OP(SegmentSort) | |||||
| * Two outputs, including: | * Two outputs, including: | ||||
| * output_proposal: A Tensor. Datatype and format are the same as input_data. Proposal sorted for each channel. | * output_proposal: A Tensor. Datatype and format are the same as input_data. Proposal sorted for each channel. | ||||
| * output_index: A Tensor. If include_index is true, output index. | * output_index: A Tensor. If include_index is true, output index. | ||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
| */ | */ | ||||
| REG_OP(MultiMerge) | REG_OP(MultiMerge) | ||||
| @@ -147,6 +149,27 @@ REG_OP(MultiMerge) | |||||
| .ATTR(include_index, Bool, false) | .ATTR(include_index, Bool, false) | ||||
| .OP_END_FACTORY_REG(MultiMerge) | .OP_END_FACTORY_REG(MultiMerge) | ||||
| /** | |||||
| * @brief Sorts a large amount of data. Third operator of TopK. | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * input_proposal: A Tensor. Proposal sorted for each channel. Supports float16. | |||||
| * @par Attributes: | |||||
| * k_num: Required Int. Number to be sorted. | |||||
| * @par Outputs: | |||||
| * Two outputs, including: | |||||
| * @li output_data: A Tensor. Datatype and format are the same as input_proposal. Data sorted. | |||||
| * @li output_index: A Tensor of type int32. Data index. | |||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | |||||
| REG_OP(SingleMerge) | |||||
| .INPUT(input_proposal, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(output_data, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(output_index, TensorType({DT_INT32})) | |||||
| .REQUIRED_ATTR(k_num, Int) | |||||
| .OP_END_FACTORY_REG(SingleMerge) | |||||
| /** | /** | ||||
| * @brief MultiHeadAttention. | * @brief MultiHeadAttention. | ||||
| * @par Inputs: | * @par Inputs: | ||||
| @@ -663,6 +663,9 @@ REG_OP(ReduceProdD) | |||||
| *keep_dims: A bool or NoneType. | *keep_dims: A bool or NoneType. | ||||
| * - If true, retains reduced dimensions with length 1. | * - If true, retains reduced dimensions with length 1. | ||||
| * - If false, the rank of the tensor is reduced by 1 for each entry in axis. | * - If false, the rank of the tensor is reduced by 1 for each entry in axis. | ||||
| *noop_with_empty_axes: A bool. | |||||
| * - If true, when axes = [], do not reduce. | |||||
| * - If false, when axes = [], reduce all axes. | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor. Has the same type as "x" . \n | *y: A Tensor. Has the same type as "x" . \n | ||||
| @@ -674,6 +677,7 @@ REG_OP(ReduceMean) | |||||
| .INPUT(axes, TensorType::IndexNumberType()) | .INPUT(axes, TensorType::IndexNumberType()) | ||||
| .OUTPUT(y, TensorType::NumberType()) | .OUTPUT(y, TensorType::NumberType()) | ||||
| .ATTR(keep_dims, Bool, false) | .ATTR(keep_dims, Bool, false) | ||||
| .ATTR(noop_with_empty_axes, Bool, true) | |||||
| .OP_END_FACTORY_REG(ReduceMean) | .OP_END_FACTORY_REG(ReduceMean) | ||||
| /** | /** | ||||