Merge pull request !2092 from yanghaoran/release (tags/v1.8.0^2)
@@ -60,6 +60,7 @@ static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resou
 static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012;      // over limit
 static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013;     // queue is empty
 static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014;      // queue is full
+static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015;   // repeated init
 static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;  // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;        // ts internel error
@@ -293,6 +293,7 @@ struct OpDescInfo {
   std::string dev_func;
   std::string tvm_magic;
   uint32_t tiling_key = 0U;
+  uintptr_t args = 0U;
   std::string tiling_data;
   std::string node_info;
   std::vector<int64_t> workspace_bytes;
@@ -1 +1 @@
-Subproject commit 7d777404b3b7fe7daeaf00e566e431c6a05b040a
+Subproject commit fe47d04d75170006fc0d28538dec49a2da426ceb
@@ -58,6 +58,10 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no str
 static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;  // no notify resource
 static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;   // no model resource
 static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011;     // no cdq resource
+static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012;          // over limit
+static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013;         // queue is empty
+static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014;          // queue is full
+static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015;       // repeated init
 static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;      // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;            // ts internel error
@@ -97,6 +101,10 @@ static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector
 static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035;       // vector core exception
 static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036;  // vector core trap exception
 static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037;          // cdq alloc batch abnormal
+static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038;       // can not change die mode
+static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039;               // single die mode can not set die
+static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040;               // invalid die id
+static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041;            // die mode not set
 static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899;          // drv internal error
 static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900;        // aicpu internal error
@@ -105,5 +113,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc di
 #ifdef __cplusplus
 }
 #endif
 #endif  // __INC_EXTERNEL_RT_ERROR_CODES_H__
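Note (illustration, not part of the change): a minimal helper showing how caller code could report the newly added runtime error codes. Only the numeric values come from the headers above; the helper name and the message strings are assumptions.

```cpp
#include <cstdint>
#include <string>

// Map the error codes added in this change to short human-readable strings.
std::string DescribeNewRtError(int32_t code) {
  switch (code) {
    case 207015: return "ACL_ERROR_RT_REPEATED_INIT: runtime was initialized more than once";
    case 507038: return "ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR: die mode can not be changed";
    case 507039: return "ACL_ERROR_RT_DIE_SET_ERROR: single-die mode can not set a die";
    case 507040: return "ACL_ERROR_RT_INVALID_DIEID: invalid die id";
    case 507041: return "ACL_ERROR_RT_DIE_MODE_NOT_SET: die mode has not been set";
    default:     return "unrecognized code: " + std::to_string(code);
  }
}
```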
@@ -497,6 +497,25 @@ REG_OP(Constant)
     .ATTR(value, Tensor, Tensor())
     .OP_END_FACTORY_REG(Constant)

+/**
+*@brief Creates a file constant tensor. This operator is used to process a very large weight which is stored in a file. \n
+
+*@par Attributes:
+*@li file_id: A string, used to record the file id.
+*@li shape: The data shape.
+*@li dtype: The data type. \n
+
+*@par Outputs:
+*y: The FileConstant tensor. \n
+*/
+REG_OP(FileConstant)
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \
+        DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .REQUIRED_ATTR(file_id, String)
+    .REQUIRED_ATTR(shape, ListInt)
+    .REQUIRED_ATTR(dtype, Type)
+    .OP_END_FACTORY_REG(FileConstant)
+
 /**
 *@brief Returns a copy of the input tensor. \n
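Note (illustration, not part of the change): a sketch of how the new op might be built through the GE IR C++ API, assuming REG_OP generates the usual ge::op::FileConstant wrapper with set_attr_* setters. The include path, file id, and shape below are placeholders.

```cpp
#include "array_ops.h"   // op proto header carrying the FileConstant registration (path may differ)

// Hypothetical usage: declare a weight whose data lives in an external file.
ge::op::FileConstant MakeFileWeight() {
  ge::op::FileConstant weight("weight_from_file");
  weight.set_attr_file_id("0");            // made-up file id
  weight.set_attr_shape({1024, 1024});     // made-up shape
  weight.set_attr_dtype(ge::DT_FLOAT16);   // must match the data stored in the file
  return weight;
}
```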
@@ -1329,31 +1348,6 @@ REG_OP(ExpandD)
     .REQUIRED_ATTR(shape, ListInt)
     .OP_END_FACTORY_REG(ExpandD)

-/**
-* @brief Calculate buckets limit and offset. \n
-
-* @par Inputs:
-* Three inputs, including:
-* @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n
-* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n
-* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n
-
-* @par Attributes:
-* total_limit: A int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n
-
-* @par Outputs:
-* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n
-* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n
-*/
-REG_OP(CalcBucketsLimitAndOffset)
-    .INPUT(bucket_list, TensorType({DT_INT32}))
-    .INPUT(ivf_counts, TensorType({DT_INT32}))
-    .INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64}))
-    .OUTPUT(buckets_limit, TensorType({DT_INT32}))
-    .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64}))
-    .REQUIRED_ATTR(total_limit, Int)
-    .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset)
-
 /**
 *@brief Get dim number in tensordesc. \n
@@ -1362,6 +1356,9 @@ REG_OP(CalcBucketsLimitAndOffset)
 *@par Outputs:
 *y: A 1D tensor. The data type must be int32. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(GetShape)
     .DYNAMIC_INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
@@ -1377,8 +1374,13 @@ REG_OP(GetShape)
 *@par outputs:
 * y: a tensor_desc, type is int.\n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(UpdateTensorDesc)
+    .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8,
+                          DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE}))
     .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8,
                            DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE}))
     .REQUIRED_ATTR(shape, ListInt)
@@ -585,6 +585,14 @@ REG_OP(ResizeNearestNeighborV2GradD)
 *@li original_image: A Tensor. 4-D shape. Must set the format, supported format list ["NCHW, NHWC"]
 channels], The image tensor that was resized . \n
+
 *@par Attributes:
+*@li size: An optional listint. Defaults to {}.
+*@li ori_image_size: An optional listint. Defaults to {}.
+*@li src_start_w: An optional int. Defaults to 0.
+*@li dst_start_w: An optional int. Defaults to 0.
 *@li align_corners: An optional bool. Defaults to False. If true, the centers of
 the 4 corner pixels of the input and grad tensors are aligned. Defaults to
@@ -606,6 +614,10 @@ REG_OP(ResizeBilinearV2Grad)
     .INPUT(grads, TensorType({DT_FLOAT}))
     .INPUT(original_image, TensorType::FloatingDataType())
     .OUTPUT(y, TensorType({DT_FLOAT}))
+    .ATTR(size, ListInt, {})
+    .ATTR(ori_image_size, ListInt, {})
+    .ATTR(src_start_w, Int, 0)
+    .ATTR(dst_start_w, Int, 0)
     .ATTR(align_corners, Bool, false)
     .ATTR(half_pixel_centers, Bool, false)
     .OP_END_FACTORY_REG(ResizeBilinearV2Grad)
@@ -624,7 +636,10 @@ size for the images . \n
 output tensors are aligned, preserving the values at the corner pixels.
 Defaults to false .
 * @li half_pixel_centers: An optional bool. Defaults to False . \n
+*@li ori_image_size: An optional listint. Defaults to {}.
+*@li split_size: An optional listint. Defaults to {}.
+*@li src_start_w: An optional int. Defaults to 0.
+*@li dst_start_w: An optional int. Defaults to 0.

 *@par Outputs:
 *y: 4-D with shape [batch, new_height, new_width, channels] . \n
@@ -640,6 +655,10 @@ REG_OP(ResizeBilinearV2)
                           DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
     .INPUT(size, TensorType({DT_INT32}))
     .OUTPUT(y, TensorType({DT_FLOAT}))
+    .ATTR(ori_image_size, ListInt, {})
+    .ATTR(split_size, ListInt, {})
+    .ATTR(src_start_w, Int, 0)
+    .ATTR(dst_start_w, Int, 0)
     .ATTR(align_corners, Bool, false)
     .ATTR(half_pixel_centers, Bool, false)
     .OP_END_FACTORY_REG(ResizeBilinearV2)
@@ -113,9 +113,7 @@ if input "x" is with format NC1HWC0. Specifies the mean of "x".
 Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
 *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
 Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
-*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
-Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output .
-*@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow, only has one useless element. \n
+*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. \n

 *@attention Constraints:
 *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
@@ -137,7 +135,6 @@ REG_OP(BatchNorm)
     .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
     .OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
     .OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
-    .OUTPUT(reserve_space_3, TensorType({DT_FLOAT}))
     .ATTR(epsilon, Float, 0.0001)
     .ATTR(data_format, String, "NHWC")
     .ATTR(is_training, Bool, true)
@@ -166,6 +163,33 @@ REG_OP(SyncBatchNormBackwardReduce)
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OP_END_FACTORY_REG(SyncBatchNormBackwardReduce)

+/**
+*@brief Part of SyncBatchNormBackward . \n
+
+*@par Inputs:
+* Seven inputs, including:
+*@li grad_output: A Tensor. Must be one of the following types: float16, float32 .
+*@li save_input: A Tensor. Must be one of the following types: float16, float32 .
+*@li mean: A Tensor. Must be one of the following types: float16, float32 .
+*@li invstd: A Tensor. Must be one of the following types: float16, float32 .
+*@li weight: A Tensor. Must be one of the following types: float16, float32 .
+*@li mean_dy: A Tensor. Must be one of the following types: float16, float32 .
+*@li mean_dy_xmu: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Outputs:
+*grad_input: A Tensor. Has the same type and format as input "grad_output" . \n
+*/
+REG_OP(SyncBatchNormBackwardElemt)
+    .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(save_input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(invstd, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mean_dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mean_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(grad_input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(SyncBatchNormBackwardElemt)
+
 /**
 *@brief Performs batch normalization . \n
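Note (illustration, not part of the change): if this op mirrors PyTorch's torch.batch_norm_backward_elemt, which its input list suggests, the per-element step would be roughly the sketch below. The function name is made up and the formula is an assumption, not taken from this repository.

```cpp
#include <cstddef>

// Assumed element-wise SyncBN backward step for one channel with scalar statistics
// mean/invstd/weight/mean_dy/mean_dy_xmu, modeled after torch.batch_norm_backward_elemt.
void SyncBnBackwardElemtRef(const float *grad_output, const float *save_input,
                            float mean, float invstd, float weight,
                            float mean_dy, float mean_dy_xmu,
                            float *grad_input, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    // xhat_term = (x - mean) * invstd^2 * mean(dy * (x - mean))
    const float xhat_term = (save_input[i] - mean) * invstd * invstd * mean_dy_xmu;
    grad_input[i] = (grad_output[i] - mean_dy - xhat_term) * invstd * weight;
  }
}
```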
@@ -285,8 +309,7 @@ REG_OP(BatchNormExt2)
 *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0.
 *@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0.
 *@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm.
-*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm .
-*@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n
+*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n

 *@par Attributes:
 *@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
@@ -313,7 +336,6 @@ REG_OP(BatchNormGrad)
     .INPUT(scale, TensorType({DT_FLOAT}))
     .INPUT(reserve_space_1, TensorType({DT_FLOAT}))
     .INPUT(reserve_space_2, TensorType({DT_FLOAT}))
-    .OPTIONAL_INPUT(reserve_space_3, TensorType({DT_FLOAT}))
     .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
     .OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
     .OUTPUT(offset_backprop, TensorType({DT_FLOAT}))
@@ -128,7 +128,7 @@ REG_OP(OCRIdentifyPreHandle)
     .INPUT(imgs_offset, TensorType({DT_INT32}))
     .INPUT(imgs_size, TensorType({DT_INT32}))
     .OUTPUT(resized_imgs, TensorType({DT_UINT8}))
-    .ATTR(size, ListInt, {})
+    .REQUIRED_ATTR(size, ListInt)
     .ATTR(data_format, String, "NHWC")
     .OP_END_FACTORY_REG(OCRIdentifyPreHandle)
@@ -247,6 +247,7 @@ REG_OP(OCRDetectionPostHandle)
 *@li clipped_polys_data: A Tensor of type int32. point data of every clipped poly. \n
 *@li clipped_polys_offset: A Tensor of type int32. Offset of every clipped poly . \n
 *@li clipped_polys_size: A Tensor of type int32. Size of every clipped poly. \n
+*@li clipped_polys_num: A Tensor of type int32. Number of clipped polys. \n
 */
 REG_OP(ResizeAndClipPolys)
     .INPUT(polys_data, TensorType({DT_INT32}))
@@ -259,6 +260,7 @@ REG_OP(ResizeAndClipPolys)
     .OUTPUT(clipped_polys_data, TensorType({DT_INT32}))
     .OUTPUT(clipped_polys_offset, TensorType({DT_INT32}))
     .OUTPUT(clipped_polys_size, TensorType({DT_INT32}))
+    .OUTPUT(clipped_polys_num, TensorType({DT_INT32}))
     .OP_END_FACTORY_REG(ResizeAndClipPolys);
@@ -1305,6 +1305,27 @@ REG_OP(ReduceStdWithMean)
     .ATTR(invert, Bool, false)
     .ATTR(epsilon, Float, 0.001)
     .OP_END_FACTORY_REG(ReduceStdWithMean)

+/**
+*@brief Performs reduced batch normalization . \n
+
+*@par Inputs:
+*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n
+
+*@par Outputs:
+*@li mean: A Tensor of type float32 for SUM reduced "x".
+*@li variance: A Tensor of type float32 for square sum reduced "x" . \n
+
+*@par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ReduceMeanVariance)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(mean, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(variance, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .ATTR(axes, ListInt, {})
+    .ATTR(keep_dims, Bool, true)
+    .OP_END_FACTORY_REG(ReduceMeanVariance)
+
 } //namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_
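Note (illustration, not part of the change): the doxygen block above reads like it was copied from the BN training-reduce op. Going by the op name, the output names, and the axes/keep_dims attributes, the intended behaviour is presumably an ordinary mean/variance reduction, roughly as sketched below; the helper name and the biased-variance choice are assumptions.

```cpp
#include <cstddef>
#include <vector>

// Assumed reference semantics for ReduceMeanVariance over one flattened reduction
// axis (the real op reduces over the "axes" attribute and keeps dims by default).
void ReduceMeanVarianceRef(const std::vector<float> &x, float &mean, float &variance) {
  const size_t n = x.size();
  float sum = 0.0f;
  float sq_sum = 0.0f;
  for (size_t i = 0; i < n; ++i) {
    sum += x[i];
    sq_sum += x[i] * x[i];
  }
  mean = sum / static_cast<float>(n);
  variance = sq_sum / static_cast<float>(n) - mean * mean;  // E[x^2] - E[x]^2
}
```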
@@ -78,8 +78,8 @@ REG_OP(TopKPQDistance)
     .OUTPUT(topk_ivf, TensorType({DT_INT32}))
     .OUTPUT(topk_index, TensorType({DT_INT32}))
     .ATTR(order, String, "ASC")
-    .ATTR(k, Int, 0)
-    .ATTR(group_size, Int, 0)
+    .REQUIRED_ATTR(k, Int)
+    .REQUIRED_ATTR(group_size, Int)
     .OP_END_FACTORY_REG(TopKPQDistance)

 /**
@@ -129,6 +129,68 @@ REG_OP(ScanPQCodes)
     .ATTR(split_count, Int, 1)
     .ATTR(split_index, Int, 0)
     .OP_END_FACTORY_REG(ScanPQCodes)

+/**
+* @brief Calculate buckets limit and offset. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n
+* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n
+* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n
+
+* @par Attributes:
+* total_limit: An int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n
+
+* @par Outputs:
+* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n
+* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n
+*/
+REG_OP(CalcBucketsLimitAndOffset)
+    .INPUT(bucket_list, TensorType({DT_INT32}))
+    .INPUT(ivf_counts, TensorType({DT_INT32}))
+    .INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(buckets_limit, TensorType({DT_INT32}))
+    .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64}))
+    .REQUIRED_ATTR(total_limit, Int)
+    .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset)
+
+/**
+* @brief Calculate ProdVirialSeA. \n
+*
+* @par Inputs:
+* Five inputs, including:
+* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li rij: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li nlist: A Tensor. dtype is int32.
+* @li natoms: A Tensor. dtype is int32. \n
+*
+* @par Outputs:
+* Two outputs, including:
+* @li virial: A Tensor. Must be one of the following types: float16, float32, float64.
+* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n
+*
+* @par Attributes:
+* Three attributes, including:
+* @li n_a_sel: A Scalar.
+* @li n_r_sel: A Scalar.
+* @li nall: An optional int. Defaults to 28328. \n
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ProdVirialSeA)
+    .INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(nlist, TensorType({DT_INT32}))
+    .INPUT(natoms, TensorType({DT_INT32}))
+    .OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(n_a_sel, Int)
+    .REQUIRED_ATTR(n_r_sel, Int)
+    .ATTR(nall, Int, 28328)
+    .OP_END_FACTORY_REG(ProdVirialSeA)
+
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_
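Note (illustration, not part of the change): a host-side reading of the CalcBucketsLimitAndOffset contract described above. The greedy clamping policy is an assumption inferred from "sum <= total_limit", and the helper name is made up.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Gather the counts and offsets selected by bucket_list, then clamp the counts so
// their running sum stays within total_limit; offsets are passed through unchanged.
void CalcBucketsLimitAndOffsetRef(const std::vector<int32_t> &bucket_list,
                                  const std::vector<int32_t> &ivf_counts,
                                  const std::vector<int64_t> &ivf_offset,
                                  int64_t total_limit,
                                  std::vector<int32_t> &buckets_limit,
                                  std::vector<int64_t> &buckets_offset) {
  buckets_limit.clear();
  buckets_offset.clear();
  int64_t used = 0;
  for (const int32_t b : bucket_list) {
    const int64_t remaining = std::max<int64_t>(total_limit - used, 0);
    const int64_t limited = std::min<int64_t>(ivf_counts[b], remaining);
    buckets_limit.push_back(static_cast<int32_t>(limited));
    buckets_offset.push_back(ivf_offset[b]);
    used += limited;
  }
}
```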
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef __CCE_RUNTIME_BASE_H__
-#define __CCE_RUNTIME_BASE_H__
+#ifndef CCE_RUNTIME_BASE_H
+#define CCE_RUNTIME_BASE_H

 #include <stdint.h>
 #include "toolchain/prof_callback.h"

@@ -443,4 +443,4 @@ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId);
 }
 #endif

-#endif  // __CCE_RUNTIME_BASE_H__
+#endif  // CCE_RUNTIME_BASE_H
@@ -239,8 +239,18 @@ RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout);
 */
 RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout);

+/**
+ * @ingroup
+ * @brief get is Heterogenous.
+ * @param [out] heterogenous=1 Heterogenous Mode: read isHeterogenous=1 in ini file.
+ * @param [out] heterogenous=0 NOT Heterogenous Mode:
+ *              1: ini file not found, 2: error when reading the ini file, 3: Heterogenous value is not 1
+ * @return RT_ERROR_NONE for ok
+ */
+RTS_API rtError_t rtGetIsHeterogenous(int32_t *heterogenous);
+
 #if defined(__cplusplus)
 }
 #endif

 #endif  // CCE_RUNTIME_CONFIG_H
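Note (illustration, not part of the change): a usage sketch for the new query. rtError_t and RT_ERROR_NONE come from the runtime headers; the include path is an assumption based on the usual install layout.

```cpp
#include <cstdint>
#include <cstdio>
#include "runtime/config.h"  // declares rtGetIsHeterogenous (install path may differ)

// Query whether the runtime is configured for heterogeneous mode.
void QueryHeterogenousMode() {
  int32_t heterogenous = 0;
  const rtError_t ret = rtGetIsHeterogenous(&heterogenous);
  if (ret != RT_ERROR_NONE) {
    (void)printf("rtGetIsHeterogenous failed, ret = %d\n", static_cast<int32_t>(ret));
    return;
  }
  (void)printf("heterogenous mode: %s\n", (heterogenous == 1) ? "on" : "off");
}
```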
@@ -25,7 +25,7 @@ extern "C" {
 #define RT_CAPABILITY_SUPPORT     (0x1U)
 #define RT_CAPABILITY_NOT_SUPPORT (0x0U)

-#define MEMORY_INFO_TS_4G_LIMITED (0x0)   // for compatibility
+#define MEMORY_INFO_TS_4G_LIMITED (0x0U)  // for compatibility

 typedef struct tagRTDeviceInfo {
     uint8_t env_type;  // 0: FPGA  1: EMU  2: ESL
@@ -171,6 +171,15 @@ RTS_API rtError_t rtSetDevice(int32_t device);
  */
 RTS_API rtError_t rtSetDeviceV2(int32_t device, rtDeviceMode deviceMode);

+/**
+ * @ingroup dvrt_dev
+ * @brief get deviceMode
+ * @param [out] deviceMode   the device mode
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtGetDeviceMode(rtDeviceMode *deviceMode);
+
 /**
  * @ingroup dvrt_dev
  * @brief set target die for current thread
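Note (illustration, not part of the change): reading back the mode chosen with rtSetDeviceV2. The rtDeviceMode enum values are not shown in this diff, so only the raw value is printed; the include path is an assumption.

```cpp
#include <cstdio>
#include "runtime/dev.h"  // declares rtGetDeviceMode and rtDeviceMode (install path may differ)

// Print the currently selected device mode as its raw enum value.
void PrintDeviceMode() {
  rtDeviceMode mode;
  if (rtGetDeviceMode(&mode) == RT_ERROR_NONE) {
    (void)printf("device mode = %d\n", static_cast<int>(mode));
  }
}
```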
@@ -133,8 +133,11 @@ typedef struct tagRtArgsWithTiling {
     uint16_t tilingDataOffset;     // tiling data offset
     uint16_t hostInputAddrOffset;  // index of host_memory input in inputs_addrs list
     uint16_t hostInputDataOffset;  // host_mem input data offset
-    bool hasHostMemInput;          // has host_memory input data in args or not: ture or false
-    uint8_t reserved[7];
+    uint8_t hasHostMemInput;       // has host_memory input data in args or not: 0 means no host_memory input data,
+                                   // others means has host_memory input data.
+    uint8_t isNoNeedH2DCopy;       // is no need host to device copy: 0 means need H2D copy,
+                                   // others means doesn't need H2D copy.
+    uint8_t reserved[6];
 } rtArgsWithTiling_t;
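Note (illustration, not part of the change): one way the two new flag bytes could be filled when building the descriptor, assuming zero-initialization is a sane baseline for the struct fields not shown in this hunk; the helper name is made up and the header defining rtArgsWithTiling_t must be included.

```cpp
// Sketch: only fields visible in the hunk above are touched explicitly.
rtArgsWithTiling_t BuildArgsDesc(uint16_t tilingDataOffset) {
  rtArgsWithTiling_t desc{};               // zero-init clears reserved[6] and the other offsets
  desc.tilingDataOffset = tilingDataOffset;
  desc.hasHostMemInput = 0U;               // 0: no host_memory input data packed into args
  desc.isNoNeedH2DCopy = 0U;               // 0: the args blob still needs the host-to-device copy
  return desc;
}
```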
| /** | /** | ||||
| @@ -299,8 +302,8 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle); | |||||
| * @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
| * @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
| */ | */ | ||||
| RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, const void *devFunc, | |||||
| uint32_t funcMode); | |||||
| RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, | |||||
| const void *devFunc, uint32_t funcMode); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| @@ -371,8 +374,9 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||||
| * @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
| * @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
| */ | */ | ||||
| RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||||
| rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||||
| RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, | |||||
| void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_, | |||||
| const void *kernelInfo); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
@@ -576,7 +576,7 @@ RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t strea
  * @return RT_ERROR_INVALID_VALUE for error input
  * @return RT_ERROR_DRV_ERR for driver error
  */
-RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int num);
+RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int32_t num);
@@ -31,5 +31,6 @@
 #include "rt_ffts.h"
 #include "rt_ffts_plus.h"
 #include "rt_dfx.h"
+#include "rt_mem_queue.h"

 #endif  // CCE_RUNTIME_RT_H
@@ -23,6 +23,8 @@
 #define PROF_AICORE_METRICS  0x00000004
 #define PROF_AICPU_TRACE     0x00000008
 #define PROF_L2CACHE         0x00000010
+#define PROF_HCCL_TRACE      0x00000020
+#define PROF_TRAINING_TRACE  0x00000040

 // system profilinig switch
 #define PROF_CPU             0x00010000

@@ -41,10 +43,7 @@
 #define PROF_AIVECTORCORE_METRICS  0x0000020000000
 #define PROF_SUBTASK_TIME          0x0000040000000

-#define PROF_TRAINING_TRACE        0x0000080000000
-#define PROF_HCCL_TRACE            0x0000100000000
-
-#define PROF_TASK_TRACE            0x0000185000002
+#define PROF_TASK_TRACE            0x0000005000062

 #define PROF_MODEL_LOAD            0x8000000000000000

@@ -54,6 +53,8 @@
 #define PROF_AICORE_METRICS_MASK  0x00000004
 #define PROF_AICPU_TRACE_MASK     0x00000008
 #define PROF_L2CACHE_MASK         0x00000010
+#define PROF_HCCL_TRACE_MASK      0x00000020
+#define PROF_TRAINING_TRACE_MASK  0x00000040

 // system profilinig mask
 #define PROF_CPU_MASK             0x00010000

@@ -72,9 +73,6 @@
 #define PROF_AIVECTORCORE_METRICS_MASK  0x0000020000000
 #define PROF_SUBTASK_TIME_MASK          0x0000040000000

-#define PROF_TRAINING_TRACE_MASK        0x0000080000000
-#define PROF_HCCL_TRACE_MASK            0x0000100000000
-
 #define PROF_MODEL_LOAD_MASK            0x8000000000000000

 #if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
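Note (illustration, not part of the headers): a compile-time check that the PROF_TASK_TRACE update only relocates the HCCL/TRAINING trace bits from their old high positions (0x0000100000000 / 0x0000080000000) to the new 0x20 / 0x40 switches; every other bit in the value is unchanged.

```cpp
// Old value without the old HCCL/TRAINING bits equals the new value without the
// new HCCL/TRAINING bits, so only those two switches moved.
static_assert((0x0000185000002ULL & ~(0x0000080000000ULL | 0x0000100000000ULL)) ==
              (0x0000005000062ULL & ~(0x00000040ULL | 0x00000020ULL)),
              "PROF_TASK_TRACE change only moves the HCCL/TRAINING trace bits");
```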