Merge pull request !2092 from yanghaoran/releasetags/v1.8.0^2
| @@ -60,6 +60,7 @@ static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resou | |||
| static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | |||
| static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | |||
| static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||
| static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
| @@ -293,6 +293,7 @@ struct OpDescInfo { | |||
| std::string dev_func; | |||
| std::string tvm_magic; | |||
| uint32_t tiling_key = 0U; | |||
| uintptr_t args = 0U; | |||
| std::string tiling_data; | |||
| std::string node_info; | |||
| std::vector<int64_t> workspace_bytes; | |||
| @@ -1 +1 @@ | |||
| Subproject commit 7d777404b3b7fe7daeaf00e566e431c6a05b040a | |||
| Subproject commit fe47d04d75170006fc0d28538dec49a2da426ceb | |||
| @@ -58,6 +58,10 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no str | |||
| static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||
| static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||
| static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||
| static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | |||
| static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | |||
| static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||
| static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
| @@ -97,6 +101,10 @@ static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector | |||
| static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
| static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
| static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
| static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode | |||
| static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die | |||
| static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id | |||
| static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set | |||
| static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
| static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
| @@ -105,5 +113,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc di | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||
| @@ -497,6 +497,25 @@ REG_OP(Constant) | |||
| .ATTR(value, Tensor, Tensor()) | |||
| .OP_END_FACTORY_REG(Constant) | |||
| /** | |||
| *@brief Creates a file constant tensor. The operator is used to process very large weights that are stored in a file. \n | |||
| *@par Attributes: | |||
| *file_id: A string, used to record file id. \n | |||
| *shape: data shape. \n | |||
| *dtype: data type. \n | |||
| *@par Outputs: | |||
| *y: The FileConstant tensor. \n | |||
| */ | |||
| REG_OP(FileConstant) | |||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ | |||
| DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) | |||
| .REQUIRED_ATTR(file_id, String) | |||
| .REQUIRED_ATTR(shape, ListInt) | |||
| .REQUIRED_ATTR(dtype, Type) | |||
| .OP_END_FACTORY_REG(FileConstant) | |||
| /** | |||
| *@brief Returns a copy of the input tensor. \n | |||
| @@ -1329,31 +1348,6 @@ REG_OP(ExpandD) | |||
| .REQUIRED_ATTR(shape, ListInt) | |||
| .OP_END_FACTORY_REG(ExpandD) | |||
| /** | |||
| * @brief Calculate buckets limit and offset. \n | |||
| * @par Inputs: | |||
| * Three inputs, including: | |||
| * @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n | |||
| * @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n | |||
| * @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n | |||
| * @par Attributes: | |||
| * total_limit: A int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n | |||
| * @par Outputs: | |||
| * @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n | |||
| * @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n | |||
| */ | |||
| REG_OP(CalcBucketsLimitAndOffset) | |||
| .INPUT(bucket_list, TensorType({DT_INT32})) | |||
| .INPUT(ivf_counts, TensorType({DT_INT32})) | |||
| .INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64})) | |||
| .OUTPUT(buckets_limit, TensorType({DT_INT32})) | |||
| .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | |||
| .REQUIRED_ATTR(total_limit, Int) | |||
| .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | |||
| /** | |||
| *@brief Get dim number in tensordesc. \n | |||
| @@ -1362,6 +1356,9 @@ REG_OP(CalcBucketsLimitAndOffset) | |||
| *@par Outputs: | |||
| *y: A 1D tensor. The data type must be int32. \n | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(GetShape) | |||
| .DYNAMIC_INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ | |||
| @@ -1377,8 +1374,13 @@ REG_OP(GetShape) | |||
| *@par outputs: | |||
| * y: a tensor_desc, type is int.\n | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(UpdateTensorDesc) | |||
| .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | |||
| DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) | |||
| .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | |||
| DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) | |||
| .REQUIRED_ATTR(shape, ListInt) | |||
| @@ -585,6 +585,14 @@ REG_OP(ResizeNearestNeighborV2GradD) | |||
| *@li original_image: A Tensor. 4-D shape. Must set the format, supported format list ["NCHW, NHWC"] | |||
| channels], The image tensor that was resized . \n | |||
| *@par Attributes: | |||
| *@li size: An optional listint. Defaults to {}. | |||
| *@par Attributes: | |||
| *@li ori_image_size: An optional listint. Defaults to {}. | |||
| *@par Attributes: | |||
| *@li src_start_w: An optional int. Defaults to 0. | |||
| *@par Attributes: | |||
| *@li dst_start_w: An optional int. Defaults to 0. | |||
| *@par Attributes: | |||
| *@li align_corners: An optional bool. Defaults to False. If true, the centers of | |||
| the 4 corner pixels of the input and grad tensors are aligned. Defaults to | |||
| @@ -606,6 +614,10 @@ REG_OP(ResizeBilinearV2Grad) | |||
| .INPUT(grads, TensorType({DT_FLOAT})) | |||
| .INPUT(original_image, TensorType::FloatingDataType()) | |||
| .OUTPUT(y, TensorType({DT_FLOAT})) | |||
| .ATTR(size, ListInt, {}) | |||
| .ATTR(ori_image_size, ListInt, {}) | |||
| .ATTR(src_start_w, Int, 0) | |||
| .ATTR(dst_start_w, Int, 0) | |||
| .ATTR(align_corners, Bool, false) | |||
| .ATTR(half_pixel_centers, Bool, false) | |||
| .OP_END_FACTORY_REG(ResizeBilinearV2Grad) | |||
| @@ -624,7 +636,10 @@ size for the images . \n | |||
| output tensors are aligned, preserving the values at the corner pixels. | |||
| Defaults to false . | |||
| * @li half_pixel_centers: An optional bool. Defaults to False . \n | |||
| *@li ori_image_size: An optional listint. Defaults to {}. | |||
| *@li split_size: An optional listint. Defaults to {}. | |||
| *@li src_start_w: An optional int. Defaults to 0. | |||
| *@li dst_start_w: An optional int. Defaults to 0. | |||
| *@par Outputs: | |||
| *y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||
| @@ -640,6 +655,10 @@ REG_OP(ResizeBilinearV2) | |||
| DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .INPUT(size, TensorType({DT_INT32})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT})) | |||
| .ATTR(ori_image_size, ListInt, {}) | |||
| .ATTR(split_size, ListInt, {}) | |||
| .ATTR(src_start_w, Int, 0) | |||
| .ATTR(dst_start_w, Int, 0) | |||
| .ATTR(align_corners, Bool, false) | |||
| .ATTR(half_pixel_centers, Bool, false) | |||
| .OP_END_FACTORY_REG(ResizeBilinearV2) | |||
| @@ -113,9 +113,7 @@ if input "x" is with format NC1HWC0. Specifies the mean of "x". | |||
| Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | |||
| *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||
| Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||
| *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||
| Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . | |||
| *@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow, only has one useless element. \n | |||
| *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. \n | |||
| *@attention Constraints: | |||
| *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | |||
| @@ -137,7 +135,6 @@ REG_OP(BatchNorm) | |||
| .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||
| .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||
| .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||
| .OUTPUT(reserve_space_3, TensorType({DT_FLOAT})) | |||
| .ATTR(epsilon, Float, 0.0001) | |||
| .ATTR(data_format, String, "NHWC") | |||
| .ATTR(is_training, Bool, true) | |||
| @@ -166,6 +163,33 @@ REG_OP(SyncBatchNormBackwardReduce) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OP_END_FACTORY_REG(SyncBatchNormBackwardReduce) | |||
| /** | |||
| *@brief part of SyncBatchNormBackward . \n | |||
| *@par Inputs: | |||
| * Seven inputs, including: | |||
| *@li grad_output: A Tensor. Must be one of the following types: float16, float32 . | |||
| *@li save_input: A Tensor. Must be one of the following types: float16, float32 . | |||
| *@li mean: A Tensor. Must be one of the following types: float16, float32 . | |||
| *@li invstd: A Tensor. Must be one of the following types: float16, float32 . | |||
| *@li weight: A Tensor. Must be one of the following types: float16, float32 . | |||
| *@li mean_dy: A Tensor. Must be one of the following types: float16, float32 . | |||
| *@li mean_dy_xmu: A Tensor. Must be one of the following types: float16, float32 . \n | |||
| *@par Outputs: | |||
| *@li grad_input: A Tensor. Has the same type and format as input "grad_output" . \n | |||
| */ | |||
| REG_OP(SyncBatchNormBackwardElemt) | |||
| .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(save_input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(invstd, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(mean_dy, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(mean_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OUTPUT(grad_input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OP_END_FACTORY_REG(SyncBatchNormBackwardElemt) | |||
| /** | |||
| *@brief Performs batch normalization . \n | |||
| @@ -285,8 +309,7 @@ REG_OP(BatchNormExt2) | |||
| *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. | |||
| *@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. | |||
| *@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. | |||
| *@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . | |||
| *@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n | |||
| *@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n | |||
| *@par Attributes: | |||
| *@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | |||
| @@ -313,7 +336,6 @@ REG_OP(BatchNormGrad) | |||
| .INPUT(scale, TensorType({DT_FLOAT})) | |||
| .INPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||
| .INPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||
| .OPTIONAL_INPUT(reserve_space_3, TensorType({DT_FLOAT})) | |||
| .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
| .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) | |||
| .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) | |||
| @@ -128,7 +128,7 @@ REG_OP(OCRIdentifyPreHandle) | |||
| .INPUT(imgs_offset, TensorType({DT_INT32})) | |||
| .INPUT(imgs_size, TensorType({DT_INT32})) | |||
| .OUTPUT(resized_imgs, TensorType({DT_UINT8})) | |||
| .ATTR(size, ListInt, {}) | |||
| .REQUIRED_ATTR(size, ListInt) | |||
| .ATTR(data_format, String, "NHWC") | |||
| .OP_END_FACTORY_REG(OCRIdentifyPreHandle) | |||
| @@ -247,6 +247,7 @@ REG_OP(OCRDetectionPostHandle) | |||
| *@li clipped_polys_data: A Tensor of type int32. point data of every clipped poly. \n | |||
| *@li clipped_polys_offset: A Tensor of type int32. Offset of every clipped poly . \n | |||
| *@li clipped_polys_size: A Tensor of type int32. Size of every clipped poly. \n | |||
| *@li clipped_polys_num: A Tensor of type int32. Number of clipped polys. \n | |||
| */ | |||
| REG_OP(ResizeAndClipPolys) | |||
| .INPUT(polys_data, TensorType({DT_INT32})) | |||
| @@ -259,6 +260,7 @@ REG_OP(ResizeAndClipPolys) | |||
| .OUTPUT(clipped_polys_data, TensorType({DT_INT32})) | |||
| .OUTPUT(clipped_polys_offset, TensorType({DT_INT32})) | |||
| .OUTPUT(clipped_polys_size, TensorType({DT_INT32})) | |||
| .OUTPUT(clipped_polys_num, TensorType({DT_INT32})) | |||
| .OP_END_FACTORY_REG(ResizeAndClipPolys); | |||
| @@ -1305,6 +1305,27 @@ REG_OP(ReduceStdWithMean) | |||
| .ATTR(invert, Bool, false) | |||
| .ATTR(epsilon, Float, 0.001) | |||
| .OP_END_FACTORY_REG(ReduceStdWithMean) | |||
| /** | |||
| *@brief Performs reduced batch normalization . \n | |||
| *@par Inputs: | |||
| *x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n | |||
| *@par Outputs: | |||
| *@li mean: A Tensor of type float16 or float32 for SUM reduced "x". | |||
| *@li variance: A Tensor of type float16 or float32 for square sum reduced "x" . \n | |||
| *@par Restrictions: | |||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(ReduceMeanVariance) | |||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
| .OUTPUT(mean, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
| .OUTPUT(variance, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
| .ATTR(axes, ListInt, {}) | |||
| .ATTR(keep_dims, Bool, true) | |||
| .OP_END_FACTORY_REG(ReduceMeanVariance) | |||
| } //namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ | |||
| @@ -78,8 +78,8 @@ REG_OP(TopKPQDistance) | |||
| .OUTPUT(topk_ivf, TensorType({DT_INT32})) | |||
| .OUTPUT(topk_index, TensorType({DT_INT32})) | |||
| .ATTR(order, String, "ASC") | |||
| .ATTR(k, Int, 0) | |||
| .ATTR(group_size, Int, 0) | |||
| .REQUIRED_ATTR(k, Int) | |||
| .REQUIRED_ATTR(group_size, Int) | |||
| .OP_END_FACTORY_REG(TopKPQDistance) | |||
| /** | |||
| @@ -129,6 +129,68 @@ REG_OP(ScanPQCodes) | |||
| .ATTR(split_count, Int, 1) | |||
| .ATTR(split_index, Int, 0) | |||
| .OP_END_FACTORY_REG(ScanPQCodes) | |||
| /** | |||
| * @brief Calculate buckets limit and offset. \n | |||
| * @par Inputs: | |||
| * Three inputs, including: | |||
| * @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n | |||
| * @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n | |||
| * @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n | |||
| * @par Attributes: | |||
| * total_limit: A int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n | |||
| * @par Outputs: | |||
| * @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n | |||
| * @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n | |||
| */ | |||
| REG_OP(CalcBucketsLimitAndOffset) | |||
| .INPUT(bucket_list, TensorType({DT_INT32})) | |||
| .INPUT(ivf_counts, TensorType({DT_INT32})) | |||
| .INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64})) | |||
| .OUTPUT(buckets_limit, TensorType({DT_INT32})) | |||
| .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | |||
| .REQUIRED_ATTR(total_limit, Int) | |||
| .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | |||
| /** | |||
| * @brief Calculate ProdVirialSeA. \n | |||
| * | |||
| * @par Inputs: | |||
| * Five inputs, including: | |||
| * @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
| * @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
| * @li rij: A Tensor. Must be one of the following types: float16, float32, float64. | |||
| * @li nlist: A Tensor. dtype is int32. | |||
| * @li natoms: A Tensor. dtype is int32. \n | |||
| * | |||
| * @par Outputs: | |||
| * Two outputs, including: | |||
| * @li virial: A Tensor. Must be one of the following types: float16, float32, float64. | |||
| * @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
| * | |||
| * @par Attributes: | |||
| * Three attributes, including: | |||
| * @li n_a_sel: A Scalar. | |||
| * @li n_r_sel: A Scalar. | |||
| * @li nall: An optional int. Defaults to 28328. \n | |||
| * | |||
| * @par Restrictions: | |||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(ProdVirialSeA) | |||
| .INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .INPUT(nlist, TensorType({DT_INT32})) | |||
| .INPUT(natoms, TensorType({DT_INT32})) | |||
| .OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .REQUIRED_ATTR(n_a_sel, Int) | |||
| .REQUIRED_ATTR(n_r_sel, Int) | |||
| .ATTR(nall, Int, 28328) | |||
| .OP_END_FACTORY_REG(ProdVirialSeA) | |||
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef __CCE_RUNTIME_BASE_H__ | |||
| #define __CCE_RUNTIME_BASE_H__ | |||
| #ifndef CCE_RUNTIME_BASE_H | |||
| #define CCE_RUNTIME_BASE_H | |||
| #include <stdint.h> | |||
| #include "toolchain/prof_callback.h" | |||
| @@ -443,4 +443,4 @@ RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); | |||
| } | |||
| #endif | |||
| #endif // __CCE_RUNTIME_BASE_H__ | |||
| #endif // CCE_RUNTIME_BASE_H | |||
| @@ -239,8 +239,18 @@ RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout); | |||
| */ | |||
| RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); | |||
| /** | |||
| * @ingroup | |||
| * @brief get whether Heterogenous mode is enabled. | |||
| * @param [out] heterogenous=1 Heterogenous Mode: read isHeterogenous=1 in ini file. | |||
| * @param [out] heterogenous=0 NOT Heterogenous Mode: | |||
| * 1:not found ini file, 2:error when reading ini, 3:Heterogenous value is not 1 | |||
| * @return RT_ERROR_NONE for ok | |||
| */ | |||
| RTS_API rtError_t rtGetIsHeterogenous(int32_t *heterogenous); | |||
| #if defined(__cplusplus) | |||
| } | |||
| #endif | |||
| #endif // CCE_RUNTIME_CONFIG_H | |||
| #endif // CCE_RUNTIME_CONFIG_H | |||
| @@ -25,7 +25,7 @@ extern "C" { | |||
| #define RT_CAPABILITY_SUPPORT (0x1U) | |||
| #define RT_CAPABILITY_NOT_SUPPORT (0x0U) | |||
| #define MEMORY_INFO_TS_4G_LIMITED (0x0) // for compatibility | |||
| #define MEMORY_INFO_TS_4G_LIMITED (0x0U) // for compatibility | |||
| typedef struct tagRTDeviceInfo { | |||
| uint8_t env_type; // 0: FPGA 1: EMU 2: ESL | |||
| @@ -171,6 +171,15 @@ RTS_API rtError_t rtSetDevice(int32_t device); | |||
| */ | |||
| RTS_API rtError_t rtSetDeviceV2(int32_t device, rtDeviceMode deviceMode); | |||
| /** | |||
| * @ingroup dvrt_dev | |||
| * @brief get deviceMode | |||
| * @param [out] deviceMode the device mode | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| */ | |||
| RTS_API rtError_t rtGetDeviceMode(rtDeviceMode *deviceMode); | |||
| /** | |||
| * @ingroup dvrt_dev | |||
| * @brief set target die for current thread | |||
| @@ -133,8 +133,11 @@ typedef struct tagRtArgsWithTiling { | |||
| uint16_t tilingDataOffset; // tiling data offset | |||
| uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list | |||
| uint16_t hostInputDataOffset; // host_mem input data offset | |||
| bool hasHostMemInput; // has host_memory input data in args or not: ture or false | |||
| uint8_t reserved[7]; | |||
| uint8_t hasHostMemInput; // has host_memory input data in args or not: 0 means no host_memory input data, | |||
| // others means has host_memory input data. | |||
| uint8_t isNoNeedH2DCopy; // is no need host to device copy: 0 means need H2D copy, | |||
| // others means doesn't need H2D copy. | |||
| uint8_t reserved[6]; | |||
| } rtArgsWithTiling_t; | |||
| /** | |||
| @@ -299,8 +302,8 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle); | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| */ | |||
| RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, const void *devFunc, | |||
| uint32_t funcMode); | |||
| RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, | |||
| const void *devFunc, uint32_t funcMode); | |||
| /** | |||
| * @ingroup rt_kernel | |||
| @@ -371,8 +374,9 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||
| * @return RT_ERROR_NONE for ok | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| */ | |||
| RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||
| rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||
| RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, | |||
| void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_, | |||
| const void *kernelInfo); | |||
| /** | |||
| * @ingroup rt_kernel | |||
| @@ -576,7 +576,7 @@ RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t strea | |||
| * @return RT_ERROR_INVALID_VALUE for error input | |||
| * @return RT_ERROR_DRV_ERR for driver error | |||
| */ | |||
| RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int num); | |||
| RTS_API rtError_t rtSetIpcMemPid(const char_t *name, int32_t pid[], int32_t num); | |||
| /** | |||
| * @ingroup dvrt_mem | |||
| @@ -31,5 +31,6 @@ | |||
| #include "rt_ffts.h" | |||
| #include "rt_ffts_plus.h" | |||
| #include "rt_dfx.h" | |||
| #include "rt_mem_queue.h" | |||
| #endif // CCE_RUNTIME_RT_H | |||
| #endif // CCE_RUNTIME_RT_H | |||
| @@ -23,6 +23,8 @@ | |||
| #define PROF_AICORE_METRICS 0x00000004 | |||
| #define PROF_AICPU_TRACE 0x00000008 | |||
| #define PROF_L2CACHE 0x00000010 | |||
| #define PROF_HCCL_TRACE 0x00000020 | |||
| #define PROF_TRAINING_TRACE 0x00000040 | |||
| // system profiling switch | |||
| #define PROF_CPU 0x00010000 | |||
| @@ -41,10 +43,7 @@ | |||
| #define PROF_AIVECTORCORE_METRICS 0x0000020000000 | |||
| #define PROF_SUBTASK_TIME 0x0000040000000 | |||
| #define PROF_TRAINING_TRACE 0x0000080000000 | |||
| #define PROF_HCCL_TRACE 0x0000100000000 | |||
| #define PROF_TASK_TRACE 0x0000185000002 | |||
| #define PROF_TASK_TRACE 0x0000005000062 | |||
| #define PROF_MODEL_LOAD 0x8000000000000000 | |||
| @@ -54,6 +53,8 @@ | |||
| #define PROF_AICORE_METRICS_MASK 0x00000004 | |||
| #define PROF_AICPU_TRACE_MASK 0x00000008 | |||
| #define PROF_L2CACHE_MASK 0x00000010 | |||
| #define PROF_HCCL_TRACE_MASK 0x00000020 | |||
| #define PROF_TRAINING_TRACE_MASK 0x00000040 | |||
| // system profiling mask | |||
| #define PROF_CPU_MASK 0x00010000 | |||
| @@ -72,9 +73,6 @@ | |||
| #define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000 | |||
| #define PROF_SUBTASK_TIME_MASK 0x0000040000000 | |||
| #define PROF_TRAINING_TRACE_MASK 0x0000080000000 | |||
| #define PROF_HCCL_TRACE_MASK 0x0000100000000 | |||
| #define PROF_MODEL_LOAD_MASK 0x8000000000000000 | |||
| #if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) | |||