diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h
index 99e87e28..5fbaccc7 100644
--- a/inc/external/acl/acl.h
+++ b/inc/external/acl/acl.h
@@ -25,9 +25,9 @@
 extern "C" {
 #endif
 
-// Current version is 1.1.0
+// Current version is 1.2.0
 #define ACL_MAJOR_VERSION 1
-#define ACL_MINOR_VERSION 1
+#define ACL_MINOR_VERSION 2
 #define ACL_PATCH_VERSION 0
 
 /**
diff --git a/inc/external/acl/acl_tdt_queue.h b/inc/external/acl/acl_tdt_queue.h
index c8ab02a3..e940c020 100644
--- a/inc/external/acl/acl_tdt_queue.h
+++ b/inc/external/acl/acl_tdt_queue.h
@@ -219,6 +219,7 @@ ACL_FUNC_VISIBILITY aclError acltdtQueryQueueRoutes(const acltdtQueueRouteQueryI
  * @brief alloc acltdtBuf
  *
  * @param size [IN] alloc buf size
+ * @param type [IN] reserved parameter, which currently must be set to zero
  * @param buf [OUT] pointer to the acltdtBuf
  *
  * @retval ACL_SUCCESS The function is successfully executed.
@@ -226,7 +227,7 @@ ACL_FUNC_VISIBILITY aclError acltdtQueryQueueRoutes(const acltdtQueueRouteQueryI
  *
  * @see acltdtFreeBuf
  */
-ACL_FUNC_VISIBILITY aclError acltdtAllocBuf(size_t size, acltdtBuf *buf);
+ACL_FUNC_VISIBILITY aclError acltdtAllocBuf(size_t size, uint32_t type, acltdtBuf *buf);
 
 /**
  * @ingroup AscendCL
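For illustration only: a minimal, hedged sketch of a call site that compiles against both the old and the new acltdtAllocBuf signature, keyed off the version macros bumped above. The buffer size and the helper name are invented for the example; per the new doc comment, `type` must currently be zero.

#include "acl/acl.h"
#include "acl/acl_tdt_queue.h"

// Hypothetical helper: allocate a 1 KiB tdt buffer under either header version.
static aclError AllocExampleBuf(acltdtBuf *buf) {
#if ACL_MAJOR_VERSION > 1 || (ACL_MAJOR_VERSION == 1 && ACL_MINOR_VERSION >= 2)
  return acltdtAllocBuf(1024U, 0U, buf);  // 1.2.0 form: reserved `type` argument, zero for now
#else
  return acltdtAllocBuf(1024U, buf);      // pre-1.2.0 two-argument form
#endif
}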
diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h
index 45c157f3..c50f83c0 100644
--- a/inc/framework/common/debug/log.h
+++ b/inc/framework/common/debug/log.h
@@ -180,7 +180,7 @@
 #define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \
   { \
     GELOGE((_status), "[Check][InnerData]%s", (errormsg)); \
-    REPORT_INNER_ERROR("E10052", "%s", (errormsg)); \
+    REPORT_INNER_ERROR("E19999", "%s", (errormsg)); \
   }
 
 #define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \
diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h
index d64a4c2d..cf6499e9 100644
--- a/inc/framework/omg/parser/parser_factory.h
+++ b/inc/framework/omg/parser/parser_factory.h
@@ -62,7 +62,7 @@ class GE_FUNC_VISIBILITY ModelParserFactory {
 
 class GE_FUNC_VISIBILITY ModelParserRegisterar {
  public:
-  ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN const fun) {
+  ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN const fun) noexcept {
     ModelParserFactory::Instance()->RegisterCreator(type, fun);
   }
   ~ModelParserRegisterar() {}
@@ -114,7 +114,7 @@ class GE_FUNC_VISIBILITY WeightsParserFactory {
 
 class GE_FUNC_VISIBILITY WeightsParserRegisterar {
  public:
-  WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN const fun) {
+  WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN const fun) noexcept {
     WeightsParserFactory::Instance()->RegisterCreator(type, fun);
   }
   ~WeightsParserRegisterar() {}
diff --git a/metadef b/metadef
index 58412ad7..2d98a178 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 58412ad7aed08cd1c01cc070d80706e4253c9075
+Subproject commit 2d98a17884e656a2446239cdb9cee79543cb0161
diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h
index 248303ef..1c8f8e44 100644
--- a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h
+++ b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h
@@ -1,117 +1,117 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
-#define __INC_EXTERNEL_RT_ERROR_CODES_H__
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static const int32_t ACL_RT_SUCCESS = 0; // success
-
-static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
-static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
-static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
-static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
-static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
-static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
-static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
-static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
-static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
-static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
-static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
-static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
-static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
-static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
-static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
-static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
-static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
-static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
-static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
-static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
-
-static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
-static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
-static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
-static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
-static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
-static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
-static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
-static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
-static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
-static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
-static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
-static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource
-static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit
-static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty
-static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
-static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
-static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow
-
-static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
-static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
-static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
-static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
-static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
-static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
-static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
-static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
-static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
-static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
-static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
-static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
-static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
-static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
-static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
-static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
-static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
-static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
-static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
-static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
-static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
-static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
-static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
-static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
-static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
-static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
-static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
-static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
-static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
-static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
-static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
-static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
-static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
-static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
-static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
-static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
-static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
-static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
-static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
-static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
-
-static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
-static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
-static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect
-
-#ifdef __cplusplus
-}
-#endif
-#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
+/**
+* @file rt_error_codes.h
+*
+* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
+#define __INC_EXTERNEL_RT_ERROR_CODES_H__
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static const int32_t ACL_RT_SUCCESS = 0; // success
+
+static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
+static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
+static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
+static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
+static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
+static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
+static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
+static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
+static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
+static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
+static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
+static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
+static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
+static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
+static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
+static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
+static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
+static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
+static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout
+
+static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
+static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
+static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
+static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
+static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
+static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
+static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
+static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
+static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
+static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
+static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
+static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource
+static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit
+static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty
+static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
+static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
+static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow
+static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common over flow
+
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
+static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
+static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
+static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
+static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
+static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
+static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
+static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
+static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
+static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
+static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
+static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
+static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
+static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
+static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
+static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
+static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
+static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // aic trap read overflow
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow
+
+static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
+static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
+static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect
+
+#ifdef __cplusplus
+}
+#endif
+#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
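Since the codes are plain int32_t constants, callers can branch on them directly. A hedged sketch that surfaces only the codes added in this change (the include path follows the usual external-header layout and may differ per build setup):

#include <cstdint>
#include "runtime/rt_error_codes.h"  // assumed include path

// Map just the newly added runtime statuses to short diagnostics.
static const char *DescribeNewRtStatus(int32_t status) {
  switch (status) {
    case ACL_ERROR_RT_TASK_TIMEOUT:                    return "task timeout (107020)";
    case ACL_ERROR_RT_OVER_FLOW:                       return "common over flow (207017)";
    case ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW:       return "aic trap read overflow (507042)";
    case ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW:      return "aic trap write overflow (507043)";
    case ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW:  return "aiv trap read overflow (507044)";
    case ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW: return "aiv trap write overflow (507045)";
    default:                                           return "other runtime status";
  }
}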
diff --git a/third_party/fwkacllib/inc/ops/encoding_ops.h b/third_party/fwkacllib/inc/ops/encoding_ops.h
index f96a67e7..73344353 100644
--- a/third_party/fwkacllib/inc/ops/encoding_ops.h
+++ b/third_party/fwkacllib/inc/ops/encoding_ops.h
@@ -1,5 +1,5 @@
-/**
- * Copyright (C) Huawei Technologies Co., Ltd 2022-2022. All rights reserved.
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd 2022-2022. All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h
index bd63c60c..f3cc5aed 100644
--- a/third_party/fwkacllib/inc/ops/linalg_ops.h
+++ b/third_party/fwkacllib/inc/ops/linalg_ops.h
@@ -525,6 +525,26 @@ REG_OP(BandedTriangularSolve)
     .ATTR(lower, Bool, true)
     .ATTR(adjoint, Bool, false)
     .OP_END_FACTORY_REG(BandedTriangularSolve)
+
+/**
+* @brief Returns the complex conjugate transpose.
+
+* @par Inputs:
+* @li x: A Tensor. Must be one of the following types: double, float32, float16,
+  int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool.
+* @li perm: An index Tensor. Must be one of the following types: int32, int64. \n
+*
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x" . \n
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator ConjugateTranspose.
+*/
+REG_OP(ConjugateTranspose)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(perm, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(ConjugateTranspose)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_
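REG_OP generates a ge::op::ConjugateTranspose class whose setters follow the set_input_<name> pattern. A hedged graph-construction sketch; the header names and the Data/Const wiring are assumptions based on the usual GE client API, not part of this diff:

#include "graph/graph.h"   // assumed GE graph header
#include "all_ops.h"       // generated op classes, including ge::op::ConjugateTranspose

ge::Graph BuildConjugateTransposeGraph() {
  auto x = ge::op::Data("x");          // runtime input tensor to transpose
  auto perm = ge::op::Const("perm");   // permutation indices (int32/int64), value omitted here
  auto ct = ge::op::ConjugateTranspose("conjugate_transpose")
                .set_input_x(x)
                .set_input_perm(perm);
  ge::Graph graph("conjugate_transpose_graph");
  graph.SetInputs({x}).SetOutputs({ct});
  return graph;
}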
diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h
index 94fd0600..8e9ee4db 100644
--- a/third_party/fwkacllib/inc/ops/math_ops.h
+++ b/third_party/fwkacllib/inc/ops/math_ops.h
@@ -1217,6 +1217,35 @@ REG_OP(DenseCountSparseOutput)
     .REQUIRED_ATTR(binary_output, Bool)
     .OP_END_FACTORY_REG(DenseCountSparseOutput)
 
+/**
+* @brief Computes gradients for SparseSegmentSum . \n
+
+* @par Inputs:
+* The input grad must be of type bfloat16, float16, float32 or double. Inputs include:
+* @li grad: A Tensor. Must be one of the following types: bfloat16, float16, float32, double.
+  The gradient propagated to the SparseSegmentSum op.
+* @li indices: A Tensor. Must be one of the following types: int32, int64.
+  The indices passed to the corresponding SparseSegmentSum op.
+* @li segment_ids: A Tensor of type int32 or int64. The segment_ids passed to the
+  corresponding SparseSegmentSum op.
+* @li output_dim0: A Tensor of type int32. Dimension 0 of "x" passed to
+  the SparseSegmentSum op . \n
+
+* @par Outputs:
+* output: A Tensor. Has the same type as "grad" . \n
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator SparseSegmentSumGrad.
+*/
+
+REG_OP(SparseSegmentSumGrad)
+    .INPUT(grad, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(indices, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(segment_ids, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(output_dim0, TensorType({DT_INT32}))
+    .OUTPUT(output, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(SparseSegmentSumGrad)
+
 /**
 * @brief Count the number of occurrences of each value in the input ragged integer array,
 * and output it according to the sparse matrix. \n
@@ -1335,6 +1364,87 @@ REG_OP(ScaledMaskedSoftmaxGrad)
     .ATTR(scale, Float, 1.0)
     .ATTR(fixed_triu_mask, Bool, false)
     .OP_END_FACTORY_REG(ScaledMaskedSoftmaxGrad)
+
+/**
+ * @brief SignBitsPack.
+
+ * @par Inputs:
+ * One input, including:
+ * @li x: A 1D Tensor of float32 or float16.
+ *
+ * @par Attributes:
+ * @li size: The first dim value of the output tensor.
+ *
+ * @par Outputs:
+ * @li y: A 2D Tensor of type uint8 with shape (size, N).
+ */
+REG_OP(SignBitsPack)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_UINT8}))
+    .REQUIRED_ATTR(size, Int)
+    .OP_END_FACTORY_REG(SignBitsPack)
+
+/**
+* @brief Get sobol samples. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li dim: Dimension of results, which must be a scalar of type int32.
+* @li num_results: Number of results, which must be a scalar of type int32.
+* @li skip: Number of initial points, which must be a scalar of type int32. \n
+
+* @par Attributes:
+* @li dtype: Data type of output samples. \n
+
+* @par Outputs:
+* @li samples: A Tensor of the generated samples, of type DT_FLOAT or DT_DOUBLE. \n
+
+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator SobolSample.
+*/
+REG_OP(SobolSample)
+    .INPUT(dim, TensorType({DT_INT32}))
+    .INPUT(num_results, TensorType({DT_INT32}))
+    .INPUT(skip, TensorType({DT_INT32}))
+    .OUTPUT(samples, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(dtype, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(SobolSample)
+
+/**
+ * @brief Count the number of occurrences of each value in the input sparse integer array,
+ * and output it according to the sparse matrix. \n
+
+ * @par Inputs:
+ * @li indices: A tensor of type int64.
+ * @li values: A tensor of type int32 or int64.
+ * @li dense_shape: A tensor of type int64.
+ * @li weights: A tensor of type int32, int64, float or double. \n
+
+ * @par Attributes:
+ * @li minlength: An optional int >= -1. Defaults to -1.
+ * @li maxlength: An optional int >= -1. Defaults to -1.
+ * @li binary_output: A required bool. \n
+
+ * @par Outputs:
+ * @li output_indices: A tensor of type int64.
+ * @li output_values: A tensor of the same type as "weights".
+ * @li output_dense_shape: A tensor of type int64. \n
+
+ * @par Third-party framework compatibility
+ * Compatible with the TensorFlow operator SparseCountSparseOutput. \n
+ */
+REG_OP(SparseCountSparseOutput)
+    .INPUT(indices, TensorType({DT_INT64}))
+    .INPUT(values, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(dense_shape, TensorType({DT_INT64}))
+    .INPUT(weights, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(output_indices, TensorType({DT_INT64}))
+    .OUTPUT(output_values, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(output_dense_shape, TensorType({DT_INT64}))
+    .ATTR(minlength, Int, -1)
+    .ATTR(maxlength, Int, -1)
+    .REQUIRED_ATTR(binary_output, Bool)
+    .OP_END_FACTORY_REG(SparseCountSparseOutput)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_
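As a reviewer aid, the contract of SparseSegmentSumGrad can be stated in a few lines of plain C++: output row indices[i] accumulates grad row segment_ids[i], with output_dim0 rows in total. A hedged reference sketch for rank-1 rows, not the device implementation:

#include <cstdint>
#include <vector>

// Reference semantics only: dx[indices[i]] += grad[segment_ids[i]].
std::vector<float> SparseSegmentSumGradRef(const std::vector<float> &grad,
                                           const std::vector<int32_t> &indices,
                                           const std::vector<int32_t> &segment_ids,
                                           int32_t output_dim0) {
  std::vector<float> output(static_cast<size_t>(output_dim0), 0.0f);
  for (size_t i = 0; i < indices.size(); ++i) {
    output[static_cast<size_t>(indices[i])] += grad[static_cast<size_t>(segment_ids[i])];
  }
  return output;
}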
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index 57b46398..38e22be8 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -155,6 +155,49 @@ REG_OP(AttentionLnQKV)
     .ATTR(trans_b, Bool, false)
     .OP_END_FACTORY_REG(AttentionLnQKV)
 
+/**
+* @brief A structure specific to the swin_transformer model. This operator only supports swin_transformer. \n
+* @par Inputs:
+* Five inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16.
+* @li gamma: A Tensor. Must be one of the following types: float16.
+* @li beta: A Tensor. Must be one of the following types: float16.
+* @li weight: A Tensor. Must be one of the following types: float16.
+* @li bias: A Tensor. Must be one of the following types: float16. \n
+
+* @par Attributes:
+* @li head_num: A required attribute, the type is int.
+* @li head_dim: A required attribute, the type is int.
+* @li seq_length: A required attribute, the type is int.
+* @li shifts: An optional attribute, the type is list int. Defaults to ().
+* @li epsilon: An optional attribute, the type is float. Defaults to 1e-7. \n
+
+* @par Outputs:
+* Three outputs, including:
+* @li query_output: A Tensor. Must be one of the following types: float16.
+* @li key_output: A Tensor. Must be one of the following types: float16.
+* @li value_output: A Tensor. Must be one of the following types: float16. \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+REG_OP(SwinTransformerLnQKV)
+    .INPUT(x, TensorType({DT_FLOAT16}))
+    .INPUT(gamma, TensorType({DT_FLOAT16}))
+    .INPUT(beta, TensorType({DT_FLOAT16}))
+    .INPUT(weight, TensorType({DT_FLOAT16}))
+    .INPUT(bias, TensorType({DT_FLOAT16}))
+    .OUTPUT(query_output, TensorType({DT_FLOAT16}))
+    .OUTPUT(key_output, TensorType({DT_FLOAT16}))
+    .OUTPUT(value_output, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(head_num, Int)
+    .REQUIRED_ATTR(head_dim, Int)
+    .REQUIRED_ATTR(seq_length, Int)
+    .ATTR(shifts, ListInt, {})
+    .ATTR(epsilon, Float, 0.0000001)
+    .OP_END_FACTORY_REG(SwinTransformerLnQKV)
+
 /**
 *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n
 
@@ -1639,6 +1682,80 @@ REG_OP(TensorScatterMin)
     .OUTPUT(output, TensorType::BasicType())
     .OP_END_FACTORY_REG(TensorScatterMin)
 
+/**
+* @brief Returns the batched diagonal part of a batched tensor. \n
+
+* @par Inputs:
+* @li x: A Tensor. A rank-r tensor where r >= 2.
+* @li k: A Tensor of type int32. Diagonal offset(s). A positive value means superdiagonal,
+  0 refers to the main diagonal, and a negative value means subdiagonals. k can be a
+  single integer (for a single diagonal) or a pair of integers specifying the low and
+  high ends of a matrix band. k[0] must not be larger than k[1].
+* @li padding_value: A Tensor. Must have the same type as "x". The value to fill the area
+  outside the specified diagonal band with. Default is 0. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x". \n
+
+* @par Attributes:
+* @li align: An optional string from: "LEFT_RIGHT", "RIGHT_LEFT", "LEFT_LEFT", "RIGHT_RIGHT". Defaults to "RIGHT_LEFT".
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator MatrixDiagPartV3.
+*/
+REG_OP(MatrixDiagPartV3)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(k, TensorType({DT_INT32}))
+    .INPUT(padding_value, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .ATTR(align, String, "RIGHT_LEFT")
+    .OP_END_FACTORY_REG(MatrixDiagPartV3)
+
+/**
+* @brief Returns a batched diagonal tensor with given batched diagonal values . \n
+
+* @par Inputs:
+* Five inputs, including:
+* @li x: Rank `r`, where `r >= 1` \n
+
+* @li k:
+* Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main
+* diagonal, and negative value means subdiagonals. `k` can be a single integer
+* (for a single diagonal) or a pair of integers specifying the low and high ends
+* of a matrix band. `k[0]` must not be larger than `k[1]`. \n
+
+* @li num_rows:
+* The number of rows of the output matrix. If it is not provided, the op assumes
+* the output matrix is a square matrix and infers the matrix size from k and the
+* innermost dimension of `diagonal`. \n
+
+* @li num_cols:
+* The number of columns of the output matrix. If it is not provided, the op
+* assumes the output matrix is a square matrix and infers the matrix size from
+* k and the innermost dimension of `diagonal`. \n
+
+* @li padding_value: The number to fill the area outside the specified diagonal band with. \n
+
+* @par Attributes:
+* @li align: An optional string from: "LEFT_RIGHT", "RIGHT_LEFT", "LEFT_LEFT", "RIGHT_RIGHT".
+* Defaults to "RIGHT_LEFT" \n
+
+* @par Outputs:
+* @li y: Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise . \n
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator MatrixDiagV3.
+*/
+REG_OP(MatrixDiagV3)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(k, TensorType({DT_INT32}))
+    .INPUT(num_rows, TensorType({DT_INT32}))
+    .INPUT(num_cols, TensorType({DT_INT32}))
+    .INPUT(padding_value, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .ATTR(align, String, "RIGHT_LEFT")
+    .OP_END_FACTORY_REG(MatrixDiagV3)
+
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_
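For the k convention shared by MatrixDiagPartV3 and MatrixDiagV3: k = 0 is the main diagonal, k > 0 superdiagonals, k < 0 subdiagonals. A hedged single-matrix reference of the extraction step (row-major storage assumed; band, padding and align handling omitted):

#include <algorithm>
#include <cstdint>
#include <vector>

// Extract diagonal k of a rows x cols row-major matrix, the per-batch core of MatrixDiagPartV3.
std::vector<float> DiagPartRef(const std::vector<float> &m, int64_t rows, int64_t cols, int64_t k) {
  const int64_t row0 = std::max<int64_t>(0, -k);  // k < 0 starts below the main diagonal
  const int64_t col0 = std::max<int64_t>(0, k);   // k > 0 starts right of it
  const int64_t len = std::min(rows - row0, cols - col0);
  std::vector<float> diag;
  diag.reserve(static_cast<size_t>(std::max<int64_t>(len, 0)));
  for (int64_t i = 0; i < len; ++i) {
    diag.push_back(m[static_cast<size_t>((row0 + i) * cols + (col0 + i))]);
  }
  return diag;
}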
diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
index 6b3b817a..bb0770e6 100644
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -651,21 +651,21 @@ REG_OP(Deconvolution)
     | Name             | Field    | Scope        |\n
     |------------------|----------|--------------|\n
     | x(fmap)          | H        | [1, 200000]  |\n
-    |                  | W        | [1, 4096]    |\n
-    | Filter Size      | H        | [1, 255]     |\n
-    |                  | W        | [1, 255]     |\n
+    |                  | W        | [1, 200000]  |\n
+    | Filter Size      | H        | [1, 200000]  |\n
+    |                  | W        | [1, 200000]  |\n
     | out_backprop     | H        | [1, 200000]  |\n
-    |                  | W        | [1, 4096]    |\n
+    |                  | W        | [1, 200000]  |\n
     | y                | H        | [1, 200000]  |\n
-    |                  | W        | [1, 4096]    |\n
-    | Stride           | H        | [1, 63]      |\n
-    |                  | W        | [1, 63]      |\n
-    | Padding          | Top      | [0, 255]     |\n
-    |                  | Bottom   | [0, 255]     |\n
-    |                  | Left     | [0, 255]     |\n
-    |                  | Right    | [0, 255]     |\n
-    | Dilation         | H        | [1, 255]     |\n
-    |                  | W        | [1, 255]     |\n
+    |                  | W        | [1, 200000]  |\n
+    | Stride           | H        | [1, 200000]  |\n
+    |                  | W        | [1, 200000]  |\n
+    | Padding          | Top      | [0, 200000]  |\n
+    |                  | Bottom   | [0, 200000]  |\n
+    |                  | Left     | [0, 200000]  |\n
+    |                  | Right    | [0, 200000]  |\n
+    | Dilation         | H        | [1, 200000]  |\n
+    |                  | W        | [1, 200000]  |\n
 *\n
 *@par Outputs:
 * y: A Tensor. Has the same type as x, has the same format as filter_size.
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index 15aa6c20..c5724f43 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -1550,8 +1550,8 @@ REG_OP(DecodeWheelsTarget)
 *@li max_size_per_class: A required attribute of type int, specifying the nms output num per class.
 *@li max_total_size: A required attribute of type int, specifying the the nms output num per batch.
 *@li change_coordinate_frame: A optional attribute of type bool, whether to normalize coordinates after clipping.
-*@li transpose_box: A optional attribute of type bool, whether inserted transpose before this op. must be "false".
-*@li image_size: A optional attribute of type ListInt, the size of the image. \n
+* @li transpose_box: An optional attribute of type bool, whether a transpose was inserted before this op. Must be "false".
+* @li image_size: An optional attribute of type ListInt, the size of the image. \n
 
 *@par Outputs:
 *@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4),
@@ -2352,6 +2352,51 @@ REG_OP(DIoU)
     .ATTR(is_cross, Bool, true)
     .ATTR(mode, String, "iou")
     .OP_END_FACTORY_REG(DIoU)
+
+/**
+* @brief Calculate the intersection over union of two sets of rotated cuboids . \n
+
+* @par Inputs:
+* @li bboxes: Predicted boxes, a 3D Tensor of type float32 with
+* shape (B, 7, N). "N" indicates the number of boxes, and the value
+* "7" refers to [x, y, z, w, h, d, theta].
+* @li gtboxes: Ground-truth bounding boxes, a 3D Tensor of type float32 with
+* shape (B, 7, K). "K" indicates the number of boxes, and the value
+* "7" refers to [x, y, z, w, h, d, theta].
+
+* @par Outputs:
+* iou: A 3D Tensor of float32 with shape [B, N, K].
+
+* @attention Constraints:
+* In each batch, the invalid box cannot appear before the valid box.
+*/
+REG_OP(Iou3D)
+    .INPUT(bboxes, TensorType({DT_FLOAT}))
+    .INPUT(gtboxes, TensorType({DT_FLOAT}))
+    .OUTPUT(iou, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(Iou3D)
+
+/**
+* @brief Generates bounding boxes based on "priors" and "bboxes".
+* It is a customized yolox operator . \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li priors: Prior sample boxes of the origin image.
+* A 2D Tensor of type float32 or float16 with shape (N, 4).
+* "N" indicates the number of boxes, and the value "4" refers to "x0", "x1", "y0", and "y1".
+* @li bboxes: Bboxes predicted by the model. A 3D Tensor of type float32 or float16 with shape (B, N, 4).
+* "B" indicates the batch_size, "N" indicates the number of boxes, and "4" indicates "dx", "dy", "dw", and "dh" . \n
+
+* @par Outputs:
+* decoded_bboxes: Bboxes generated based on "priors" and "bboxes". Has the same format
+* and type as "bboxes".
+*/
+REG_OP(YoloxBoundingBoxDecode)
+    .INPUT(priors, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(decoded_bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(YoloxBoundingBoxDecode)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
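The doc text pins down the box layouts but not the decode formula. The usual yolox-style convention is sketched below for one box as a hedged reference (offsets scale with the prior size, sizes are exponentiated; the real kernel may differ):

#include <cmath>

struct Box { float x0, y0, x1, y1; };

// Hedged reference: decode one (dx, dy, dw, dh) prediction against a prior box.
Box DecodeYoloxBoxRef(const Box &prior, float dx, float dy, float dw, float dh) {
  const float pw = prior.x1 - prior.x0;
  const float ph = prior.y1 - prior.y0;
  const float cx = prior.x0 + 0.5f * pw + dx * pw;  // center shifted by a scaled offset
  const float cy = prior.y0 + 0.5f * ph + dy * ph;
  const float w = pw * std::exp(dw);                // size decoded in log space
  const float h = ph * std::exp(dh);
  return {cx - 0.5f * w, cy - 0.5f * h, cx + 0.5f * w, cy + 0.5f * h};
}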
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index 5a2ee6eb..523fb199 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -426,10 +426,10 @@ REG_OP(ConfusionSoftmaxGrad)
 *@li keepdims: A bool Scalar. If true, retains reduced dimensions with length 1 . \n
 
 *@par Outputs:
-*y: A Tensor dtype of float16, float32. \n
+* y: A Tensor dtype of float16, float32. \n
 
 *@attention Constraints:
-*THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
+* THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
 */
 REG_OP(SoftmaxGradExt)
     .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
index e81b84c4..4c6f7293 100644
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -1317,33 +1317,38 @@ REG_OP(AvgPool1DD)
     .ATTR(count_include_pad, Bool, false)
     .OP_END_FACTORY_REG(AvgPool1DD)
 /**
-*@brief Performs max pooling on the input and outputs both max values and indices . \n
+* @brief Performs max pooling on the input and outputs both max values and indices . \n
 
-*@par Inputs:
+* @par Inputs:
 * One input:
-*x: An 4d Tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"].
-*@par Attributes:
-*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
-* each dimension of the input tensor. No default value.
-*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
-* each dimension of the input tensor. No default value.
-*@li pads: A required string. No default value.
-*@li dtype: A optional int. default value is 3.
-*@li dilation: A optional list of int8, int16, int32, or int64 values.
-*@li ceil_mode: A optional bool. default value is false . \n
+* x: A 5HD Tensor of type float16.
+* Must set the format, supported format list ["NC1HWC0"].
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li pads: A required list of int8, int16, int32, or int64 values,
+* specifying the pad of the input feature map. No default value. \n
+* @li dtype: An optional int. Default value is 3.
+* @li dilation: An optional list of int8, int16, int32, or int64 values.
+* @li ceil_mode: An optional bool. Default value is false . \n
 
-*@par Outputs:
-*y: A Tensor. Has the same type and format as input "x".
-*argmax: A Tensor. type:uint16.
-*@attention Constraints:
-*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
-*@li "strides is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1,
-* strides[2] <= 63, strides[2] >= 1.
-*@li "dilation" is a list that has length 4.
-*@li "ceil_mode" is a bool, default is false . \n
+* @par Outputs:
+* y: A Tensor. Has the same type and format as input "x".
+* argmax: A Tensor. type:uint16.
+* @attention Constraints:
+* @li ksize: a list that has length 4:
+* ksize[0] = 1, ksize[1] = 1, ksize[2] * ksize[3] <= (ub_size-8)*1024//6//2//16.
+* @li strides: a list that has length 4:
+* strides[0] = 1, strides[1] = 1, 1 <= strides[2] <= 2048, 1 <= strides[3] <= 2048.
+* @li pads: a list that has length 4:
+* pads[0] = 1, pads[1] = 1, 1 <= pads[2] <= (ksize[2]//2), 1 <= pads[3] <= (ksize[3]//2).
+* @li dilation: a list that has length 4.
+* @li ceil_mode: a bool, default is false . \n
 
-*@par Third-party framework compatibility
-* Compatible with the TensorFlow operator MaxPoolWithArgmax.
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator max_pool2d_with_indices.
 */
 REG_OP(MaxPoolWithArgmaxV2)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -1358,36 +1363,44 @@ REG_OP(MaxPoolWithArgmaxV2)
 /**
-*@brief Performs the backpropagation of MaxPoolWithArgmaxV2 . \n
+* @brief Performs the backpropagation of MaxPoolWithArgmaxV2. \n
 
-*@par Inputs:
+* @par Inputs:
 * Three inputs, including:
-*@li x: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"]
-*@li grad: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"]
-*@li argmx: An 4d tensor of type uint16 or int64. Must set the format, supported format list ["NCHW, NHWC"] \n
+* @li x: A 5HD tensor of type float16.
+* Must set the format, supported format list ["NC1HWC0"]
+* @li grad: A 5HD tensor of type float16.
+* Must set the format, supported format list ["NC1HWC0"]
+* @li argmax: A 5HD tensor of type uint16 or int64.
+* Must set the format, supported format list ["NC1HWC0"] \n
 
-*@par Attributes:
-*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
- * each dimension of the input tensor. No default value.
-*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
- * each dimension of the input tensor. No default value.
-*@li pads: A required string. No default value.
-*@li dtype: A optional int. default value is 3.
-*@li dilation: A optional list of int8, int16, int32, or int64 values.
-*@li ceil_mode: A optional bool. default value is false . \n
-
-*@par Outputs:
-*y: A Tensor. Has the same type and format as input "x" . \n
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li pads: A required list of int8, int16, int32, or int64 values,
+* specifying the pad of the input feature map. No default value. \n
+* @li dtype: An optional int. Default value is 3.
+* @li dilation: An optional list of int8, int16, int32, or int64 values.
+* @li ceil_mode: An optional bool. Default value is false. \n
 
-*@attention Constraints:
-*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
-*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1
-*@li "dilation" is a list that has length 4.
-*@li "ceil_mode" is a bool, default is false . \n
+* @par Outputs:
+* y: A Tensor. Has the same type and format as input "x". \n
 
-*@see max_pool_grad_with_argmaxv2
-*@par Third-party framework compatibility
-* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV2.
+* @attention Constraints:
+* @li ksize: a list that has length 4:
+* ksize[0] = 1, ksize[1] = 1, ksize[2] * ksize[3] <= (ub_size-8)*1024//7//2//16.
+* @li strides: a list that has length 4:
+* strides[0] = 1, strides[1] = 1, 1 <= strides[2] <= 2048, 1 <= strides[3] <= 2048.
+* @li pads: a list that has length 4:
+* pads[0] = 1, pads[1] = 1, 1 <= pads[2] <= (ksize[2]//2), 1 <= pads[3] <= (ksize[3]//2).
+* @li dilation: a list that has length 4.
+* @li ceil_mode: a bool, default is false. \n
+
+* @see max_pool_grad_with_argmaxv2
+* @par Third-party framework compatibility
+* Compatible with the PyTorch backward operator of max_pool2d_with_indices.
 */
 
 REG_OP(MaxPoolGradWithArgmaxV2)
@@ -1674,24 +1687,28 @@ REG_OP(AdaptiveAvgPool2dGrad)
 * @li argmax: A tensor of type uint16 or int64. \n
 
 * @par Attributes:
-* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
-* each dimension of the input tensor. No default value.
-* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
-* each dimension of the input tensor. No default value.
-* @li pads: A required listint. \n
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li pads: A required list of int8, int16, int32, or int64 values,
+* specifying the pad of the input feature map. No default value. \n
 
 * @par Outputs:
 * y: A Tensor. Has the same type and format as input "x". \n
 
 * @attention Constraints:
-* @li ksize: is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
-* @li strides: is a list that has length 4: strides[0] = 1 or strides[3] = 1
-* @li pads: listint.
-* @li ceil_mode: defaults to False.
-* @li data_format: A optional string. \n
+* @li The MaxPoolGradWithArgmaxV2 operator has the same function; using the V2 operator is recommended.
+* @li ksize: a list that has length 4:
+* ksize[0] = 1, ksize[3] = 1, ksize[1] * ksize[2] <= (ub_size-8)*1024//7//2//16.
+* @li strides: a list that has length 4:
+* strides[0] = 1, strides[3] = 1, 1 <= strides[1] <= 2048, 1 <= strides[2] <= 2048.
+* @li pads: a list that has length 4:
+* pads[0] = 1, pads[3] = 1, 1 <= pads[1] <= (ksize[1]//2), 1 <= pads[2] <= (ksize[2]//2).
+* @li ceil_mode: defaults to False.\n
 
 * @par Third-party framework compatibility
-* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1.
+* Compatible with the PyTorch backward operator of max_pool2d_with_indices.
 */
 
 REG_OP(MaxPoolGradWithArgmaxV1)
@@ -1715,26 +1732,29 @@ REG_OP(MaxPoolGradWithArgmaxV1)
 * x: A Tensor of type float16. \n
 
 * @par Attributes:
-* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
-* each dimension of the input tensor. No default value.
-* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
-* each dimension of the input tensor. No default value.
-* @li pads: A required string. No default value. \n
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li pads: A required list of int8, int16, int32, or int64 values,
+* specifying the pad of the input feature map. No default value. \n
 
 * @par Outputs:
 * y: A Tensor. Has the same type and format as input "x".
 * argmax: A Tensor. type:uint16. \n
 
 * @attention Constraints:
-* @li ksize: a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
-* @li stride: a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1,
-* strides[2] <= 63, strides[2] >= 1.
-* @li pads: listint.
+* @li The MaxPoolWithArgmaxV2 operator has the same function; using the V2 operator is recommended.
+* @li ksize: a list that has length 4:
+* ksize[0] = 1, ksize[3] = 1, ksize[1] * ksize[2] <= (ub_size-8)*1024//6//2//16.
+* @li strides: a list that has length 4:
+* strides[0] = 1, strides[3] = 1, 1 <= strides[1] <= 2048, 1 <= strides[2] <= 2048.
+* @li pads: a list that has length 4:
+* pads[0] = 1, pads[3] = 1, 1 <= pads[1] <= (ksize[1]//2), 1 <= pads[2] <= (ksize[2]//2).
 * @li ceil_mode: defaults to False.
-* @li data_format: A optional string. \n
 
 * @par Third-party framework compatibility
-* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1.
+* Compatible with the PyTorch operator max_pool2d_with_indices.
 */
 REG_OP(MaxPoolWithArgmaxV1)
     .INPUT(x, TensorType({DT_FLOAT16}))
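The rewritten constraints bound the pooling window by the unified-buffer budget rather than a fixed 255. A hedged helper that evaluates the documented V2 bound (ub_size appears to be the UB capacity in KB with 8 KB reserved; querying it is platform-specific and outside this diff):

#include <cstdint>

// Documented MaxPoolWithArgmaxV2 bound: ksize[2] * ksize[3] <= (ub_size - 8) * 1024 // 6 // 2 // 16.
bool KsizeWithinUbBudget(int64_t ksize_h, int64_t ksize_w, int64_t ub_size_kb) {
  const int64_t budget = (ub_size_kb - 8) * 1024 / 6 / 2 / 16;  // integer division, as documented
  return ksize_h * ksize_w <= budget;
}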
diff --git a/third_party/fwkacllib/inc/ops/ocr_ops.h b/third_party/fwkacllib/inc/ops/ocr_ops.h
index efaa7aa7..9f43c8a6 100644
--- a/third_party/fwkacllib/inc/ops/ocr_ops.h
+++ b/third_party/fwkacllib/inc/ops/ocr_ops.h
@@ -239,10 +239,10 @@ REG_OP(OCRDetectionPostHandle)
 *@li polys_data: A Tensor of type int32. point data of every poly.
 *@li polys_offset:A Tensor of type int32. Offset of every poly .
 *@li polys_size:A Tensor of type int32. Size of every poly.
-*@li img_h:A Tensor of type int32. Height of original image.
-*@li img_w:A Tensor of type int32. Width of original image.
 *@li h_scale:A Tensor of type float. Expand scale of height.
-*@li w_scale:A Tensor of type float. Expand scale of width. \n
+*@li w_scale:A Tensor of type float. Expand scale of width.
+*@li img_h:A Tensor of type int32. Height of original image.
+*@li img_w:A Tensor of type int32. Width of original image. \n
 
 *@par Outputs:
 *@li clipped_polys_data: A Tensor of type int32. point data of every clipped poly. \n
@@ -254,10 +254,10 @@ REG_OP(ResizeAndClipPolys)
     .INPUT(polys_data, TensorType({DT_INT32}))
     .INPUT(polys_offset, TensorType({DT_INT32}))
     .INPUT(polys_size, TensorType({DT_INT32}))
-    .INPUT(img_h, TensorType({DT_INT32}))
-    .INPUT(img_w, TensorType({DT_INT32}))
     .INPUT(h_scale, TensorType({DT_FLOAT}))
     .INPUT(w_scale, TensorType({DT_FLOAT}))
+    .INPUT(img_h, TensorType({DT_INT32}))
+    .INPUT(img_w, TensorType({DT_INT32}))
     .OUTPUT(clipped_polys_data, TensorType({DT_INT32}))
     .OUTPUT(clipped_polys_offset, TensorType({DT_INT32}))
     .OUTPUT(clipped_polys_size, TensorType({DT_INT32}))
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index 2ab5843f..7f742957 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -1320,6 +1320,7 @@ REG_OP(DynamicGRUV2Grad)
 * @li reset:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li new:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li seq_length:A 1D Tensor. Must be one of the following types: float16, float32.
 
 * @par Attributes:
 * @li t_state:An Int identifying the current t state. Default to [0, 4].
@@ -1343,6 +1344,7 @@ REG_OP(GRUV2HiddenGradCell)
     .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1353,7 +1355,7 @@ REG_OP(GRUV2HiddenGradCell)
 /**
 * @brief: DynamicGRUCellGrad calculation.
 * @par Inputs:
-* ten inputs: \n
+* eleven inputs: \n
 * @li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li h:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
@@ -1364,6 +1366,7 @@ REG_OP(GRUV2HiddenGradCell)
 * @li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND.
+* @li seq_length:A 1D Tensor. Must be one of the following types: float16, float32.
 
 * @par Attributes:
 * gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.
@@ -1388,6 +1391,7 @@ REG_OP(DynamicGRUCellGrad)
     .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(t_state, TensorType({DT_INT32, DT_INT32}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))
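Because seq_length is declared with OPTIONAL_INPUT, existing graphs keep building unchanged; only variable-length callers wire it up. A hedged sketch (the set_input_seq_length setter name is assumed from the usual generated-code convention):

#include "all_ops.h"  // generated op classes, including ge::op::GRUV2HiddenGradCell

// Attach the new optional seq_length input only when present; omitting the call
// preserves the pre-change behaviour of GRUV2HiddenGradCell.
void MaybeSetSeqLength(ge::op::GRUV2HiddenGradCell &cell, ge::Operator *seq_length) {
  if (seq_length != nullptr) {
    cell.set_input_seq_length(*seq_length);
  }
}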