
upgrade Ascend package 17 Jun 22

tags/v1.8.0^2
zhangzhenghai, 2 years ago
commit 00032536ea
16 changed files with 536 additions and 219 deletions
  1. inc/external/acl/acl.h (+2, -2)
  2. inc/external/acl/acl_tdt_queue.h (+2, -1)
  3. inc/framework/common/debug/log.h (+1, -1)
  4. inc/framework/omg/parser/parser_factory.h (+2, -2)
  5. metadef (+1, -1)
  6. third_party/fwkacllib/inc/external/runtime/rt_error_codes.h (+117, -117)
  7. third_party/fwkacllib/inc/ops/encoding_ops.h (+2, -2)
  8. third_party/fwkacllib/inc/ops/linalg_ops.h (+20, -0)
  9. third_party/fwkacllib/inc/ops/math_ops.h (+110, -0)
  10. third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+117, -0)
  11. third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+13, -13)
  12. third_party/fwkacllib/inc/ops/nn_detect_ops.h (+47, -2)
  13. third_party/fwkacllib/inc/ops/nn_norm_ops.h (+2, -2)
  14. third_party/fwkacllib/inc/ops/nn_pooling_ops.h (+90, -70)
  15. third_party/fwkacllib/inc/ops/ocr_ops.h (+5, -5)
  16. third_party/fwkacllib/inc/ops/rnn.h (+5, -1)

inc/external/acl/acl.h (+2, -2)

@@ -25,9 +25,9 @@
extern "C" { extern "C" {
#endif #endif


// Current version is 1.1.0
// Current version is 1.2.0
#define ACL_MAJOR_VERSION 1 #define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 1
#define ACL_MINOR_VERSION 2
#define ACL_PATCH_VERSION 0 #define ACL_PATCH_VERSION 0


/** /**
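Since only ACL_MINOR_VERSION moves, client code can gate on the macros at compile time. A minimal sketch (the "acl/acl.h" include path is an assumption):

#include <cstdio>
#include "acl/acl.h"

int main() {
#if ACL_MAJOR_VERSION > 1 || (ACL_MAJOR_VERSION == 1 && ACL_MINOR_VERSION >= 2)
  // 1.2 headers: the additions in this commit (e.g. the extra
  // acltdtAllocBuf parameter below) are visible.
  std::printf("ACL headers %d.%d.%d\n", ACL_MAJOR_VERSION, ACL_MINOR_VERSION, ACL_PATCH_VERSION);
#else
  std::printf("pre-1.2 ACL headers\n");
#endif
  return 0;
}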


inc/external/acl/acl_tdt_queue.h (+2, -1)

@@ -219,6 +219,7 @@ ACL_FUNC_VISIBILITY aclError acltdtQueryQueueRoutes(const acltdtQueueRouteQueryI
 * @brief alloc acltdtBuf
 *
 * @param size [IN] alloc buf size
+ * @param type [IN] reserved parameter, must be set to zero currently
 * @param buf [OUT] pointer to the acltdtBuf
 *
 * @retval ACL_SUCCESS The function is successfully executed.
@@ -226,7 +227,7 @@ ACL_FUNC_VISIBILITY aclError acltdtQueryQueueRoutes(const acltdtQueueRouteQueryI
 *
 * @see acltdtFreeBuf
 */
-ACL_FUNC_VISIBILITY aclError acltdtAllocBuf(size_t size, acltdtBuf *buf);
+ACL_FUNC_VISIBILITY aclError acltdtAllocBuf(size_t size, uint32_t type, acltdtBuf *buf);

 /**
 * @ingroup AscendCL
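The extra parameter is a source-breaking change for existing callers. A call sketch against the new signature, assuming an initialized AscendCL runtime (per the doc comment, type is reserved and must be 0 for now):

#include "acl/acl_tdt_queue.h"

aclError AllocTdtBuf(size_t size, acltdtBuf *buf) {
  // Old form was acltdtAllocBuf(size, buf); the reserved `type`
  // argument currently has to be 0.
  return acltdtAllocBuf(size, 0U, buf);
}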


inc/framework/common/debug/log.h (+1, -1)

@@ -180,7 +180,7 @@
 #define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \
   { \
     GELOGE((_status), "[Check][InnerData]%s", (errormsg)); \
-    REPORT_INNER_ERROR("E10052", "%s", (errormsg)); \
+    REPORT_INNER_ERROR("E19999", "%s", (errormsg)); \
   }

 #define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \
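Call sites are unchanged; only the reported inner-error code moves from "E10052" to "E19999". A hypothetical call site (the status constant and function are illustrative, not from this commit):

ge::Status CheckInnerData(bool shape_ok) {
  if (!shape_ok) {
    // Logs via GELOGE and, after this change, reports inner error "E19999".
    GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, "tensor shape check failed");
    return ge::FAILED;
  }
  return ge::SUCCESS;
}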


inc/framework/omg/parser/parser_factory.h (+2, -2)

@@ -62,7 +62,7 @@ class GE_FUNC_VISIBILITY ModelParserFactory {


 class GE_FUNC_VISIBILITY ModelParserRegisterar {
  public:
-  ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN const fun) {
+  ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN const fun) noexcept {
     ModelParserFactory::Instance()->RegisterCreator(type, fun);
   }
   ~ModelParserRegisterar() {}
@@ -114,7 +114,7 @@ class GE_FUNC_VISIBILITY WeightsParserFactory {


 class GE_FUNC_VISIBILITY WeightsParserRegisterar {
  public:
-  WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN const fun) {
+  WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN const fun) noexcept {
     WeightsParserFactory::Instance()->RegisterCreator(type, fun);
   }
   ~WeightsParserRegisterar() {}


metadef (+1, -1)

@@ -1 +1 @@
-Subproject commit 58412ad7aed08cd1c01cc070d80706e4253c9075
+Subproject commit 2d98a17884e656a2446239cdb9cee79543cb0161

third_party/fwkacllib/inc/external/runtime/rt_error_codes.h (+117, -117)

@@ -1,117 +1,117 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+/**
+ * @file rt_error_codes.h
+ *
+ * Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
 #ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
 #define __INC_EXTERNEL_RT_ERROR_CODES_H__
 #include <stddef.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 static const int32_t ACL_RT_SUCCESS = 0; // success
 static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
 static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
 static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
 static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
 static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
 static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
 static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
 static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
 static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
 static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
 static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
 static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
 static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
 static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
 static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
 static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
 static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
 static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
 static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
 static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
+static const int32_t ACL_ERROR_RT_TASK_TIMEOUT = 107020; // task timeout
 static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
 static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
 static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
 static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
 static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
 static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
 static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
 static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
 static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
 static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
 static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
 static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource
 static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit
 static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty
 static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full
 static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init
 static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow
+static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common over flow
 static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
 static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
 static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
 static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
 static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
 static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
 static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
 static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
 static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
 static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
 static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
 static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
 static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
 static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
 static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
 static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
 static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
 static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
 static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
 static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
 static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
 static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
 static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
 static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
 static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
 static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
 static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
 static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
 static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
 static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
 static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
 static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
 static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
 static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
 static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
 static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
 static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
 static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
 static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
 static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
 static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // aic trap read overflow
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow
 static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
 static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
 static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect
 #ifdef __cplusplus
 }
 #endif
 #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
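Only six codes are actually new in this revision (the rest of the file changes because the license header is replaced). A minimal sketch that names them; the include path is an assumption:

#include <cstdint>
#include "runtime/rt_error_codes.h"

const char *NewRtErrorName(int32_t code) {
  switch (code) {
    case ACL_ERROR_RT_TASK_TIMEOUT:                    return "task timeout";
    case ACL_ERROR_RT_OVER_FLOW:                       return "common over flow";
    case ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW:       return "aic trap read overflow";
    case ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW:      return "aic trap write overflow";
    case ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW:  return "aiv trap read overflow";
    case ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW: return "aiv trap write overflow";
    default:                                           return "not a new code in this commit";
  }
}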

third_party/fwkacllib/inc/ops/encoding_ops.h (+2, -2)

@@ -1,5 +1,5 @@
-/**
- * Copyright (C) Huawei Technologies Co., Ltd 2022-2022. All rights reserved.
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd 2022-2022. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.


third_party/fwkacllib/inc/ops/linalg_ops.h (+20, -0)

@@ -525,6 +525,26 @@ REG_OP(BandedTriangularSolve)
 .ATTR(lower, Bool, true)
 .ATTR(adjoint, Bool, false)
 .OP_END_FACTORY_REG(BandedTriangularSolve)

+/**
+* @brief Returns the complex conjugate transpose.

+* @par Inputs:
+* @li x: A Tensor. Must be one of the following types: double, float32, float16,
+* int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool.
+* @li perm: An index Tensor. Must be one of the following types: int32, int64. \n
+*
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x". \n

+* @par Third-party framework compatibility
+* Compatible with the TensorFlow ConjugateTranspose operator.
+*/
+REG_OP(ConjugateTranspose)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(perm, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(ConjugateTranspose)
 } // namespace ge

 #endif // OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_
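Semantics sketch for the new op (not the GE graph API): y = conj(transpose(x, perm)); for real dtypes the conjugate is the identity. A self-contained 2-D reference with perm = {1, 0}:

#include <complex>
#include <vector>

std::vector<std::complex<float>> ConjugateTranspose2D(
    const std::vector<std::complex<float>> &x, int rows, int cols) {
  std::vector<std::complex<float>> y(x.size());
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      // Swap the indices and conjugate the element.
      y[static_cast<size_t>(c) * rows + r] = std::conj(x[static_cast<size_t>(r) * cols + c]);
    }
  }
  return y;
}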

third_party/fwkacllib/inc/ops/math_ops.h (+110, -0)

@@ -1217,6 +1217,35 @@ REG_OP(DenseCountSparseOutput)
 .REQUIRED_ATTR(binary_output, Bool)
 .OP_END_FACTORY_REG(DenseCountSparseOutput)

+/**
+* @brief Computes gradients for SparseSegmentSum. \n

+* @par Inputs:
+* The input grad must be of type float or double. Inputs include:
+* @li grad: A Tensor. Must be one of the following types: bfloat16, float16, float32, double.
+* Gradient propagated to the SparseSegmentSum op.
+* @li indices: A Tensor. Must be one of the following types: int32, int64.
+* Indices passed to the corresponding SparseSegmentSum op.
+* @li segment_ids: A Tensor of type int32 or int64. segment_ids passed to the
+* corresponding SparseSegmentSum op.
+* @li output_dim0: A Tensor of type int32. Dimension 0 of "x" passed to
+* the SparseSegmentSum op. \n

+* @par Outputs:
+* output: A Tensor. Has the same type as grad. \n

+* @par Third-party framework compatibility
+* Compatible with the TensorFlow SparseSegmentSumGrad operator.
+*/
+REG_OP(SparseSegmentSumGrad)
+    .INPUT(grad, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(indices, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(segment_ids, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(output_dim0, TensorType({DT_INT32}))
+    .OUTPUT(output, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(SparseSegmentSumGrad)

 /**
 * @brief Count the number of occurrences of each value in the input ragged integer array,
 * and output it according to the sparse matrix. \n
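Reference semantics of the SparseSegmentSumGrad op added above (mirroring the TensorFlow op): rows of grad are scattered back through indices, with output_dim0 giving the leading dimension of the result. A flattened-row sketch:

#include <vector>

std::vector<float> SparseSegmentSumGradRef(const std::vector<float> &grad,
                                           const std::vector<int> &indices,
                                           const std::vector<int> &segment_ids,
                                           int output_dim0, int row_len) {
  std::vector<float> out(static_cast<size_t>(output_dim0) * row_len, 0.0f);
  for (size_t i = 0; i < indices.size(); ++i) {
    const float *src = &grad[static_cast<size_t>(segment_ids[i]) * row_len];
    float *dst = &out[static_cast<size_t>(indices[i]) * row_len];
    for (int j = 0; j < row_len; ++j) {
      dst[j] += src[j];  // duplicate indices accumulate
    }
  }
  return out;
}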
@@ -1335,6 +1364,87 @@ REG_OP(ScaledMaskedSoftmaxGrad)
 .ATTR(scale, Float, 1.0)
 .ATTR(fixed_triu_mask, Bool, false)
 .OP_END_FACTORY_REG(ScaledMaskedSoftmaxGrad)

+/**
+* @brief SignBitsPack.

+* @par Inputs:
+* One input, including:
+* @li x: A 1D Tensor of float32 or float16.
+*
+* @par Attributes:
+* @li size: First dim value of the output tensor.
+*
+* @par Outputs:
+* @li y: A 2D Tensor of type uint8 with shape (size, N).
+*/
+REG_OP(SignBitsPack)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_UINT8}))
+    .REQUIRED_ATTR(size, Int)
+    .OP_END_FACTORY_REG(SignBitsPack)

+/**
+* @brief Get Sobol samples. \n

+* @par Inputs:
+* Three inputs, including:
+* @li dim: Dimension of results, which must be a scalar of type int32.
+* @li num_results: Number of results, which must be a scalar of type int32.
+* @li skip: Number of initial points, which must be a scalar of type int32. \n

+* @par Attributes:
+* @li dtype: Data type of output samples. \n

+* @par Outputs:
+* @li samples: A Tensor of type DT_FLOAT or DT_DOUBLE holding the generated samples. \n

+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow SobolSample operator.
+*/
+REG_OP(SobolSample)
+    .INPUT(dim, TensorType({DT_INT32}))
+    .INPUT(num_results, TensorType({DT_INT32}))
+    .INPUT(skip, TensorType({DT_INT32}))
+    .OUTPUT(samples, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(dtype, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(SobolSample)

+/**
+* @brief Count the number of occurrences of each value in the input sparse integer array,
+* and output it according to the sparse matrix. \n

+* @par Inputs:
+* @li indices: A tensor of type int64.
+* @li values: A tensor of type int32 or int64.
+* @li dense_shape: A tensor of type int64.
+* @li weights: A tensor of type int32, int64, float, or double. \n

+* @par Attributes:
+* @li minlength: An optional int >= -1. Defaults to -1.
+* @li maxlength: An optional int >= -1. Defaults to -1.
+* @li binary_output: A required bool. \n

+* @par Outputs:
+* @li output_indices: A tensor of type int64.
+* @li output_values: A tensor of the same type as "weights".
+* @li output_dense_shape: A tensor of type int64. \n

+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator SparseCountSparseOutput. \n
+*/
+REG_OP(SparseCountSparseOutput)
+    .INPUT(indices, TensorType({DT_INT64}))
+    .INPUT(values, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(dense_shape, TensorType({DT_INT64}))
+    .INPUT(weights, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(output_indices, TensorType({DT_INT64}))
+    .OUTPUT(output_values, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(output_dense_shape, TensorType({DT_INT64}))
+    .ATTR(minlength, Int, -1)
+    .ATTR(maxlength, Int, -1)
+    .REQUIRED_ATTR(binary_output, Bool)
+    .OP_END_FACTORY_REG(SparseCountSparseOutput)
 } // namespace ge

 #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_
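For SignBitsPack, the doc above only fixes the shapes (1D float in, (size, N) uint8 out). A packing sketch; the sign convention (bit set for negative input) and the bit order within each byte are assumptions, not stated by the header:

#include <cstdint>
#include <vector>

std::vector<uint8_t> SignBitsPackRef(const std::vector<float> &x) {
  std::vector<uint8_t> y((x.size() + 7) / 8, 0);
  for (size_t i = 0; i < x.size(); ++i) {
    if (x[i] < 0.0f) {
      y[i / 8] |= static_cast<uint8_t>(1U << (i % 8));  // assumed LSB-first
    }
  }
  return y;  // reshape to (size, N) afterwards
}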

third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+117, -0)

@@ -155,6 +155,49 @@ REG_OP(AttentionLnQKV)
 .ATTR(trans_b, Bool, false)
 .OP_END_FACTORY_REG(AttentionLnQKV)

+/**
+* @brief swin_transformer model specific structure. Operator only supports swin_transformer. \n
+* @par Inputs:
+* Five inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16.
+* @li gamma: A Tensor. Must be one of the following types: float16.
+* @li beta: A Tensor. Must be one of the following types: float16.
+* @li weight: A Tensor. Must be one of the following types: float16.
+* @li bias: A Tensor. Must be one of the following types: float16. \n

+* @par Attributes:
+* @li head_num: A required attribute, the type is int.
+* @li head_dim: A required attribute, the type is int.
+* @li seq_length: A required attribute, the type is int.
+* @li shifts: An optional attribute, the type is list int. Defaults to ().
+* @li epsilon: An optional attribute, the type is float. Defaults to 1e-7. \n

+* @par Outputs:
+* Three outputs, including:
+* @li query_output: A Tensor. Must be one of the following types: float16.
+* @li key_output: A Tensor. Must be one of the following types: float16.
+* @li value_output: A Tensor. Must be one of the following types: float16. \n

+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+REG_OP(SwinTransformerLnQKV)
+    .INPUT(x, TensorType({DT_FLOAT16}))
+    .INPUT(gamma, TensorType({DT_FLOAT16}))
+    .INPUT(beta, TensorType({DT_FLOAT16}))
+    .INPUT(weight, TensorType({DT_FLOAT16}))
+    .INPUT(bias, TensorType({DT_FLOAT16}))
+    .OUTPUT(query_output, TensorType({DT_FLOAT16}))
+    .OUTPUT(key_output, TensorType({DT_FLOAT16}))
+    .OUTPUT(value_output, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(head_num, Int)
+    .REQUIRED_ATTR(head_dim, Int)
+    .REQUIRED_ATTR(seq_length, Int)
+    .ATTR(shifts, ListInt, {})
+    .ATTR(epsilon, Float, 0.0000001)
+    .OP_END_FACTORY_REG(SwinTransformerLnQKV)

 /**
 *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n


@@ -1639,6 +1682,80 @@ REG_OP(TensorScatterMin)
 .OUTPUT(output, TensorType::BasicType())
 .OP_END_FACTORY_REG(TensorScatterMin)

+/**
+* @brief Returns the batched diagonal part of a batched tensor. \n

+* @par Inputs:
+* @li x: A Tensor. Rank r tensor where r >= 2.
+* @li k: A Tensor of type int32. Diagonal offset(s). Positive value means superdiagonal,
+* 0 refers to the main diagonal, and negative value means subdiagonals. k can be a
+* single integer (for a single diagonal) or a pair of integers specifying the low and
+* high ends of a matrix band. k[0] must not be larger than k[1].
+* @li padding_value: A Tensor. Must have the same type as "x". The value to fill the area
+* outside the specified diagonal band with. Default is 0. \n

+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x". \n

+* @par Attributes:
+* @li align: An optional string from: "LEFT_RIGHT", "RIGHT_LEFT", "LEFT_LEFT", "RIGHT_RIGHT". Defaults to "RIGHT_LEFT".

+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator MatrixDiagPartV3.
+*/
+REG_OP(MatrixDiagPartV3)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(k, TensorType({DT_INT32}))
+    .INPUT(padding_value, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .ATTR(align, String, "RIGHT_LEFT")
+    .OP_END_FACTORY_REG(MatrixDiagPartV3)

+/**
+* @brief Returns a batched diagonal tensor with given batched diagonal values. \n

+* @par Inputs:
+* Five inputs, including:
+* @li x: Rank `r`, where `r >= 1`. \n

+* @li k:
+* Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main
+* diagonal, and negative value means subdiagonals. `k` can be a single integer
+* (for a single diagonal) or a pair of integers specifying the low and high ends
+* of a matrix band. `k[0]` must not be larger than `k[1]`. \n

+* @li num_rows:
+* The number of rows of the output matrix. If it is not provided, the op assumes
+* the output matrix is a square matrix and infers the matrix size from k and the
+* innermost dimension of `diagonal`. \n

+* @li num_cols:
+* The number of columns of the output matrix. If it is not provided, the op
+* assumes the output matrix is a square matrix and infers the matrix size from
+* k and the innermost dimension of `diagonal`. \n

+* @li padding_value: The number to fill the area outside the specified diagonal band with. \n

+* @par Attributes:
+* @li align: An optional string from: "LEFT_RIGHT", "RIGHT_LEFT", "LEFT_LEFT", "RIGHT_RIGHT".
+* Defaults to "RIGHT_LEFT". \n

+* @par Outputs:
+* @li y: Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise. \n

+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator MatrixDiagV3.
+*/
+REG_OP(MatrixDiagV3)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(k, TensorType({DT_INT32}))
+    .INPUT(num_rows, TensorType({DT_INT32}))
+    .INPUT(num_cols, TensorType({DT_INT32}))
+    .INPUT(padding_value, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .ATTR(align, String, "RIGHT_LEFT")
+    .OP_END_FACTORY_REG(MatrixDiagV3)

 } // namespace ge

 #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_
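For MatrixDiagPartV3, the general case extracts a band [k[0], k[1]] with padding and an align mode; a simplified reference for the k = 0 case on one row-major matrix:

#include <algorithm>
#include <vector>

std::vector<float> MainDiagonal(const std::vector<float> &x, int rows, int cols) {
  const int n = std::min(rows, cols);
  std::vector<float> y(n);
  for (int i = 0; i < n; ++i) {
    y[i] = x[static_cast<size_t>(i) * cols + i];  // y[i] = x[i][i]
  }
  return y;
}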

third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+13, -13)

@@ -651,21 +651,21 @@ REG_OP(Deconvolution)
 | Name             | Field    | Scope        |\n
 |------------------|----------|--------------|\n
 | x(fmap)          | H        | [1, 200000]  |\n
-|                  | W        | [1, 4096]    |\n
-| Filter Size      | H        | [1, 255]     |\n
-|                  | W        | [1, 255]     |\n
+|                  | W        | [1, 200000]  |\n
+| Filter Size      | H        | [1, 200000]  |\n
+|                  | W        | [1, 200000]  |\n
 | out_backprop     | H        | [1, 200000]  |\n
-|                  | W        | [1, 4096]    |\n
+|                  | W        | [1, 200000]  |\n
 | y                | H        | [1, 200000]  |\n
-|                  | W        | [1, 4096]    |\n
-| Stride           | H        | [1, 63]      |\n
-|                  | W        | [1, 63]      |\n
-| Padding          | Top      | [0, 255]     |\n
-|                  | Bottom   | [0, 255]     |\n
-|                  | Left     | [0, 255]     |\n
-|                  | Right    | [0, 255]     |\n
-| Dilation         | H        | [1, 255]     |\n
-|                  | W        | [1, 255]     |\n
+|                  | W        | [1, 200000]  |\n
+| Stride           | H        | [1, 200000]  |\n
+|                  | W        | [1, 200000]  |\n
+| Padding          | Top      | [0, 200000]  |\n
+|                  | Bottom   | [0, 200000]  |\n
+|                  | Left     | [0, 200000]  |\n
+|                  | Right    | [0, 200000]  |\n
+| Dilation         | H        | [1, 200000]  |\n
+|                  | W        | [1, 200000]  |\n
 *\n
 *@par Outputs:
 * y: A Tensor. Has the same type as x, has the same format as filter_size.


third_party/fwkacllib/inc/ops/nn_detect_ops.h (+47, -2)

@@ -1550,8 +1550,8 @@ REG_OP(DecodeWheelsTarget)
 *@li max_size_per_class: A required attribute of type int, specifying the nms output num per class.
 *@li max_total_size: A required attribute of type int, specifying the nms output num per batch.
 *@li change_coordinate_frame: A optional attribute of type bool, whether to normalize coordinates after clipping.
-*@li transpose_box: A optional attribute of type bool, whether inserted transpose before this op. must be "false".
-*@li image_size: A optional attribute of type ListInt, the size of the image. \n
+* @li transpose_box: A optional attribute of type bool, whether inserted transpose before this op. must be "false".
+* @li image_size: A optional attribute of type ListInt, the size of the image. \n

 *@par Outputs:
 *@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4),
@@ -2352,6 +2352,51 @@ REG_OP(DIoU)
 .ATTR(is_cross, Bool, true)
 .ATTR(mode, String, "iou")
 .OP_END_FACTORY_REG(DIoU)

+/**
+* @brief Calculate the intersection ratio of two rotated cuboids. \n

+* @par Inputs:
+* @li bboxes: Predicted boxes, a 3D Tensor of type float32 with
+* shape (B, 7, N). "N" indicates the number of boxes, and the value
+* "7" refers to [x, y, z, w, h, d, theta].
+* @li gtboxes: Ground-truth boxes, a 3D Tensor of type float32 with
+* shape (B, 7, K). "K" indicates the number of boxes, and the value
+* "7" refers to [x, y, z, w, h, d, theta].

+* @par Outputs:
+* iou: A 3D Tensor of float32 with shape [B, N, K].

+* @attention Constraints:
+* In each batch, the invalid box cannot appear before the valid box.
+*/
+REG_OP(Iou3D)
+    .INPUT(bboxes, TensorType({DT_FLOAT}))
+    .INPUT(gtboxes, TensorType({DT_FLOAT}))
+    .OUTPUT(iou, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(Iou3D)

+/**
+* @brief Generates bounding boxes based on "priors" and "bboxes".
+* It is a customized yolox operator. \n

+* @par Inputs:
+* Two inputs, including:
+* @li priors: prior sample boxes of the origin image.
+* A 2D Tensor of type float32 or float16 with shape (N, 4).
+* "N" indicates the number of boxes, and the value "4" refers to "x0", "x1", "y0", and "y1".
+* @li bboxes: bboxes predicted by the model. A 3D Tensor of type float32 or float16 with shape (B, N, 4).
+* "B" indicates the batch size, "N" indicates the number of boxes, and "4" indicates "dx", "dy", "dw", and "dh". \n

+* @par Outputs:
+* decoded_bboxes: Bboxes generated based on "priors" and "bboxes". Has the same format
+* and type as "bboxes".
+*/
+REG_OP(YoloxBoundingBoxDecode)
+    .INPUT(priors, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(decoded_bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(YoloxBoundingBoxDecode)
 } // namespace ge

 #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
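The header does not spell out the decode formula for YoloxBoundingBoxDecode, so treat the following as an assumption of intent: the conventional (dx, dy, dw, dh) anchor decode, with priors taken as corner boxes:

#include <cmath>

struct Box { float x0, y0, x1, y1; };  // corner format (assumed)

Box DecodeOne(const Box &prior, float dx, float dy, float dw, float dh) {
  const float pw = prior.x1 - prior.x0, ph = prior.y1 - prior.y0;
  const float pcx = prior.x0 + 0.5f * pw, pcy = prior.y0 + 0.5f * ph;
  const float cx = pcx + dx * pw, cy = pcy + dy * ph;        // shift the center
  const float w = pw * std::exp(dw), h = ph * std::exp(dh);  // rescale the extent
  return {cx - 0.5f * w, cy - 0.5f * h, cx + 0.5f * w, cy + 0.5f * h};
}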


third_party/fwkacllib/inc/ops/nn_norm_ops.h (+2, -2)

@@ -426,10 +426,10 @@ REG_OP(ConfusionSoftmaxGrad)
 *@li keepdims: A bool Scalar. If true, retains reduced dimensions with length 1 . \n

 *@par Outputs:
-*y: A Tensor dtype of float16, float32. \n
+* y: A Tensor dtype of float16, float32. \n

 *@attention Constraints:
-*THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
+* THIS OPERATOR IS DEPRECATED. It will be removed in a future version.
 */
 REG_OP(SoftmaxGradExt)
 .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))


third_party/fwkacllib/inc/ops/nn_pooling_ops.h (+90, -70)

@@ -1317,33 +1317,38 @@ REG_OP(AvgPool1DD)
 .ATTR(count_include_pad, Bool, false)
 .OP_END_FACTORY_REG(AvgPool1DD)
 /**
-*@brief Performs max pooling on the input and outputs both max values and indices . \n
+* @brief Performs max pooling on the input and outputs both max values and indices . \n

-*@par Inputs:
+* @par Inputs:
 * One input:
-*x: An 4d Tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"].
-*@par Attributes:
-*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
-* each dimension of the input tensor. No default value.
-*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
-* each dimension of the input tensor. No default value.
-*@li pads: A required string. No default value.
-*@li dtype: A optional int. default value is 3.
-*@li dilation: A optional list of int8, int16, int32, or int64 values.
-*@li ceil_mode: A optional bool. default value is false . \n
+* x: A 5HD Tensor of type float16.
+* Must set the format, supported format list ["NC1HWC0"].
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li pads: A required list of int8, int16, int32, or int64 values,
+* specifying the pad of the input feature map. No default value. \n
+* @li dtype: A optional int. default value is 3.
+* @li dilation: A optional list of int8, int16, int32, or int64 values.
+* @li ceil_mode: A optional bool. default value is false . \n

-*@par Outputs:
-*y: A Tensor. Has the same type and format as input "x".
-*argmax: A Tensor. type:uint16.
-*@attention Constraints:
-*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
-*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1,
-* strides[2] <= 63, strides[2] >= 1.
-*@li "dilation" is a list that has length 4.
-*@li "ceil_mode" is a bool, default is false . \n
+* @par Outputs:
+* y: A Tensor. Has the same type and format as input "x".
+* argmax: A Tensor. type:uint16.
+* @attention Constraints:
+* @li ksize: a list that has length 4:
+* ksize[0] = 1, ksize[1] = 1, ksize[2] * ksize[3] <= (ub_size-8)*1024//6//2//16.
+* @li strides: a list that has length 4:
+* strides[0] = 1, strides[1] = 1, 1 <= strides[2] <= 2048, 1 <= strides[3] <= 2048.
+* @li pads: a list that has length 4:
+* pads[0] = 1, pads[1] = 1, 1 <= pads[2] <= (ksize[2]//2), 1 <= pads[3] <= (ksize[3]//2).
+* @li dilation: a list that has length 4.
+* @li ceil_mode: is a bool, default is false . \n

-*@par Third-party framework compatibility
-* Compatible with the TensorFlow operator MaxPoolWithArgmax.
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator max_pool2d_with_indices.
 */
 REG_OP(MaxPoolWithArgmaxV2)
 .INPUT(x, TensorType({DT_FLOAT16}))
@@ -1358,36 +1363,44 @@ REG_OP(MaxPoolWithArgmaxV2)
 .OP_END_FACTORY_REG(MaxPoolWithArgmaxV2)

 /**
-*@brief Performs the backpropagation of MaxPoolWithArgmaxV2 . \n
+* @brief Performs the backpropagation of MaxPoolWithArgmaxV2. \n

-*@par Inputs:
+* @par Inputs:
 * Three inputs, including:
-*@li x: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"]
-*@li grad: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"]
-*@li argmx: An 4d tensor of type uint16 or int64. Must set the format, supported format list ["NCHW, NHWC"] \n
+* @li x: A 5HD tensor of type float16.
+* Must set the format, supported format list ["NC1HWC0"]
+* @li grad: A 5HD tensor of type float16.
+* Must set the format, supported format list ["NC1HWC0"]
+* @li argmax: A 5HD tensor of type uint16 or int64.
+* Must set the format, supported format list ["NC1HWC0"] \n

-*@par Attributes:
-*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
-* each dimension of the input tensor. No default value.
-*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
-* each dimension of the input tensor. No default value.
-*@li pads: A required string. No default value.
-*@li dtype: A optional int. default value is 3.
-*@li dilation: A optional list of int8, int16, int32, or int64 values.
-*@li ceil_mode: A optional bool. default value is false . \n
-
-*@par Outputs:
-*y: A Tensor. Has the same type and format as input "x" . \n
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li pads: A required list of int8, int16, int32, or int64 values,
+* specifying the pad of the input feature map. No default value. \n
+* @li dtype: A optional int. default value is 3.
+* @li dilation: A optional list of int8, int16, int32, or int64 values.
+* @li ceil_mode: A optional bool. default value is false. \n

-*@attention Constraints:
-*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
-*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1
-*@li "dilation" is a list that has length 4.
-*@li "ceil_mode" is a bool, default is false . \n
+* @par Outputs:
+* y: A Tensor. Has the same type and format as input "x". \n

-*@see max_pool_grad_with_argmaxv2
-*@par Third-party framework compatibility
-* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV2.
+* @attention Constraints:
+* @li ksize: a list that has length 4:
+* ksize[0] = 1, ksize[1] = 1, ksize[2] * ksize[3] <= (ub_size-8)*1024//7//2//16.
+* @li strides: a list that has length 4:
+* strides[0] = 1, strides[1] = 1, 1 <= strides[2] <= 2048, 1 <= strides[3] <= 2048.
+* @li pads: a list that has length 4:
+* pads[0] = 1, pads[1] = 1, 1 <= pads[2] <= (ksize[2]//2), 1 <= pads[3] <= (ksize[3]//2).
+* @li dilation: a list that has length 4.
+* @li ceil_mode: is a bool, default is false. \n

+* @see max_pool_grad_with_argmaxv2
+* @par Third-party framework compatibility
+* Compatible with the PyTorch backward operator of max_pool2d_with_indices.
 */

 REG_OP(MaxPoolGradWithArgmaxV2)
@@ -1674,24 +1687,28 @@ REG_OP(AdaptiveAvgPool2dGrad)
 * @li argmax: A tensor of type uint16 or int64. \n

 * @par Attributes:
-* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
-* each dimension of the input tensor. No default value.
-* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
-* each dimension of the input tensor. No default value.
-* @li pads: A required listint. \n
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li pads: A required list of int8, int16, int32, or int64 values,
+* specifying the pad of the input feature map. No default value. \n

 * @par Outputs:
 * y: A Tensor. Has the same type and format as input "x". \n

 * @attention Constraints:
-* @li ksize: is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
-* @li strides: is a list that has length 4: strides[0] = 1 or strides[3] = 1
-* @li pads: listint.
-* @li ceil_mode: defaults to False.
-* @li data_format: A optional string. \n
+* @li The MaxPoolGradWithArgmaxV2 operator has the same function, and it is recommended to use the V2 operator.
+* @li ksize: a list that has length 4:
+* ksize[0] = 1, ksize[3] = 1, ksize[1] * ksize[2] <= (ub_size-8)*1024//7//2//16.
+* @li strides: a list that has length 4:
+* strides[0] = 1, strides[3] = 1, 1 <= strides[1] <= 2048, 1 <= strides[2] <= 2048.
+* @li pads: a list that has length 4:
+* pads[0] = 1, pads[3] = 1, 1 <= pads[1] <= (ksize[1]//2), 1 <= pads[2] <= (ksize[2]//2).
+* @li ceil_mode: defaults to False. \n

 * @par Third-party framework compatibility
-* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1.
+* Compatible with the PyTorch backward operator of max_pool2d_with_indices.
 */

 REG_OP(MaxPoolGradWithArgmaxV1)
@@ -1715,26 +1732,29 @@ REG_OP(MaxPoolGradWithArgmaxV1)
 * x: A Tensor of type float16. \n

 * @par Attributes:
-* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
-* each dimension of the input tensor. No default value.
-* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
-* each dimension of the input tensor. No default value.
-* @li pads: A required string. No default value. \n
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of the input tensor. No default value.
+* @li pads: A required list of int8, int16, int32, or int64 values,
+* specifying the pad of the input feature map. No default value. \n

 * @par Outputs:
 * y: A Tensor. Has the same type and format as input "x".
 * argmax: A Tensor. type:uint16. \n

 * @attention Constraints:
-* @li ksize: a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
-* @li stride: a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1,
-* strides[2] <= 63, strides[2] >= 1.
-* @li pads: listint.
+* @li The MaxPoolWithArgmaxV2 operator has the same function, and it is recommended to use the V2 operator.
+* @li ksize: a list that has length 4:
+* ksize[0] = 1, ksize[3] = 1, ksize[1] * ksize[2] <= (ub_size-8)*1024//6//2//16.
+* @li strides: a list that has length 4:
+* strides[0] = 1, strides[3] = 1, 1 <= strides[1] <= 2048, 1 <= strides[2] <= 2048.
+* @li pads: a list that has length 4:
+* pads[0] = 1, pads[3] = 1, 1 <= pads[1] <= (ksize[1]//2), 1 <= pads[2] <= (ksize[2]//2).
 * @li ceil_mode: defaults to False.
-* @li data_format: A optional string. \n

 * @par Third-party framework compatibility
-* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1.
+* Compatible with the PyTorch operator max_pool2d_with_indices.
 */
 REG_OP(MaxPoolWithArgmaxV1)
 .INPUT(x, TensorType({DT_FLOAT16}))
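The V2 constraints above are mechanical enough to pre-check on the host. A sketch for MaxPoolWithArgmaxV2 (NC1HWC0 layout, so H/W sit at ksize[2]/ksize[3]); ub_size_kb is the device unified-buffer size in KB, and how the formula is parameterized is an assumption:

#include <array>

bool CheckMaxPoolWithArgmaxV2(const std::array<int, 4> &ksize,
                              const std::array<int, 4> &strides, int ub_size_kb) {
  const int cap = (ub_size_kb - 8) * 1024 / 6 / 2 / 16;  // (ub_size-8)*1024//6//2//16
  return ksize[0] == 1 && ksize[1] == 1 && ksize[2] * ksize[3] <= cap &&
         strides[0] == 1 && strides[1] == 1 &&
         strides[2] >= 1 && strides[2] <= 2048 &&
         strides[3] >= 1 && strides[3] <= 2048;
}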


third_party/fwkacllib/inc/ops/ocr_ops.h (+5, -5)

@@ -239,10 +239,10 @@ REG_OP(OCRDetectionPostHandle)
 *@li polys_data: A Tensor of type int32. point data of every poly.
 *@li polys_offset:A Tensor of type int32. Offset of every poly .
 *@li polys_size:A Tensor of type int32. Size of every poly.
-*@li img_h:A Tensor of type int32. Height of original image.
-*@li img_w:A Tensor of type int32. Width of original image.
 *@li h_scale:A Tensor of type float. Expand scale of height.
-*@li w_scale:A Tensor of type float. Expand scale of width. \n
+*@li w_scale:A Tensor of type float. Expand scale of width.
+*@li img_h:A Tensor of type int32. Height of original image.
+*@li img_w:A Tensor of type int32. Width of original image. \n

 *@par Outputs:
 *@li clipped_polys_data: A Tensor of type int32. point data of every clipped poly. \n
@@ -254,10 +254,10 @@ REG_OP(ResizeAndClipPolys)
 .INPUT(polys_data, TensorType({DT_INT32}))
 .INPUT(polys_offset, TensorType({DT_INT32}))
 .INPUT(polys_size, TensorType({DT_INT32}))
-.INPUT(img_h, TensorType({DT_INT32}))
-.INPUT(img_w, TensorType({DT_INT32}))
 .INPUT(h_scale, TensorType({DT_FLOAT}))
 .INPUT(w_scale, TensorType({DT_FLOAT}))
+.INPUT(img_h, TensorType({DT_INT32}))
+.INPUT(img_w, TensorType({DT_INT32}))
 .OUTPUT(clipped_polys_data, TensorType({DT_INT32}))
 .OUTPUT(clipped_polys_offset, TensorType({DT_INT32}))
 .OUTPUT(clipped_polys_size, TensorType({DT_INT32}))


third_party/fwkacllib/inc/ops/rnn.h (+5, -1)

@@ -1320,6 +1320,7 @@ REG_OP(DynamicGRUV2Grad)
 * @li reset:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li new:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
+* @li seq_length:A 1D Tensor. Must be one of the following types: float16, float32.

 * @par Attributes:
 * @li t_state:An Int identifying the current t state. Default to [0, 4].
@@ -1343,6 +1344,7 @@ REG_OP(GRUV2HiddenGradCell)
 .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
 .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
 .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
+.OPTIONAL_INPUT(seq_length, TensorType({DT_FLOAT16, DT_FLOAT}))
 .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
 .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
 .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1353,7 +1355,7 @@ REG_OP(GRUV2HiddenGradCell)
 /**
 * @brief: DynamicGRUCellGrad calculation.
 * @par Inputs:
-* ten inputs: \n
+* eleven inputs: \n
 * @li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li h:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li dy:A 4D Tensor. Must be one of the following types: float16, float32.
@@ -1364,6 +1366,7 @@ REG_OP(GRUV2HiddenGradCell)
 * @li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li init_h:A 4D Tensor. Must be one of the following types: float16, float32.
 * @li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND.
+* @li seq_length:A 1D Tensor. Must be one of the following types: float16, float32.

 * @par Attributes:
 * gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.
@@ -1388,6 +1391,7 @@ REG_OP(DynamicGRUCellGrad)
 .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
 .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
 .INPUT(t_state, TensorType({DT_INT32, DT_INT32}))
+.OPTIONAL_INPUT(seq_length, TensorType({DT_FLOAT16, DT_FLOAT}))
 .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
 .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
 .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))

