@@ -84,9 +84,9 @@ if (ENABLE_OPEN_SRC)
 elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
   add_subdirectory(tests)
 else()
-  find_module(slog libalog.so ${ASCEND_ATC_DIR})
-  find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
-  find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
+  find_module(slog libslog.so ${ASCEND_ATC_DIR} ${ASCEND_DRIVER_COMMON_DIR})
+  find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
+  find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
   if(PLATFORM STREQUAL "train")
     find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
     find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
@@ -5,14 +5,10 @@ endif()
 include(ExternalProject)
 set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include)
-if (GE_PB_PKG)
-    set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
-    set(MD5 "0dc903888211db3a0f170304cd9f3a89")
-    set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
-#elseif (ENABLE_GITEE)
-#    set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
-#    set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
-#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
+if (ENABLE_GITEE)
+    set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
+    set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
+    set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
 else()
   set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
   set(MD5 "0dc903888211db3a0f170304cd9f3a89")
@@ -0,0 +1,73 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INC_EXTERNAL_ACL_ACL_H_
#define INC_EXTERNAL_ACL_ACL_H_

#include "acl_rt.h"
#include "acl_op.h"
#include "acl_mdl.h"

#ifdef __cplusplus
extern "C" {
#endif

// Current version is 1.0.0
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0

/**
 * @ingroup AscendCL
 * @brief acl initialize
 *
 * @par Restriction
 * The aclInit interface can be called only once in a process
 * @param configPath [IN] the config path, it can be NULL
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath);

/**
 * @ingroup AscendCL
 * @brief acl finalize
 *
 * @par Restriction
 * Need to call aclFinalize before the process exits.
 * After calling aclFinalize, the services cannot continue to be used normally.
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclFinalize();

/**
 * @ingroup AscendCL
 * @brief query ACL interface version
 *
 * @param majorVersion[OUT] ACL interface major version
 * @param minorVersion[OUT] ACL interface minor version
 * @param patchVersion[OUT] ACL interface patch version
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion);
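
/*
 * Usage sketch (illustrative comment, not part of the API surface): the
 * minimal init / query / finalize lifecycle. Error handling is reduced to a
 * single check; the NULL config path means the default configuration is used.
 *
 *     int32_t major = 0, minor = 0, patch = 0;
 *     if (aclInit(NULL) != ACL_SUCCESS) {
 *         return -1;                 // aclInit may be called only once per process
 *     }
 *     (void)aclrtGetVersion(&major, &minor, &patch);
 *     // ... create contexts, load models, execute ops ...
 *     (void)aclFinalize();           // required before the process exits
 */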

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_H_
@@ -0,0 +1,610 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INC_EXTERNAL_ACL_ACL_BASE_H_
#define INC_EXTERNAL_ACL_ACL_BASE_H_

#include <stdint.h>
#include <stddef.h>

#include "error_codes/rt_error_codes.h"
#include "error_codes/ge_error_codes.h"

#ifdef __cplusplus
extern "C" {
#endif

#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
#define ACL_FUNC_VISIBILITY __declspec(dllexport)
#else
#define ACL_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define ACL_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define ACL_FUNC_VISIBILITY
#endif
#endif

#ifdef __GNUC__
#define ACL_DEPRECATED __attribute__((deprecated))
#define ACL_DEPRECATED_MESSAGE(message) __attribute__((deprecated(message)))
#elif defined(_MSC_VER)
#define ACL_DEPRECATED __declspec(deprecated)
#define ACL_DEPRECATED_MESSAGE(message) __declspec(deprecated(message))
#else
#define ACL_DEPRECATED
#define ACL_DEPRECATED_MESSAGE(message)
#endif

typedef void *aclrtStream;
typedef void *aclrtEvent;
typedef void *aclrtContext;
typedef int aclError;
typedef uint16_t aclFloat16;
typedef struct aclDataBuffer aclDataBuffer;
typedef struct aclTensorDesc aclTensorDesc;

static const int ACL_ERROR_NONE = 0;
static const int ACL_SUCCESS = 0;

static const int ACL_ERROR_INVALID_PARAM = 100000;
static const int ACL_ERROR_UNINITIALIZE = 100001;
static const int ACL_ERROR_REPEAT_INITIALIZE = 100002;
static const int ACL_ERROR_INVALID_FILE = 100003;
static const int ACL_ERROR_WRITE_FILE = 100004;
static const int ACL_ERROR_INVALID_FILE_SIZE = 100005;
static const int ACL_ERROR_PARSE_FILE = 100006;
static const int ACL_ERROR_FILE_MISSING_ATTR = 100007;
static const int ACL_ERROR_FILE_ATTR_INVALID = 100008;
static const int ACL_ERROR_INVALID_DUMP_CONFIG = 100009;
static const int ACL_ERROR_INVALID_PROFILING_CONFIG = 100010;
static const int ACL_ERROR_INVALID_MODEL_ID = 100011;
static const int ACL_ERROR_DESERIALIZE_MODEL = 100012;
static const int ACL_ERROR_PARSE_MODEL = 100013;
static const int ACL_ERROR_READ_MODEL_FAILURE = 100014;
static const int ACL_ERROR_MODEL_SIZE_INVALID = 100015;
static const int ACL_ERROR_MODEL_MISSING_ATTR = 100016;
static const int ACL_ERROR_MODEL_INPUT_NOT_MATCH = 100017;
static const int ACL_ERROR_MODEL_OUTPUT_NOT_MATCH = 100018;
static const int ACL_ERROR_MODEL_NOT_DYNAMIC = 100019;
static const int ACL_ERROR_OP_TYPE_NOT_MATCH = 100020;
static const int ACL_ERROR_OP_INPUT_NOT_MATCH = 100021;
static const int ACL_ERROR_OP_OUTPUT_NOT_MATCH = 100022;
static const int ACL_ERROR_OP_ATTR_NOT_MATCH = 100023;
static const int ACL_ERROR_OP_NOT_FOUND = 100024;
static const int ACL_ERROR_OP_LOAD_FAILED = 100025;
static const int ACL_ERROR_UNSUPPORTED_DATA_TYPE = 100026;
static const int ACL_ERROR_FORMAT_NOT_MATCH = 100027;
static const int ACL_ERROR_BIN_SELECTOR_NOT_REGISTERED = 100028;
static const int ACL_ERROR_KERNEL_NOT_FOUND = 100029;
static const int ACL_ERROR_BIN_SELECTOR_ALREADY_REGISTERED = 100030;
static const int ACL_ERROR_KERNEL_ALREADY_REGISTERED = 100031;
static const int ACL_ERROR_INVALID_QUEUE_ID = 100032;
static const int ACL_ERROR_REPEAT_SUBSCRIBE = 100033;
static const int ACL_ERROR_STREAM_NOT_SUBSCRIBE = 100034;
static const int ACL_ERROR_THREAD_NOT_SUBSCRIBE = 100035;
static const int ACL_ERROR_WAIT_CALLBACK_TIMEOUT = 100036;
static const int ACL_ERROR_REPEAT_FINALIZE = 100037;
static const int ACL_ERROR_NOT_STATIC_AIPP = 100038;
static const int ACL_ERROR_COMPILING_STUB_MODE = 100039;
static const int ACL_ERROR_GROUP_NOT_SET = 100040;
static const int ACL_ERROR_GROUP_NOT_CREATE = 100041;
static const int ACL_ERROR_PROF_ALREADY_RUN = 100042;
static const int ACL_ERROR_PROF_NOT_RUN = 100043;
static const int ACL_ERROR_DUMP_ALREADY_RUN = 100044;
static const int ACL_ERROR_DUMP_NOT_RUN = 100045;
static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046;
static const int ACL_ERROR_PROF_API_CONFLICT = 148047;
static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048;

static const int ACL_ERROR_BAD_ALLOC = 200000;
static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
static const int ACL_ERROR_INVALID_DEVICE = 200002;
static const int ACL_ERROR_MEMORY_ADDRESS_UNALIGNED = 200003;
static const int ACL_ERROR_RESOURCE_NOT_MATCH = 200004;
static const int ACL_ERROR_INVALID_RESOURCE_HANDLE = 200005;
static const int ACL_ERROR_FEATURE_UNSUPPORTED = 200006;
static const int ACL_ERROR_PROF_MODULES_UNSUPPORTED = 200007;

static const int ACL_ERROR_STORAGE_OVER_LIMIT = 300000;

static const int ACL_ERROR_INTERNAL_ERROR = 500000;
static const int ACL_ERROR_FAILURE = 500001;
static const int ACL_ERROR_GE_FAILURE = 500002;
static const int ACL_ERROR_RT_FAILURE = 500003;
static const int ACL_ERROR_DRV_FAILURE = 500004;
static const int ACL_ERROR_PROFILING_FAILURE = 500005;

#define ACL_TENSOR_SHAPE_RANGE_NUM 2
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE

typedef enum {
  ACL_DT_UNDEFINED = -1,
  ACL_FLOAT = 0,
  ACL_FLOAT16 = 1,
  ACL_INT8 = 2,
  ACL_INT32 = 3,
  ACL_UINT8 = 4,
  ACL_INT16 = 6,
  ACL_UINT16 = 7,
  ACL_UINT32 = 8,
  ACL_INT64 = 9,
  ACL_UINT64 = 10,
  ACL_DOUBLE = 11,
  ACL_BOOL = 12,
  ACL_STRING = 13,
} aclDataType;

typedef enum {
  ACL_FORMAT_UNDEFINED = -1,
  ACL_FORMAT_NCHW = 0,
  ACL_FORMAT_NHWC = 1,
  ACL_FORMAT_ND = 2,
  ACL_FORMAT_NC1HWC0 = 3,
  ACL_FORMAT_FRACTAL_Z = 4,
  ACL_FORMAT_NC1HWC0_C04 = 12,
  ACL_FORMAT_NDHWC = 27,
  ACL_FORMAT_FRACTAL_NZ = 29,
  ACL_FORMAT_NCDHW = 30,
  ACL_FORMAT_NDC1HWC0 = 32,
  ACL_FRACTAL_Z_3D = 33
} aclFormat;

typedef enum {
  ACL_DEBUG = 0,
  ACL_INFO = 1,
  ACL_WARNING = 2,
  ACL_ERROR = 3,
} aclLogLevel;

/**
 * @ingroup AscendCL
 * @brief Converts data of type aclFloat16 to data of type float
 *
 * @param value [IN] Data to be converted
 *
 * @retval Transformed data
 */
ACL_FUNC_VISIBILITY float aclFloat16ToFloat(aclFloat16 value);

/**
 * @ingroup AscendCL
 * @brief Converts data of type float to data of type aclFloat16
 *
 * @param value [IN] Data to be converted
 *
 * @retval Transformed data
 */
ACL_FUNC_VISIBILITY aclFloat16 aclFloatToFloat16(float value);
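
/*
 * Example (illustrative comment): round-tripping a value through aclFloat16.
 * Precision loss is expected for values that are not representable in 16 bits.
 *
 *     aclFloat16 h = aclFloatToFloat16(1.5f);
 *     float f = aclFloat16ToFloat(h);    // f == 1.5f, exactly representable
 */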
/**
 * @ingroup AscendCL
 * @brief create data of aclDataBuffer
 *
 * @param data [IN] pointer to data
 * @li Need to be managed by the user,
 * call aclrtMalloc interface to apply for memory,
 * call aclrtFree interface to release memory
 *
 * @param size [IN] size of data in bytes
 *
 * @retval pointer to created instance. nullptr if run out of memory
 *
 * @see aclrtMalloc | aclrtFree
 */
ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size);

/**
 * @ingroup AscendCL
 * @brief destroy data of aclDataBuffer
 *
 * @par Function
 * Only the aclDataBuffer type data is destroyed here.
 * The memory of the data passed in when the aclCreateDataBuffer interface
 * is called to create aclDataBuffer type data must be released by the user
 *
 * @param dataBuffer [IN] pointer to the aclDataBuffer
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclCreateDataBuffer
 */
ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer);

/**
 * @ingroup AscendCL
 * @brief update new data of aclDataBuffer
 *
 * @param dataBuffer [OUT] pointer to aclDataBuffer
 * @li The old data needs to be released by the user, otherwise a memory leak may occur:
 * call aclGetDataBufferAddr interface to get old data address,
 * call aclrtFree interface to release memory
 *
 * @param data [IN] pointer to new data
 * @li Need to be managed by the user,
 * call aclrtMalloc interface to apply for memory,
 * call aclrtFree interface to release memory
 *
 * @param size [IN] size of data in bytes
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr
 */
ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size);

/**
 * @ingroup AscendCL
 * @brief get data address from aclDataBuffer
 *
 * @param dataBuffer [IN] pointer to the data of aclDataBuffer
 *
 * @retval data address
 */
ACL_FUNC_VISIBILITY void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer);

/**
 * @ingroup AscendCL
 * @brief get data size of aclDataBuffer
 *
 * @param dataBuffer [IN] pointer to the data of aclDataBuffer
 *
 * @retval data size
 */
ACL_DEPRECATED_MESSAGE("aclGetDataBufferSize is deprecated, use aclGetDataBufferSizeV2 instead")
ACL_FUNC_VISIBILITY uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer);

/**
 * @ingroup AscendCL
 * @brief get data size of aclDataBuffer to replace aclGetDataBufferSize
 *
 * @param dataBuffer [IN] pointer to the data of aclDataBuffer
 *
 * @retval data size
 */
ACL_FUNC_VISIBILITY size_t aclGetDataBufferSizeV2(const aclDataBuffer *dataBuffer);
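
/*
 * Usage sketch (illustrative comment; aclrtMalloc/aclrtFree come from
 * acl_rt.h and a device context is assumed to be set up already): the buffer
 * object only wraps caller-owned device memory.
 *
 *     void *devMem = NULL;
 *     size_t size = 1024;
 *     if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) == ACL_SUCCESS) {
 *         aclDataBuffer *buf = aclCreateDataBuffer(devMem, size);
 *         // ... pass buf to op execution ...
 *         (void)aclDestroyDataBuffer(buf);   // destroys the wrapper only
 *         (void)aclrtFree(devMem);           // the memory itself stays with the caller
 *     }
 */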
/**
 * @ingroup AscendCL
 * @brief get size of aclDataType
 *
 * @param dataType [IN] the aclDataType to get the size of
 *
 * @retval size of the aclDataType
 */
ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);

// interfaces of tensor desc

/**
 * @ingroup AscendCL
 * @brief create data aclTensorDesc
 *
 * @param dataType [IN] Data types described by tensor
 * @param numDims [IN] the number of dimensions of the shape
 * @param dims [IN] the size of the specified dimension
 * @param format [IN] tensor format
 *
 * @retval aclTensorDesc pointer.
 * @retval nullptr if param is invalid or run out of memory
 */
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims,
                                                       aclFormat format);

/**
 * @ingroup AscendCL
 * @brief destroy data aclTensorDesc
 *
 * @param desc [IN] pointer to the data of aclTensorDesc to destroy
 */
ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);

/**
 * @ingroup AscendCL
 * @brief set tensor shape range for aclTensorDesc
 *
 * @param desc [OUT] pointer to the data of aclTensorDesc
 * @param dimsCount [IN] the number of dimensions of the shape
 * @param dimsRange [IN] the range of dimensions of the shape
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
                                                    int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);
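
/*
 * Usage sketch (illustrative comment): describing a dynamic-batch NCHW float
 * tensor. -1 marks the dynamic dimension; each range entry is assumed to be
 * a {min, max} pair, matching ACL_TENSOR_SHAPE_RANGE_NUM == 2.
 *
 *     int64_t dims[4] = {-1, 3, 224, 224};
 *     int64_t ranges[4][ACL_TENSOR_SHAPE_RANGE_NUM] = {{1, 8}, {3, 3}, {224, 224}, {224, 224}};
 *     aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT, 4, dims, ACL_FORMAT_NCHW);
 *     if (desc != NULL && aclSetTensorShapeRange(desc, 4, ranges) == ACL_SUCCESS) {
 *         // ... use desc for dynamic-shape op calls ...
 *     }
 *     aclDestroyTensorDesc(desc);
 */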
/**
 * @ingroup AscendCL
 * @brief get data type specified by the tensor description
 *
 * @param desc [IN] pointer to the instance of aclTensorDesc
 *
 * @retval data type specified by the tensor description.
 * @retval ACL_DT_UNDEFINED if description is null
 */
ACL_FUNC_VISIBILITY aclDataType aclGetTensorDescType(const aclTensorDesc *desc);

/**
 * @ingroup AscendCL
 * @brief get data format specified by the tensor description
 *
 * @param desc [IN] pointer to the instance of aclTensorDesc
 *
 * @retval data format specified by the tensor description.
 * @retval ACL_FORMAT_UNDEFINED if description is null
 */
ACL_FUNC_VISIBILITY aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc);

/**
 * @ingroup AscendCL
 * @brief get tensor size specified by the tensor description
 *
 * @param desc [IN] pointer to the instance of aclTensorDesc
 *
 * @retval data size specified by the tensor description.
 * @retval 0 if description is null
 */
ACL_FUNC_VISIBILITY size_t aclGetTensorDescSize(const aclTensorDesc *desc);

/**
 * @ingroup AscendCL
 * @brief get element count specified by the tensor description
 *
 * @param desc [IN] pointer to the instance of aclTensorDesc
 *
 * @retval element count specified by the tensor description.
 * @retval 0 if description is null
 */
ACL_FUNC_VISIBILITY size_t aclGetTensorDescElementCount(const aclTensorDesc *desc);

/**
 * @ingroup AscendCL
 * @brief get number of dims specified by the tensor description
 *
 * @param desc [IN] pointer to the instance of aclTensorDesc
 *
 * @retval number of dims specified by the tensor description.
 * @retval 0 if description is null
 * @retval ACL_UNKNOWN_RANK if the tensor dim is -2
 */
ACL_FUNC_VISIBILITY size_t aclGetTensorDescNumDims(const aclTensorDesc *desc);

/**
 * @ingroup AscendCL
 * @brief Get the size of the specified dim in the tensor description
 *
 * @param desc [IN] pointer to the instance of aclTensorDesc
 * @param index [IN] index of dims, starting from 0.
 *
 * @retval dim specified by the tensor description and index.
 * @retval -1 if description or index is invalid
 */
ACL_DEPRECATED_MESSAGE("aclGetTensorDescDim is deprecated, use aclGetTensorDescDimV2 instead")
ACL_FUNC_VISIBILITY int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index);

/**
 * @ingroup AscendCL
 * @brief Get the size of the specified dim in the tensor description
 *
 * @param desc [IN] pointer to the instance of aclTensorDesc
 * @param index [IN] index of dims, starting from 0.
 * @param dimSize [OUT] size of the specified dim.
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, size_t index, int64_t *dimSize);

/**
 * @ingroup AscendCL
 * @brief Get the range of the specified dim in the tensor description
 *
 * @param desc [IN] pointer to the instance of aclTensorDesc
 * @param index [IN] index of dims, starting from 0.
 * @param dimRangeNum [IN] number of dimRange.
 * @param dimRange [OUT] range of the specified dim.
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum,
                                                      int64_t *dimRange);

/**
 * @ingroup AscendCL
 * @brief set tensor description name
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param name [IN] tensor description name
 */
ACL_FUNC_VISIBILITY void aclSetTensorDescName(aclTensorDesc *desc, const char *name);

/**
 * @ingroup AscendCL
 * @brief get tensor description name
 *
 * @param desc [IN] pointer to the instance of aclTensorDesc
 *
 * @retval tensor description name.
 * @retval empty string if description is null
 */
ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);

/**
 * @ingroup AscendCL
 * @brief Convert the format in the source aclTensorDesc according to
 * the specified dstFormat to generate a new target aclTensorDesc.
 * The format in the source aclTensorDesc remains unchanged.
 *
 * @param srcDesc [IN] pointer to the source tensor desc
 * @param dstFormat [IN] destination format
 * @param dstDesc [OUT] pointer to the pointer to the destination tensor desc
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
                                                      aclTensorDesc **dstDesc);

/**
 * @ingroup AscendCL
 * @brief Set the storage format specified by the tensor description
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param format [IN] the storage format
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_DEPRECATED_MESSAGE("aclSetTensorStorageFormat is deprecated, use aclSetTensorFormat instead")
ACL_FUNC_VISIBILITY aclError aclSetTensorStorageFormat(aclTensorDesc *desc, aclFormat format);

/**
 * @ingroup AscendCL
 * @brief Set the storage shape specified by the tensor description
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param numDims [IN] the number of dimensions of the shape
 * @param dims [IN] the size of the specified dimension
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_DEPRECATED_MESSAGE("aclSetTensorStorageShape is deprecated, use aclSetTensorShape instead")
ACL_FUNC_VISIBILITY aclError aclSetTensorStorageShape(aclTensorDesc *desc, int numDims, const int64_t *dims);

/**
 * @ingroup AscendCL
 * @brief Set the format specified by the tensor description
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param format [IN] the format
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorFormat(aclTensorDesc *desc, aclFormat format);

/**
 * @ingroup AscendCL
 * @brief Set the shape specified by the tensor description
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param numDims [IN] the number of dimensions of the shape
 * @param dims [IN] the size of the specified dimension
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorShape(aclTensorDesc *desc, int numDims, const int64_t *dims);

/**
 * @ingroup AscendCL
 * @brief Set the original format specified by the tensor description
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param format [IN] the original format
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorOriginFormat(aclTensorDesc *desc, aclFormat format);

/**
 * @ingroup AscendCL
 * @brief Set the original shape specified by the tensor description
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param numDims [IN] the number of dimensions of the shape
 * @param dims [IN] the size of the specified dimension
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int numDims, const int64_t *dims);

/**
 * @ingroup AscendCL
 * @brief get op description info
 *
 * @param desc [IN] pointer to tensor description
 * @param index [IN] index of tensor
 *
 * @retval null for failed.
 * @retval OtherValues success.
 */
ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);

/**
 * @ingroup AscendCL
 * @brief get address of tensor
 *
 * @param desc [IN] pointer to tensor description
 *
 * @retval null for failed
 * @retval OtherValues success
 */
ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);

/**
 * @ingroup AscendCL
 * @brief Set the dynamic input name specified by the tensor description
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param dynamicInputName [IN] pointer to the dynamic input name
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName);

/**
 * @ingroup AscendCL
 * @brief Set const data specified by the tensor description
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param dataBuffer [IN] pointer to the const databuffer
 * @param length [IN] the length of const databuffer
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length);

/**
 * @ingroup AscendCL
 * @brief an interface for users to output APP logs
 *
 * @param logLevel [IN] the level of current log
 * @param func [IN] the function where the log is located
 * @param file [IN] the file where the log is located
 * @param line [IN] the source line number of the log
 * @param fmt [IN] the format of current log
 * @param ... [IN] the value of current log
 */
ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
                                   const char *fmt, ...);

#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
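
/*
 * Usage sketch (illustrative comment): ACL_APP_LOG fills in function, file
 * and line automatically, so application code only supplies level, format
 * and arguments.
 *
 *     ACL_APP_LOG(ACL_INFO, "model loaded, id = %u", modelId);
 */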

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_BASE_H_
@@ -0,0 +1,504 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INC_EXTERNAL_ACL_ACL_OP_H_
#define INC_EXTERNAL_ACL_ACL_OP_H_

#include "acl_base.h"
#include "acl_rt.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct aclopHandle aclopHandle;
typedef struct aclopAttr aclopAttr;
typedef struct aclopKernelDesc aclopKernelDesc;

typedef void (*aclDataDeallocator)(void *data, size_t length);

static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;

typedef enum aclEngineType {
  ACL_ENGINE_SYS,
  ACL_ENGINE_AICORE,
  ACL_ENGINE_VECTOR,
} aclopEngineType;

/**
 * @ingroup AscendCL
 * @brief Set base directory that contains single op models
 *
 * @par Restriction
 * The aclopSetModelDir interface can be called only once in a process.
 * @param modelDir [IN] path of the directory
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetModelDir(const char *modelDir);

/**
 * @ingroup AscendCL
 * @brief load single op models from memory
 *
 * @par Restriction
 * The aclopLoad interface can be called more than once in a process.
 * @param model [IN] address of single op models
 * @param modelSize [IN] size of single op models
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopLoad(const void *model, size_t modelSize);

/**
 * @ingroup AscendCL
 * @brief create data of type aclopAttr
 *
 * @retval pointer to created instance.
 * @retval nullptr if run out of memory
 */
ACL_FUNC_VISIBILITY aclopAttr *aclopCreateAttr();

/**
 * @ingroup AscendCL
 * @brief destroy data of type aclopAttr
 *
 * @param attr [IN] pointer to the instance of aclopAttr
 */
ACL_FUNC_VISIBILITY void aclopDestroyAttr(const aclopAttr *attr);

/**
 * @ingroup AscendCL
 * @brief set an attribute. the type of the attribute is bool
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
 * @param attrName [IN] attribute name
 * @param attrValue [IN] attribute value
 * false if attrValue is 0, true otherwise.
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetAttrBool(aclopAttr *attr, const char *attrName, uint8_t attrValue);

/**
 * @ingroup AscendCL
 * @brief set an attribute. the type of the attribute is int64_t
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
 * @param attrName [IN] attribute name
 * @param attrValue [IN] attribute value
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetAttrInt(aclopAttr *attr, const char *attrName, int64_t attrValue);

/**
 * @ingroup AscendCL
 * @brief set an attribute. the type of the attribute is float
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
 * @param attrName [IN] attribute name
 * @param attrValue [IN] attribute value
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attrName, float attrValue);

/**
 * @ingroup AscendCL
 * @brief set an attribute. the type of the attribute is string
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
 * @param attrName [IN] attribute name
 * @param attrValue [IN] attribute value
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue);

/**
 * @ingroup AscendCL
 * @brief set an attribute. the type of the attribute is list of bools
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
 * @param attrName [IN] attribute name
 * @param numValues [IN] number of values
 * @param values [IN] pointer to values. each value is false if 0, true otherwise
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
                                                  const uint8_t *values);

/**
 * @ingroup AscendCL
 * @brief set an attribute. the type of the attribute is list of ints
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
 * @param attrName [IN] attribute name
 * @param numValues [IN] number of values
 * @param values [IN] pointer to values
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
                                                 const int64_t *values);

/**
 * @ingroup AscendCL
 * @brief set an attribute. the type of the attribute is list of floats
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
 * @param attrName [IN] attribute name
 * @param numValues [IN] number of values
 * @param values [IN] pointer to values
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
                                                   const float *values);

/**
 * @ingroup AscendCL
 * @brief set an attribute. the type of the attribute is list of strings
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
 * @param attrName [IN] attribute name
 * @param numValues [IN] number of values
 * @param values [IN] pointer to values
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
                                                    const char **values);

/**
 * @ingroup AscendCL
 * @brief set an attribute. the type of the attribute is list of list of ints
 *
 * @param attr [OUT] pointer to the instance of aclopAttr
 * @param attrName [IN] attribute name
 * @param numLists [IN] number of lists
 * @param numValues [IN] pointer to number of values of each list
 * @param values [IN] pointer to values
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists,
                                                     const int *numValues, const int64_t *const values[]);
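
/*
 * Usage sketch (illustrative comment): building an attribute set for an op.
 * The attribute names "keep_dims" and "axes" are placeholders; real names are
 * defined by the op being called.
 *
 *     aclopAttr *attr = aclopCreateAttr();
 *     if (attr != NULL) {
 *         (void)aclopSetAttrBool(attr, "keep_dims", 1);
 *         int64_t axes[2] = {0, 1};
 *         (void)aclopSetAttrListInt(attr, "axes", 2, axes);
 *         // ... pass attr to aclopExecuteV2 / aclopCreateHandle ...
 *         aclopDestroyAttr(attr);
 *     }
 */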
/**
 * @ingroup AscendCL
 * @brief Load and execute the specified operator asynchronously
 *
 * @par Restriction
 * @li The input and output organization of each operator is different,
 * and the application needs to organize the operator strictly
 * according to the operator input and output parameters when calling.
 * @li When the user calls aclopExecute,
 * the ACL finds the corresponding task according to the optype,
 * the description of the input tensor,
 * the description of the output tensor, and attr, and issues the execution.
 *
 * @param opType [IN] type of op
 * @param numInputs [IN] number of inputs
 * @param inputDesc [IN] pointer to array of input tensor descriptions
 * @param inputs [IN] pointer to array of input buffers
 * @param numOutputs [IN] number of outputs
 * @param outputDesc [IN] pointer to array of output tensor descriptions
 * @param outputs [OUT] pointer to array of output buffers
 * @param attr [IN] pointer to instance of aclopAttr.
 * may pass nullptr if the op has no attribute
 * @param stream [IN] stream
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead")
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
                                          const aclDataBuffer *const inputs[], int numOutputs,
                                          const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
                                          const aclopAttr *attr, aclrtStream stream);

/**
 * @ingroup AscendCL
 * @brief Load and execute the specified operator
 * The difference with aclopExecute is that aclopExecuteV2 will refresh outputDesc
 *
 * @par Restriction
 * @li The input and output organization of each operator is different,
 * and the application needs to organize the operator strictly
 * according to the operator input and output parameters when calling.
 * @li When the user calls aclopExecuteV2,
 * the ACL finds the corresponding task according to the optype,
 * the description of the input tensor,
 * the description of the output tensor, and attr, and issues the execution.
 *
 * @param opType [IN] type of op
 * @param numInputs [IN] number of inputs
 * @param inputDesc [IN] pointer to array of input tensor descriptions
 * @param inputs [IN] pointer to array of input buffers
 * @param numOutputs [IN] number of outputs
 * @param outputDesc [IN|OUT] pointer to array of output tensor descriptions
 * @param outputs [OUT] pointer to array of output buffers
 * @param attr [IN] pointer to instance of aclopAttr.
 * may pass nullptr if the op has no attribute
 * @param stream [IN] stream
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
                                            aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
                                            aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream);

/**
 * @ingroup AscendCL
 * @brief create an instance of aclopHandle.
 *
 * @param opType [IN] type of op
 * @param numInputs [IN] number of inputs
 * @param inputDesc [IN] pointer to array of input tensor descriptions
 * @param numOutputs [IN] number of outputs
 * @param outputDesc [IN] pointer to array of output tensor descriptions
 * @param opAttr [IN] pointer to instance of aclopAttr.
 * may pass nullptr if the op has no attribute
 * @param handle [OUT] pointer to the pointer to the handle
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs,
                                               const aclTensorDesc *const inputDesc[], int numOutputs,
                                               const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
                                               aclopHandle **handle);

/**
 * @ingroup AscendCL
 * @brief destroy aclopHandle instance
 *
 * @param handle [IN] pointer to the instance of aclopHandle
 */
ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);

/**
 * @ingroup AscendCL
 * @brief execute an op with the handle.
 * can save op model matching cost compared with aclopExecute
 *
 * @param handle [IN] pointer to the instance of aclopHandle.
 * The aclopCreateHandle interface has been called
 * in advance to create aclopHandle type data.
 * @param numInputs [IN] number of inputs
 * @param inputs [IN] pointer to array of input buffers.
 * The aclCreateDataBuffer interface has been called
 * in advance to create aclDataBuffer type data.
 * @param numOutputs [IN] number of outputs
 * @param outputs [OUT] pointer to array of output buffers
 * @param stream [IN] stream
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclopCreateHandle | aclCreateDataBuffer
 */
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs,
                                                 const aclDataBuffer *const inputs[], int numOutputs,
                                                 aclDataBuffer *const outputs[], aclrtStream stream);
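
/*
 * Usage sketch (illustrative comment): handle-based execution amortizes the
 * op model matching cost across repeated launches. The "Add" op type, the
 * descriptor/buffer arrays and the stream are assumed to be prepared with the
 * interfaces above.
 *
 *     aclopHandle *handle = NULL;
 *     if (aclopCreateHandle("Add", 2, inputDesc, 1, outputDesc, NULL, &handle) == ACL_SUCCESS) {
 *         for (int i = 0; i < 100; ++i) {
 *             (void)aclopExecWithHandle(handle, 2, inputs, 1, outputs, stream);
 *         }
 *         aclopDestroyHandle(handle);
 *     }
 */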
/**
 * @ingroup AscendCL
 * @brief cast data type
 *
 * @param srcDesc [IN] source tensor desc
 * @param srcBuffer [IN] source tensor buffer
 * @param dstDesc [IN] destination tensor desc
 * @param dstBuffer [OUT] destination tensor buffer
 * @param truncate [IN] do not truncate if value is 0, truncate otherwise
 * @param stream [IN] stream
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer,
                                       const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate,
                                       aclrtStream stream);
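
/*
 * Usage sketch (illustrative comment): casting float32 to float16 on a
 * stream. srcBuffer/dstBuffer wrap caller-owned device memory sized for the
 * respective data types; truncate = 0 keeps the default conversion behaviour.
 *
 *     aclTensorDesc *srcDesc = aclCreateTensorDesc(ACL_FLOAT, 4, dims, ACL_FORMAT_NCHW);
 *     aclTensorDesc *dstDesc = aclCreateTensorDesc(ACL_FLOAT16, 4, dims, ACL_FORMAT_NCHW);
 *     aclError ret = aclopCast(srcDesc, srcBuffer, dstDesc, dstBuffer, 0, stream);
 */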
/**
 * @ingroup AscendCL
 * @brief create a handle for casting datatype
 *
 * @param srcDesc [IN] source tensor desc
 * @param dstDesc [IN] destination tensor desc
 * @param truncate [IN] do not truncate if value is 0, truncate otherwise
 * @param handle [OUT] pointer to the pointer to the handle
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate,
                                                      aclopHandle **handle);

/**
 * @ingroup AscendCL
 * @brief create kernel
 *
 * @param opType [IN] op type
 * @param kernelId [IN] kernel id
 * @param kernelName [IN] kernel name
 * @param binData [IN] kernel bin data
 * @param binSize [IN] kernel bin size
 * @param enginetype [IN] engine type
 * @param deallocator [IN] callback function for deallocating bin data,
 * null if bin data is to be deallocated by the caller
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclopCompile
 */
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName,
                                               void *binData, int binSize, aclopEngineType enginetype,
                                               aclDataDeallocator deallocator);

/**
 * @ingroup AscendCL
 * @brief create kernel
 *
 * @param numInputs [IN] number of inputs
 * @param inputDesc [IN] pointer to array of input tensor descriptions
 * @param numOutputs [IN] number of outputs
 * @param outputDesc [IN] pointer to array of output tensor descriptions
 * @param opAttr [IN] pointer to instance of aclopAttr
 * @param aclopKernelDesc [IN] pointer to instance of aclopKernelDesc
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs,
                                     const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
                                     aclopKernelDesc *aclopKernelDesc);

/**
 * @ingroup AscendCL
 * @brief register compile function
 *
 * @param opType [IN] op type
 * @param func [IN] compile function
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclopUnregisterCompileFunc
 */
ACL_FUNC_VISIBILITY aclError aclopRegisterCompileFunc(const char *opType, aclopCompileFunc func);

/**
 * @ingroup AscendCL
 * @brief unregister compile function
 *
 * @param opType [IN] op type
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);

/**
 * @ingroup AscendCL
 * @brief set kernel args
 *
 * @param kernelDesc [IN] pointer to instance of aclopKernelDesc
 * @param kernelId [IN] kernel id
 * @param blockDim [IN] block dim
 * @param args [IN] args
 * @param argSize [IN] size in bytes of args
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim,
                                                const void *args, uint32_t argSize);

/**
 * @ingroup AscendCL
 * @brief set workspace sizes
 *
 * @param kernelDesc [IN] pointer to instance of aclopKernelDesc
 * @param numWorkspaces [IN] number of workspaces
 * @param workspaceSizes [IN] pointer to array of sizes of workspaces
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kernelDesc, int numWorkspaces,
                                                          size_t *workspaceSizes);

/**
 * @ingroup AscendCL
 * @brief compile op with dynamic shape
 *
 * @param opType [IN] op type
 * @param numInputs [IN] number of inputs
 * @param inputDesc [IN] pointer to array of input tensor descriptions
 * @param numOutputs [IN] number of outputs
 * @param outputDesc [IN] pointer to array of output tensor descriptions
 * @param attr [IN] pointer to instance of aclopAttr.
 * may pass nullptr if the op has no attribute
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs,
                                               const aclTensorDesc *const inputDesc[], int numOutputs,
                                               const aclTensorDesc *const outputDesc[], const aclopAttr *attr);

/**
 * @ingroup AscendCL
 * @brief inferShape the specified operator synchronously
 *
 * @param opType [IN] type of op
 * @param numInputs [IN] number of inputs
 * @param inputDesc [IN] pointer to array of input tensor descriptions
 * @param inputs [IN] pointer to array of input buffers
 * @param numOutputs [IN] number of outputs
 * @param outputDesc [OUT] pointer to array of output tensor descriptions
 * @param attr [IN] pointer to instance of aclopAttr.
 * may pass nullptr if the op has no attribute
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
                                             aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
                                             aclopAttr *attr);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_H_
@@ -0,0 +1,106 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
#define INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_

#include "acl_base.h"
#include "acl_op.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType;

typedef enum {
  ACL_PRECISION_MODE,
  ACL_AICORE_NUM,
  ACL_AUTO_TUNE_MODE,
  ACL_OP_SELECT_IMPL_MODE,
  ACL_OPTYPELIST_FOR_IMPLMODE,
  ACL_OP_DEBUG_LEVEL,
  ACL_DEBUG_DIR,
  ACL_OP_COMPILER_CACHE_MODE,
  ACL_OP_COMPILER_CACHE_DIR
} aclCompileOpt;

/**
 * @ingroup AscendCL
 * @brief compile op
 *
 * @param opType [IN] op type
 * @param numInputs [IN] number of inputs
 * @param inputDesc [IN] pointer to array of input tensor descriptions
 * @param numOutputs [IN] number of outputs
 * @param outputDesc [IN] pointer to array of output tensor descriptions
 * @param attr [IN] pointer to instance of aclopAttr.
 * may pass nullptr if the op has no attribute
 * @param engineType [IN] engine type
 * @param compileFlag [IN] compile flag
 * @param opPath [IN] path of op
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
                                          int numOutputs, const aclTensorDesc *const outputDesc[],
                                          const aclopAttr *attr, aclopEngineType engineType,
                                          aclopCompileType compileFlag, const char *opPath);

/**
 * @ingroup AscendCL
 * @brief compile and execute op
 *
 * @param opType [IN] op type
 * @param numInputs [IN] number of inputs
 * @param inputDesc [IN] pointer to array of input tensor descriptions
 * @param inputs [IN] pointer to array of input buffers
 * @param numOutputs [IN] number of outputs
 * @param outputDesc [IN] pointer to array of output tensor descriptions
 * @param outputs [IN] pointer to array of output buffers
 * @param attr [IN] pointer to instance of aclopAttr.
 * may pass nullptr if the op has no attribute
 * @param engineType [IN] engine type
 * @param compileFlag [IN] compile flag
 * @param opPath [IN] path of op
 * @param stream [IN] stream handle
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(
    const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
    int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
    aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);

/**
 * @ingroup AscendCL
 * @brief set compile option
 *
 * @param opt [IN] compile option
 * @param value [IN] pointer to the option value
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value);
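
/*
 * Usage sketch (illustrative comment): setting a compile option before op
 * compilation. The "allow_fp32_to_fp16" value is only an example; consult the
 * option's documentation for the accepted vocabulary.
 *
 *     (void)aclSetCompileopt(ACL_PRECISION_MODE, "allow_fp32_to_fp16");
 */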

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
@@ -0,0 +1,296 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INC_EXTERNAL_ACL_PROF_H_
#define INC_EXTERNAL_ACL_PROF_H_

#include "acl_base.h"

#ifdef __cplusplus
extern "C" {
#endif

#define ACL_PROF_ACL_API 0x0001
#define ACL_PROF_TASK_TIME 0x0002
#define ACL_PROF_AICORE_METRICS 0x0004
#define ACL_PROF_AICPU 0x0008

#define ACL_PROF_MAX_OP_NAME_LEN 257
#define ACL_PROF_MAX_OP_TYPE_LEN 65

typedef enum {
  ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
  ACL_AICORE_PIPE_UTILIZATION = 1,
  ACL_AICORE_MEMORY_BANDWIDTH = 2,
  ACL_AICORE_L0B_AND_WIDTH = 3,
  ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
  ACL_AICORE_NONE = 0xFF
} aclprofAicoreMetrics;

typedef struct aclprofConfig aclprofConfig;
typedef struct aclprofStopConfig aclprofStopConfig;
typedef struct aclprofAicoreEvents aclprofAicoreEvents;
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig;

/**
 * @ingroup AscendCL
 * @brief profiling initialize
 *
 * @param profilerResultPath [IN] path of profiling result
 * @param length [IN] length of profilerResultPath
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofFinalize
 */
ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length);

/**
 * @ingroup AscendCL
 * @brief profiling finalize
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofInit
 */
ACL_FUNC_VISIBILITY aclError aclprofFinalize();

/**
 * @ingroup AscendCL
 * @brief Start profiling modules by profilerConfig
 *
 * @param profilerConfig [IN] config of profiling
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofStop
 */
ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig);
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create data of type aclprofConfig | |||||
| * | |||||
| * @param deviceIdList [IN] list of device id | |||||
| * @param deviceNums [IN] number of devices | |||||
| * @param aicoreMetrics [IN] type of aicore metrics | |||||
| * @param aicoreEvents [IN] pointer to aicore events, only NULL is supported now | |||||
| * @param dataTypeConfig [IN] config of modules that need profiling | |||||
| * | |||||
| * @retval the aclprofConfig pointer | |||||
| * | |||||
| * @see aclprofDestroyConfig | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, | |||||
| aclprofAicoreMetrics aicoreMetrics, | |||||
| aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy data of type aclprofConfig | |||||
| * | |||||
| * @param profilerConfig [IN] config of profiling | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofCreateConfig | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief stop profiling modules by profilerConfig | |||||
| * | |||||
| * @param profilerConfig [IN] pointer to stop config of profiling | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofStart | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); | |||||
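| /* Illustrative profiling lifecycle sketch (error handling omitted; the result | |||||
| * path "/tmp/prof" is an arbitrary example): | |||||
| * | |||||
| *   const char *path = "/tmp/prof"; | |||||
| *   aclprofInit(path, strlen(path)); | |||||
| *   uint32_t devices[] = {0}; | |||||
| *   aclprofConfig *cfg = aclprofCreateConfig(devices, 1, ACL_AICORE_NONE, NULL, | |||||
| *                                            ACL_PROF_ACL_API | ACL_PROF_TASK_TIME); | |||||
| *   aclprofStart(cfg); | |||||
| *   // ... run the workload to be profiled ... | |||||
| *   aclprofStop(cfg); | |||||
| *   aclprofDestroyConfig(cfg); | |||||
| *   aclprofFinalize(); | |||||
| */ | |||||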
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief subscribe profiling data of model | |||||
| * | |||||
| * @param modelId [IN] the model id subscribed | |||||
| * @param profSubscribeConfig [IN] pointer to config of model subscribe | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofModelUnSubscribe | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief unsubscribe profiling data of model | |||||
| * | |||||
| * @param modelId [IN] the model id unsubscribed | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofModelSubscribe | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create subscribe config | |||||
| * | |||||
| * @param timeInfoSwitch [IN] switch that controls whether to get time info from the model | |||||
| * @param aicoreMetrics [IN] aicore metrics | |||||
| * @param fd [IN] pointer to write pipe | |||||
| * | |||||
| * @retval the aclprofSubscribeConfig pointer | |||||
| * | |||||
| * @see aclprofDestroySubscribeConfig | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, | |||||
| aclprofAicoreMetrics aicoreMetrics, void *fd); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief destroy subscribe config | |||||
| * | |||||
| * @param profSubscribeConfig [IN] subscribe config | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofCreateSubscribeConfig | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get the size of op description data | |||||
| * | |||||
| * @param opDescSize [OUT] size of op desc | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get op number from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param opNumber [OUT] op number of subscription data | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get op type from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * @param opType [OUT] obtained op type string | |||||
| * @param opTypeLen [IN] length of the buffer for the op type string | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType, | |||||
| size_t opTypeLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get op name from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * @param opName [OUT] obtained op name string | |||||
| * @param opNameLen [IN] length of the buffer for the op name string | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName, | |||||
| size_t opNameLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get start time of specified op from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * | |||||
| * @retval start timestamp (us) of the specified op | |||||
| * @retval 0 for failed | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get end time of specified op from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * | |||||
| * @retval end timestamp (us) of the specified op | |||||
| * @retval 0 for failed | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get execution time of specified op from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * | |||||
| * @retval execution time (us) of the specified op | |||||
| * @retval 0 for failed | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
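| /* Illustrative sketch: walking one block of subscription data. opInfo and | |||||
| * opInfoLen are assumed to have been read from the pipe registered through | |||||
| * aclprofCreateSubscribeConfig; error handling is omitted. | |||||
| * | |||||
| *   uint32_t opNum = 0; | |||||
| *   aclprofGetOpNum(opInfo, opInfoLen, &opNum); | |||||
| *   for (uint32_t i = 0; i < opNum; ++i) { | |||||
| *       char opType[ACL_PROF_MAX_OP_TYPE_LEN]; | |||||
| *       char opName[ACL_PROF_MAX_OP_NAME_LEN]; | |||||
| *       aclprofGetOpType(opInfo, opInfoLen, i, opType, sizeof(opType)); | |||||
| *       aclprofGetOpName(opInfo, opInfoLen, i, opName, sizeof(opName)); | |||||
| *       uint64_t durationUs = aclprofGetOpDuration(opInfo, opInfoLen, i); | |||||
| *       // consume opType / opName / durationUs | |||||
| *   } | |||||
| */ | |||||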
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get model id from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * | |||||
| * @retval model id of subscription data | |||||
| * @retval 0 for failed | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_PROF_H_ | |||||
| @@ -0,0 +1,932 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
| #define INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
| #include <stdint.h> | |||||
| #include <stddef.h> | |||||
| #include "acl_base.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| typedef enum aclrtRunMode { | |||||
| ACL_DEVICE, | |||||
| ACL_HOST, | |||||
| } aclrtRunMode; | |||||
| typedef enum aclrtTsId { | |||||
| ACL_TS_ID_AICORE = 0, | |||||
| ACL_TS_ID_AIVECTOR = 1, | |||||
| ACL_TS_ID_RESERVED = 2, | |||||
| } aclrtTsId; | |||||
| typedef enum aclrtEventStatus { | |||||
| ACL_EVENT_STATUS_COMPLETE = 0, | |||||
| ACL_EVENT_STATUS_NOT_READY = 1, | |||||
| ACL_EVENT_STATUS_RESERVED = 2, | |||||
| } aclrtEventStatus; | |||||
| typedef enum aclrtCallbackBlockType { | |||||
| ACL_CALLBACK_NO_BLOCK, | |||||
| ACL_CALLBACK_BLOCK, | |||||
| } aclrtCallbackBlockType; | |||||
| typedef enum aclrtMemcpyKind { | |||||
| ACL_MEMCPY_HOST_TO_HOST, | |||||
| ACL_MEMCPY_HOST_TO_DEVICE, | |||||
| ACL_MEMCPY_DEVICE_TO_HOST, | |||||
| ACL_MEMCPY_DEVICE_TO_DEVICE, | |||||
| } aclrtMemcpyKind; | |||||
| typedef enum aclrtMemMallocPolicy { | |||||
| ACL_MEM_MALLOC_HUGE_FIRST, | |||||
| ACL_MEM_MALLOC_HUGE_ONLY, | |||||
| ACL_MEM_MALLOC_NORMAL_ONLY, | |||||
| ACL_MEM_MALLOC_HUGE_FIRST_P2P, | |||||
| ACL_MEM_MALLOC_HUGE_ONLY_P2P, | |||||
| ACL_MEM_MALLOC_NORMAL_ONLY_P2P, | |||||
| } aclrtMemMallocPolicy; | |||||
| typedef enum aclrtMemAttr { | |||||
| ACL_DDR_MEM, | |||||
| ACL_HBM_MEM, | |||||
| ACL_DDR_MEM_HUGE, | |||||
| ACL_DDR_MEM_NORMAL, | |||||
| ACL_HBM_MEM_HUGE, | |||||
| ACL_HBM_MEM_NORMAL, | |||||
| ACL_DDR_MEM_P2P_HUGE, | |||||
| ACL_DDR_MEM_P2P_NORMAL, | |||||
| ACL_HBM_MEM_P2P_HUGE, | |||||
| ACL_HBM_MEM_P2P_NORMAL, | |||||
| } aclrtMemAttr; | |||||
| typedef enum aclrtGroupAttr { | |||||
| ACL_GROUP_AICORE_INT, | |||||
| ACL_GROUP_AIV_INT, | |||||
| ACL_GROUP_AIC_INT, | |||||
| ACL_GROUP_SDMANUM_INT, | |||||
| ACL_GROUP_ASQNUM_INT | |||||
| } aclrtGroupAttr; | |||||
| typedef struct tagRtGroupInfo aclrtGroupInfo; | |||||
| typedef struct rtExceptionInfo aclrtExceptionInfo; | |||||
| typedef void (*aclrtCallback)(void *userData); | |||||
| typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Set a callback function to handle exception information | |||||
| * | |||||
| * @param callback [IN] callback function to handle exception information | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get task id from exception information | |||||
| * | |||||
| * @param info [IN] pointer of exception information | |||||
| * | |||||
| * @retval The task id from exception information | |||||
| * @retval 0xFFFFFFFF if info is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get stream id from exception information | |||||
| * | |||||
| * @param info [IN] pointer of exception information | |||||
| * | |||||
| * @retval The stream id from exception information | |||||
| * @retval 0xFFFFFFFF if info is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get thread id from exception information | |||||
| * | |||||
| * @param info [IN] pointer of exception information | |||||
| * | |||||
| * @retval The thread id of the failed task | |||||
| * @retval 0xFFFFFFFF if info is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get device id from exception information | |||||
| * | |||||
| * @param info [IN] pointer of exception information | |||||
| * | |||||
| * @retval The device id of the failed task | |||||
| * @retval 0xFFFFFFFF if info is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
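| /* Illustrative sketch: a handler that logs the ids carried by the exception | |||||
| * info (printf stands in for a real logger; registration shown at the end): | |||||
| * | |||||
| *   static void OnException(aclrtExceptionInfo *info) { | |||||
| *       printf("task %u on stream %u failed (thread %u, device %u)\n", | |||||
| *              aclrtGetTaskIdFromExceptionInfo(info), | |||||
| *              aclrtGetStreamIdFromExceptionInfo(info), | |||||
| *              aclrtGetThreadIdFromExceptionInfo(info), | |||||
| *              aclrtGetDeviceIdFromExceptionInfo(info)); | |||||
| *   } | |||||
| *   // ... aclrtSetExceptionInfoCallback(OnException); | |||||
| */ | |||||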
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Specify a thread to handle the callback functions on the Stream | |||||
| * | |||||
| * @param threadId [IN] thread ID | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Add a callback function to be executed on the host | |||||
| * to the task queue of the Stream | |||||
| * | |||||
| * @param fn [IN] Specify the callback function to be added | |||||
| * The function prototype of the callback function is: | |||||
| * typedef void (*aclrtCallback)(void *userData); | |||||
| * @param userData [IN] User data to be passed to the callback function | |||||
| * @param blockType [IN] callback block type | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief After waiting for a specified time, trigger callback processing | |||||
| * | |||||
| * @par Function | |||||
| * Triggers callback processing on the thread specified by | |||||
| * the aclrtSubscribeReport interface | |||||
| * | |||||
| * @param timeout [IN] timeout value | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSubscribeReport | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Cancel thread registration, | |||||
| * the callback function on the specified Stream | |||||
| * is no longer processed by the specified thread | |||||
| * | |||||
| * @param threadId [IN] thread ID | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream); | |||||
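| /* Illustrative callback-thread flow. reportThreadId is an assumed 64-bit id | |||||
| * of a thread created by the caller; how it is obtained is platform specific. | |||||
| * | |||||
| *   static void MyCallback(void *userData);  // matches aclrtCallback | |||||
| * | |||||
| *   // report thread: loop calling aclrtProcessReport(timeoutMs) | |||||
| *   // main thread: | |||||
| *   aclrtSubscribeReport(reportThreadId, stream); | |||||
| *   aclrtLaunchCallback(MyCallback, NULL, ACL_CALLBACK_NO_BLOCK, stream); | |||||
| *   // ... when the callbacks are no longer needed ... | |||||
| *   aclrtUnSubscribeReport(reportThreadId, stream); | |||||
| */ | |||||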
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create context and associates it with the calling thread | |||||
| * | |||||
| * @par Function | |||||
| * The following use cases are supported: | |||||
| * @li If you don't call the aclrtCreateContext interface | |||||
| * to explicitly create the context, | |||||
| * the system will use the default context, which is implicitly created | |||||
| * when the aclrtSetDevice interface is called. | |||||
| * @li If multiple contexts are created in a process | |||||
| * (there is no limit on the number of contexts), | |||||
| * the current thread can only use one of them at the same time. | |||||
| * It is recommended to explicitly specify the context of the current thread | |||||
| * through the aclrtSetCurrentContext interface to increase | |||||
| * the maintainability of the program. | |||||
| * | |||||
| * @param context [OUT] point to the created context | |||||
| * @param deviceId [IN] device to create context on | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSetDevice | aclrtSetCurrentContext | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief destroy context instance | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy context created through aclrtCreateContext interface | |||||
| * | |||||
| * @param context [IN] the context to destroy | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateContext | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief set the context of the thread | |||||
| * | |||||
| * @par Function | |||||
| * The following scenarios are supported: | |||||
| * @li If the aclrtCreateContext interface is called in a thread to explicitly | |||||
| * create a Context (for example: ctx1), the thread's Context can be specified | |||||
| * without calling the aclrtSetCurrentContext interface. | |||||
| * The system uses ctx1 as the context of that thread by default. | |||||
| * @li If a context is not explicitly created via the aclrtCreateContext interface, | |||||
| * the system uses the default context as the context of the thread. | |||||
| * At this time, the aclrtDestroyContext interface cannot be used to release | |||||
| * the default context. | |||||
| * @li If the aclrtSetCurrentContext interface is called multiple times to | |||||
| * set the thread's Context, the last one prevails. | |||||
| * | |||||
| * @par Restriction | |||||
| * @li If the device corresponding to the context set for the thread | |||||
| * has been reset, you cannot set the context as the context of the thread, | |||||
| * otherwise a business exception will result. | |||||
| * @li It is recommended to use the context created in a thread. | |||||
| * If the aclrtCreateContext interface is called in thread A to create a context, | |||||
| * and the context is used in thread B, | |||||
| * the user must guarantee the execution order of tasks in the same stream | |||||
| * under the same context in two threads. | |||||
| * | |||||
| * @param context [IN] the current context of the thread | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateContext | aclrtDestroyContext | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get the context of the thread | |||||
| * | |||||
| * @par Function | |||||
| * If the user calls the aclrtSetCurrentContext interface | |||||
| * multiple times to set the context of the current thread, | |||||
| * then the last set context is obtained | |||||
| * | |||||
| * @param context [OUT] the current context of the thread | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSetCurrentContext | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context); | |||||
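| /* Illustrative context lifecycle sketch (device 0 is an arbitrary example; | |||||
| * error handling omitted): | |||||
| * | |||||
| *   aclrtContext ctx = NULL; | |||||
| *   aclrtCreateContext(&ctx, 0);   // becomes the current context of this thread | |||||
| *   aclrtContext cur = NULL; | |||||
| *   aclrtGetCurrentContext(&cur);  // cur == ctx here | |||||
| *   aclrtSetCurrentContext(ctx);   // explicit set, e.g. when reused elsewhere | |||||
| *   aclrtDestroyContext(ctx); | |||||
| */ | |||||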
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Specify the device to use for the operation | |||||
| * implicitly create the default context and the default stream | |||||
| * | |||||
| * @par Function | |||||
| * The following use cases are supported: | |||||
| * @li Device can be specified in the process or thread. | |||||
| * If you call the aclrtSetDevice interface multiple | |||||
| * times to specify the same device, | |||||
| * you only need to call the aclrtResetDevice interface to reset the device. | |||||
| * @li The same device can be specified for operation | |||||
| * in different processes or threads. | |||||
| * @li Device is specified in a process, | |||||
| * and multiple threads in the process can share this device to explicitly | |||||
| * create a Context (aclrtCreateContext interface). | |||||
| * @li In multi-device scenarios, you can switch to other devices | |||||
| * through the aclrtSetDevice interface in the process. | |||||
| * | |||||
| * @param deviceId [IN] the device id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtResetDevice |aclrtCreateContext | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Reset the current operating Device and free resources on the device, | |||||
| * including the default context, the default stream, | |||||
| * and all streams created under the default context. | |||||
| * This is a synchronous interface. | |||||
| * If the task under the default context or stream has not been completed, | |||||
| * the system will wait for the task to complete before releasing it. | |||||
| * | |||||
| * @par Restriction | |||||
| * @li The Context, Stream, and Event that were explicitly created | |||||
| * on the device must be released before the device is reset. | |||||
| * It is recommended to follow the interface calling sequence below, | |||||
| * otherwise business abnormalities may be caused. | |||||
| * @li Interface calling sequence: | |||||
| * call aclrtDestroyEvent interface to release Event or | |||||
| * call aclrtDestroyStream interface to release explicitly created Stream-> | |||||
| * call aclrtDestroyContext to release explicitly created Context-> | |||||
| * call aclrtResetDevice interface | |||||
| * | |||||
| * @param deviceId [IN] the device id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get target device of current thread | |||||
| * | |||||
| * @param deviceId [OUT] the device id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get target side | |||||
| * | |||||
| * @param runMode [OUT] the run mode | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Wait for compute device to finish | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void); | |||||
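| /* Illustrative device lifecycle sketch (error handling omitted): | |||||
| * | |||||
| *   int32_t devId = 0; | |||||
| *   aclrtSetDevice(devId);      // implicitly creates default context and stream | |||||
| *   // ... submit work to the device ... | |||||
| *   aclrtSynchronizeDevice();   // block until all submitted tasks finish | |||||
| *   aclrtResetDevice(devId);    // release the default resources | |||||
| */ | |||||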
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Set Scheduling TS | |||||
| * | |||||
| * @param tsId [IN] the ts id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get total device number. | |||||
| * | |||||
| * @param count [OUT] the device number | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create event instance | |||||
| * | |||||
| * @param event [OUT] created event | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief destroy event instance | |||||
| * | |||||
| * @par Function | |||||
| * Only events created through the aclrtCreateEvent interface can be | |||||
| * destroyed; this is a synchronous interface. When destroying an event, | |||||
| * the user must ensure that the tasks involved in the aclrtSynchronizeEvent | |||||
| * interface or the aclrtStreamWaitEvent interface are completed before | |||||
| * they are destroyed. | |||||
| * | |||||
| * @param event [IN] event to destroy | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Record an Event in the Stream | |||||
| * | |||||
| * @param event [IN] event to record | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Reset an event | |||||
| * | |||||
| * @par Function | |||||
| * Users need to make sure to wait for the tasks in the Stream | |||||
| * to complete before resetting the Event | |||||
| * | |||||
| * @param event [IN] event to reset | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Queries an event's status | |||||
| * | |||||
| * @param event [IN] event to query | |||||
| * @param status [OUT] event status | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Block host running until the event completes | |||||
| * | |||||
| * @param event [IN] event to wait | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief computes the elapsed time between events. | |||||
| * | |||||
| * @param ms [OUT] time between start and end in ms | |||||
| * @param start [IN] starting event | |||||
| * @param end [IN] ending event | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); | |||||
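| /* Illustrative timing sketch: bracketing work on a stream with two events. | |||||
| * stream is assumed to be a valid aclrtStream; error handling omitted. | |||||
| * | |||||
| *   aclrtEvent start = NULL, end = NULL; | |||||
| *   aclrtCreateEvent(&start); | |||||
| *   aclrtCreateEvent(&end); | |||||
| *   aclrtRecordEvent(start, stream); | |||||
| *   // ... enqueue the tasks to be timed on 'stream' ... | |||||
| *   aclrtRecordEvent(end, stream); | |||||
| *   aclrtSynchronizeEvent(end); | |||||
| *   float ms = 0.0f; | |||||
| *   aclrtEventElapsedTime(&ms, start, end); | |||||
| *   aclrtDestroyEvent(start); | |||||
| *   aclrtDestroyEvent(end); | |||||
| */ | |||||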
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief alloc memory on device | |||||
| * | |||||
| * @par Function | |||||
| * allocate size bytes of linear memory on the device | |||||
| * and return a pointer to the allocated memory through *devPtr | |||||
| * | |||||
| * @par Restriction | |||||
| * @li The memory requested by the aclrtMalloc interface needs to be released | |||||
| * through the aclrtFree interface. | |||||
| * @li Before calling the media data processing interface, | |||||
| * if you need to allocate memory on the device to store input or output data, | |||||
| * you need to call acldvppMalloc to allocate the memory. | |||||
| * | |||||
| * @param devPtr [OUT] pointer to pointer to allocated memory on device | |||||
| * @param size [IN] alloc memory size | |||||
| * @param policy [IN] memory alloc policy | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtFree | acldvppMalloc | aclrtMallocCached | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief allocate memory on device with cache | |||||
| * | |||||
| * @par Function | |||||
| * allocate size bytes of linear memory on the device | |||||
| * and return a pointer to the allocated memory through *devPtr | |||||
| * | |||||
| * @par Restriction | |||||
| * @li The memory requested by the aclrtMallocCached interface needs to be released | |||||
| * through the aclrtFree interface. | |||||
| * | |||||
| * @param devPtr [OUT] pointer to pointer to allocated memory on device | |||||
| * @param size [IN] alloc memory size | |||||
| * @param policy [IN] memory alloc policy | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtFree | aclrtMalloc | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief flush cache data to ddr | |||||
| * | |||||
| * @param devPtr [IN] the pointer that flush data to ddr | |||||
| * @param size [IN] flush size | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief invalidate cache data | |||||
| * | |||||
| * @param devPtr [IN] pointer to invalidate cache data | |||||
| * @param size [IN] invalidate size | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief free device memory | |||||
| * | |||||
| * @par Function | |||||
| * can only free memory allocated through the aclrtMalloc interface | |||||
| * | |||||
| * @param devPtr [IN] Pointer to memory to be freed | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtMalloc | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief alloc memory on host | |||||
| * | |||||
| * @par Restriction | |||||
| * @li The requested memory cannot be used in the Device | |||||
| * and needs to be explicitly copied to the Device. | |||||
| * @li The memory requested by the aclrtMallocHost interface | |||||
| * needs to be released through the aclrtFreeHost interface. | |||||
| * | |||||
| * @param hostPtr [OUT] pointer to pointer to allocated memory on the host | |||||
| * @param size [IN] alloc memory size | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtFreeHost | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief free host memory | |||||
| * | |||||
| * @par Function | |||||
| * can only free memory allocated through the aclrtMallocHost interface | |||||
| * | |||||
| * @param hostPtr [IN] free memory pointer | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtMallocHost | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief synchronous memory replication between host and device | |||||
| * | |||||
| * @param dst [IN] destination address pointer | |||||
| * @param destMax [IN] Max length of the destination address memory | |||||
| * @param src [IN] source address pointer | |||||
| * @param count [IN] the length of byte to copy | |||||
| * @param kind [IN] memcpy type | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, | |||||
| aclrtMemcpyKind kind); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Initialize memory and set contents of memory to specified value | |||||
| * | |||||
| * @par Function | |||||
| * The memory to be initialized is on the Host or device side, | |||||
| * and the system determines whether | |||||
| * it is host or device according to the address | |||||
| * | |||||
| * @param devPtr [IN] Starting address of memory | |||||
| * @param maxCount [IN] Max length of destination address memory | |||||
| * @param value [IN] Set value | |||||
| * @param count [IN] The length of memory | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count); | |||||
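| /* Illustrative sketch: staging host data to the device and clearing a device | |||||
| * buffer (1 KiB is an arbitrary size; error handling omitted): | |||||
| * | |||||
| *   size_t size = 1024; | |||||
| *   void *hostBuf = NULL, *devBuf = NULL; | |||||
| *   aclrtMallocHost(&hostBuf, size); | |||||
| *   aclrtMalloc(&devBuf, size, ACL_MEM_MALLOC_HUGE_FIRST); | |||||
| *   aclrtMemcpy(devBuf, size, hostBuf, size, ACL_MEMCPY_HOST_TO_DEVICE); | |||||
| *   aclrtMemset(devBuf, size, 0, size); | |||||
| *   aclrtFree(devBuf); | |||||
| *   aclrtFreeHost(hostBuf); | |||||
| */ | |||||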
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Asynchronous memory replication between Host and Device | |||||
| * | |||||
| * @par Function | |||||
| * After calling this interface, | |||||
| * be sure to call the aclrtSynchronizeStream interface to ensure that | |||||
| * the task of memory replication has been completed | |||||
| * | |||||
| * @par Restriction | |||||
| * @li For on-chip Device-to-Device memory copy, | |||||
| * both the source and destination addresses must be 64-byte aligned | |||||
| * | |||||
| * @param dst [IN] destination address pointer | |||||
| * @param destMax [IN] Max length of destination address memory | |||||
| * @param src [IN] source address pointer | |||||
| * @param count [IN] the number of byte to copy | |||||
| * @param kind [IN] memcpy type | |||||
| * @param stream [IN] asynchronized task stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSynchronizeStream | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, | |||||
| aclrtMemcpyKind kind, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Asynchronously initialize memory | |||||
| * and set contents of memory to the specified value | |||||
| * | |||||
| * @par Function | |||||
| * The memory to be initialized is on the Host or device side, | |||||
| * and the system determines whether | |||||
| * it is host or device according to the address | |||||
| * | |||||
| * @param devPtr [IN] destination address pointer | |||||
| * @param maxCount [IN] Max length of destination address memory | |||||
| * @param value [IN] set value | |||||
| * @param count [IN] the number of byte to set | |||||
| * @param stream [IN] asynchronized task stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSynchronizeStream | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create stream instance | |||||
| * | |||||
| * @param stream [OUT] the created stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief destroy stream instance | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy streams created through the aclrtCreateStream interface | |||||
| * | |||||
| * @par Restriction | |||||
| * Before calling the aclrtDestroyStream interface to destroy | |||||
| * the specified Stream, you need to call the aclrtSynchronizeStream interface | |||||
| * to ensure that the tasks in the Stream have been completed. | |||||
| * | |||||
| * @param stream [IN] the stream to destroy | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateStream | aclrtSynchronizeStream | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief block the host until all tasks | |||||
| * in the specified stream have completed | |||||
| * | |||||
| * @param stream [IN] the stream to wait | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream); | |||||
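| /* Illustrative asynchronous-copy sketch: the copy must be fenced by a stream | |||||
| * synchronization before the destination is read. devBuf, hostBuf, and size | |||||
| * are assumed valid, e.g. as in the allocation sketch above. | |||||
| * | |||||
| *   aclrtStream stream = NULL; | |||||
| *   aclrtCreateStream(&stream); | |||||
| *   aclrtMemcpyAsync(devBuf, size, hostBuf, size, ACL_MEMCPY_HOST_TO_DEVICE, stream); | |||||
| *   aclrtSynchronizeStream(stream);  // copy is guaranteed complete after this | |||||
| *   aclrtDestroyStream(stream); | |||||
| */ | |||||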
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Blocks the operation of the specified Stream until | |||||
| * the specified Event is completed. | |||||
| * Support for multiple streams waiting for the same event. | |||||
| * | |||||
| * @param stream [IN] the wait stream. If using the default Stream, set NULL | |||||
| * @param event [IN] the event to wait | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief set group | |||||
| * | |||||
| * @par Function | |||||
| * set the task to the corresponding group | |||||
| * | |||||
| * @param groupId [IN] group id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get the number of group | |||||
| * | |||||
| * @par Function | |||||
| * get the number of groups. If the number of groups is zero, | |||||
| * it means that groups are not supported or no group has been created. | |||||
| * | |||||
| * @param count [OUT] the number of group | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create group information | |||||
| * | |||||
| * @retval null for failed. | |||||
| * @retval OtherValues success. | |||||
| * | |||||
| * @see aclrtDestroyGroupInfo | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo(); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief destroy group information | |||||
| * | |||||
| * @param groupInfo [IN] pointer to group information | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateGroupInfo | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get all group information | |||||
| * | |||||
| * @param groupInfo [OUT] pointer to group information | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtGetGroupCount | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get detail information of group | |||||
| * | |||||
| * @param groupInfo [IN] pointer to group information | |||||
| * @param groupId [IN] group index value | |||||
| * @param attr [IN] group attribute | |||||
| * @param attrValue [OUT] pointer to attribute value | |||||
| * @param valueLen [IN] length of attribute value | |||||
| * @param paramRetSize [OUT] pointer to real length of attribute value | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupId, | |||||
| aclrtGroupAttr attr, void *attrValue, size_t valueLen, | |||||
| size_t *paramRetSize); | |||||
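| /* Illustrative sketch: querying the aicore count of group 0. Interpreting the | |||||
| * *_INT attributes as 32-bit integers is an assumption; error handling omitted. | |||||
| * | |||||
| *   uint32_t groupCount = 0; | |||||
| *   aclrtGetGroupCount(&groupCount); | |||||
| *   aclrtGroupInfo *info = aclrtCreateGroupInfo(); | |||||
| *   aclrtGetAllGroupInfo(info); | |||||
| *   int32_t aicoreNum = 0; | |||||
| *   size_t retSize = 0; | |||||
| *   aclrtGetGroupInfoDetail(info, 0, ACL_GROUP_AICORE_INT, | |||||
| *                           &aicoreNum, sizeof(aicoreNum), &retSize); | |||||
| *   aclrtDestroyGroupInfo(info); | |||||
| */ | |||||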
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief checking whether current device and peer device support the p2p feature | |||||
| * | |||||
| * @param canAccessPeer [OUT] pointer to save the checking result | |||||
| * @param deviceId [IN] current device id | |||||
| * @param peerDeviceId [IN] peer device id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief enable the peer device to support the p2p feature | |||||
| * | |||||
| * @param peerDeviceId [IN] the peer device id | |||||
| * @param flags [IN] reserved field, now it must be zero | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief disable the peer device to support the p2p feature | |||||
| * | |||||
| * @param peerDeviceId [IN] the peer device id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId); | |||||
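| /* Illustrative p2p sketch: enable access to device 1 from the current device | |||||
| * (device ids 0/1 are arbitrary examples) only after the capability check: | |||||
| * | |||||
| *   int32_t canAccess = 0; | |||||
| *   aclrtDeviceCanAccessPeer(&canAccess, 0, 1); | |||||
| *   if (canAccess) { | |||||
| *       aclrtDeviceEnablePeerAccess(1, 0);  // flags must be zero | |||||
| *   } | |||||
| */ | |||||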
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Obtain the free memory and total memory of the specified attribute. | |||||
| * The specified memory includes normal memory and huge memory. | |||||
| * | |||||
| * @param attr [IN] the memory attribute of specified device | |||||
| * @param free [OUT] the free memory of specified device | |||||
| * @param total [OUT] the total memory of specified device. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total); | |||||
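| /* Illustrative sketch: reading DDR occupancy of the current device: | |||||
| * | |||||
| *   size_t freeMem = 0, totalMem = 0; | |||||
| *   aclrtGetMemInfo(ACL_DDR_MEM, &freeMem, &totalMem); | |||||
| */ | |||||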
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
| @@ -0,0 +1,276 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
| #define INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
| #include "acl/acl_base.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| enum acltdtTensorType { | |||||
| ACL_TENSOR_DATA_UNDEFINED = -1, | |||||
| ACL_TENSOR_DATA_TENSOR, | |||||
| ACL_TENSOR_DATA_END_OF_SEQUENCE, | |||||
| ACL_TENSOR_DATA_ABNORMAL | |||||
| }; | |||||
| typedef struct acltdtDataItem acltdtDataItem; | |||||
| typedef struct acltdtDataset acltdtDataset; | |||||
| typedef struct acltdtChannelHandle acltdtChannelHandle; | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get tensor type from item | |||||
| * | |||||
| * @param dataItem [IN] pointer to the data item | |||||
| * | |||||
| * @retval Tensor type. | |||||
| * @retval ACL_TENSOR_DATA_UNDEFINED if dataItem is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get data type from item | |||||
| * | |||||
| * @param dataItem [IN] pointer to the data item | |||||
| * | |||||
| * @retval Data type. | |||||
| * @retval ACL_DT_UNDEFINED if dataItem is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get data address from item | |||||
| * | |||||
| * @param dataItem [IN] pointer to data item | |||||
| * | |||||
| * @retval null for failed | |||||
| * @retval OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get data size from item | |||||
| * | |||||
| * @param dataItem [IN] pointer to data item | |||||
| * | |||||
| * @retval 0 for failed | |||||
| * @retval OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get dim's number from item | |||||
| * | |||||
| * @param dataItem [IN] pointer to data item | |||||
| * | |||||
| * @retval 0 for failed | |||||
| * @retval OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get dims from item | |||||
| * | |||||
| * @param dataItem [IN] the struct of data item | |||||
| * @param dims [IN|OUT] pointer to the dims of dataItem | |||||
| * @param dimNum [IN] the size of the dims | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create the struct of data item | |||||
| * | |||||
| * @param tdtType [IN] Tdt tensor type | |||||
| * @param dims [IN] pointer of tdtDataItem's dims | |||||
| * @param dimNum [IN] Dim number | |||||
| * @param dataType [IN] Data type | |||||
| * @param data [IN] Data pointer | |||||
| * @param size [IN] Data size | |||||
| * | |||||
| * @retval null for failed | |||||
| * @retval OtherValues success | |||||
| * | |||||
| * @see acltdtDestroyDataItem | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum, | |||||
| aclDataType dataType, void *data, size_t size); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy the struct of data item | |||||
| * | |||||
| * @param dataItem [IN] pointer to the data item | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtCreateDataItem | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create the tdt dataset | |||||
| * | |||||
| * @retval null for failed | |||||
| * @retval OtherValues success | |||||
| * | |||||
| * @see acltdtDestroyDataset | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset(); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy the tdt dataset | |||||
| * | |||||
| * @param dataset [IN] pointer to the dataset | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtCreateDataset | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get the data item | |||||
| * | |||||
| * @param dataset [IN] pointer to the dataset | |||||
| * @param index [IN] index of the dataset | |||||
| * | |||||
| * @retval null for failed | |||||
| * @retval OtherValues success | |||||
| * | |||||
| * @see acltdtAddDataItem | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Add the data item to the dataset | |||||
| * | |||||
| * @param dataset [IN|OUT] pointer to the dataset | |||||
| * @param dataItem [IN] pointer to the data item | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtGetDataItem | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get the size of dataset | |||||
| * | |||||
| * @param dataset [IN] pointer to the dataset | |||||
| * | |||||
| * @retval 0 for failed | |||||
| * @retval OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Stop the channel | |||||
| * | |||||
| * @param handle [IN] pointer to the channel handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtCreateChannel | acltdtDestroyChannel | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create the channel | |||||
| * | |||||
| * @param deviceId [IN] the device id | |||||
| * @param name [IN] the channel's name | |||||
| * | |||||
| * @retval null for failed | |||||
| * @retval OtherValues success | |||||
| * | |||||
| * @see acltdtStopChannel | acltdtDestroyChannel | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy the channel | |||||
| * | |||||
| * @param handle [IN] pointer to the channel handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtCreateChannel | acltdtStopChannel | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Send tensor to device | |||||
| * | |||||
| * @param handle [IN] pointer to the channel handle | |||||
| * @param dataset [IN] pointer to the dataset | |||||
| * @param timeout [IN] reserved field, now it must be -1 | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtReceiveTensor | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, | |||||
| int32_t timeout); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Receive tensor from device | |||||
| * | |||||
| * @param handle [IN] pointer to the channel handle | |||||
| * @param dataset [OUT] pointer to the dataset | |||||
| * @param timeout [IN] reserved field, now it must be -1 | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtSendTensor | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, | |||||
| int32_t timeout); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
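Putting the channel and transfer calls above together, a host-side round trip looks roughly like the sketch below. The channel name "demo" is illustrative, the caller is assumed to have built the outbound dataset and an empty inbound dataset, and a device-side peer must be servicing the channel:

```c
#include "acl/acl.h"
#include "acl/acl_tdt.h"

/* Sketch: send one dataset and wait for a reply on the same channel. */
static aclError tdt_round_trip(uint32_t deviceId, const acltdtDataset *outbound,
                               acltdtDataset *inbound) {
    acltdtChannelHandle *ch = acltdtCreateChannel(deviceId, "demo");
    if (ch == NULL) {
        return 1;  /* any nonzero aclError; channel creation failed */
    }
    aclError ret = acltdtSendTensor(ch, outbound, -1);  /* timeout is reserved, must be -1 */
    if (ret == ACL_SUCCESS) {
        ret = acltdtReceiveTensor(ch, inbound, -1);
    }
    (void)acltdtStopChannel(ch);  /* unblocks any transfer still pending */
    (void)acltdtDestroyChannel(ch);
    return ret;
}
```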
| @@ -0,0 +1,61 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||||
| #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009; | |||||
| static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011; | |||||
| static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012; | |||||
| static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013; | |||||
| static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014; | |||||
| static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; | |||||
| static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; | |||||
| static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; | |||||
| static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; | |||||
| static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; | |||||
| static const uint32_t ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID = 145020; | |||||
| static const uint32_t ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID = 145021; | |||||
| static const uint32_t ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID = 145022; | |||||
| static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; | |||||
| static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; | |||||
| static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; | |||||
| static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; | |||||
| static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; | |||||
| static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; | |||||
| static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||||
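The numeric values above fall into three bands, which makes coarse triage possible without enumerating every constant. A sketch (the band boundaries are read off the constants in this header, not documented guarantees):

```c
#include <stdint.h>

/* Sketch: classify a GE error code by its band. */
static const char *ge_error_band(uint32_t code) {
    if (code >= 145000u && code < 146000u) return "invalid usage or bad input";
    if (code >= 245000u && code < 246000u) return "memory allocation/operation failure";
    if (code >= 545000u && code < 546000u) return "GE internal failure";
    return "not a GE error code";
}
```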
| @@ -0,0 +1,101 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| #define __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
| static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
| static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
| static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
| static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
| static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
| static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
| static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
| static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
| static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
| static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
| static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
| static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore overflow | |||||
| static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
| static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
| static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
| static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
| static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
| static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
| static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
| static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
| static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
| static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
| static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
| static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
| static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
| static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
| static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
| static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
| static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
| static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
| static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
| static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
| static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
| static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
| static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
| static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
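The runtime codes follow the same banding idea (107xxx bad arguments, 207xxx missing resources or features, 507xxx execution failures). A small lookup sketch for the codes a caller most often branches on; the strings merely restate the comments above:

```c
/* Include path below is an assumption; adjust to wherever this header is installed. */
#include "rt_error_codes.h"

static const char *rt_error_str(int32_t code) {
    switch (code) {
        case ACL_RT_SUCCESS:                   return "success";
        case ACL_ERROR_RT_PARAM_INVALID:       return "param invalid";
        case ACL_ERROR_RT_MEMORY_ALLOCATION:   return "memory allocation error";
        case ACL_ERROR_RT_STREAM_NOT_COMPLETE: return "stream not complete";
        case ACL_ERROR_RT_INTERNAL_ERROR:      return "runtime internal error";
        default:                               return "other runtime error";
    }
}
```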
| @@ -0,0 +1,334 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
| #define INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
| #include "acl/acl.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; | |||||
| typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param alpha [IN] pointer to scalar used for multiplication, | |||||
| * of the same type as dataTypeY | |||||
| * @param a [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store the matrix A | |||||
| * @param dataTypeA [IN] datatype of matrix A | |||||
| * @param x [IN] pointer to vector x | |||||
| * @param incx [IN] stride between consecutive elements of vector x | |||||
| * @param dataTypeX [IN] datatype of vector x | |||||
| * @param beta [IN] pointer to scalar used for multiplication, | |||||
| * of the same type as dataTypeY. If beta == 0, | |||||
| * then y does not have to be a valid input | |||||
| * @param y [IN|OUT] pointer to vector y | |||||
| * @param incy [IN] stride between consecutive elements of vector y | |||||
| * @param dataTypeY [IN] datatype of vector y | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, | |||||
| aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, | |||||
| const void *beta, void *y, int incy, aclDataType dataTypeY, | |||||
| aclComputeType type, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param dataTypeA [IN] datatype of matrix A | |||||
| * @param dataTypeX [IN] datatype of vector x | |||||
| * @param dataTypeY [IN] datatype of vector y | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA, | |||||
| aclDataType dataTypeX, aclDataType dataTypeY, | |||||
| aclComputeType type, aclopHandle **handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param alpha [IN] pointer to scalar used for multiplication | |||||
| * @param a [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store the matrix A | |||||
| * @param x [IN] pointer to vector x | |||||
| * @param incx [IN] stride between consecutive elements of vector x | |||||
| * @param beta [IN] pointer to scalar used for multiplication. | |||||
| * If beta value == 0, | |||||
| * then y does not have to be a valid input | |||||
| * @param y [IN|OUT] pointer to vector y | |||||
| * @param incy [IN] stride between consecutive elements of vector y | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha, | |||||
| const aclFloat16 *a, int lda, const aclFloat16 *x, int incx, | |||||
| const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type, | |||||
| aclrtStream stream); | |||||
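A minimal fp16 GEMV sketch based on the declaration above. The buffer names are hypothetical, the buffers are assumed to be device memory prepared by the caller, the row-major `lda = n` choice is an assumption (this header does not pin down the layout), and `aclFloatToFloat16` is assumed available from acl_base.h:

```c
#include "acl/ops/acl_cblas.h"

/* Sketch: y = alpha*A*x + beta*y in fp16, A is m x n, no transpose. */
static aclError hgemv_sketch(int m, int n, const aclFloat16 *devA,
                             const aclFloat16 *devX, aclFloat16 *devY,
                             aclrtStream stream) {
    aclFloat16 alpha = aclFloatToFloat16(1.0f);
    aclFloat16 beta = aclFloatToFloat16(0.0f);  /* beta == 0: y need not be valid input */
    return aclblasHgemv(ACL_TRANS_N, m, n, &alpha,
                        devA, n,   /* lda = n assumes row-major dense storage */
                        devX, 1,   /* incx */
                        &beta, devY, 1,
                        ACL_COMPUTE_HIGH_PRECISION, stream);
}
```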
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param alpha [IN] pointer to scalar used for multiplication | |||||
| * @param a [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store the matrix A | |||||
| * @param x [IN] pointer to vector x | |||||
| * @param incx [IN] stride between consecutive elements of vector x | |||||
| * @param beta [IN] pointer to scalar used for multiplication. | |||||
| * If beta value == 0, | |||||
| * then y does not have to be a valid input | |||||
| * @param y [IN|OUT] pointer to vector y | |||||
| * @param incy [IN] stride between consecutive elements of vector y | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a, | |||||
| int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y, | |||||
| int incy, aclComputeType type, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param alpha [IN] pointer to scalar used for multiplication, of the same type as dataTypeC | |||||
| * @param matrixA [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension array used to store matrix A | |||||
| * @param dataTypeA [IN] datatype of matrix A | |||||
| * @param matrixB [IN] pointer to matrix B | |||||
| * @param ldb [IN] leading dimension array used to store matrix B | |||||
| * @param dataTypeB [IN] datatype of matrix B | |||||
| * @param beta [IN] pointer to scalar used for multiplication, | |||||
| * of the same type as dataTypeC. If beta == 0, | |||||
| * then matrixC does not have to be a valid input | |||||
| * @param matrixC [IN|OUT] pointer to matrix C | |||||
| * @param ldc [IN] leading dimension array used to store matrix C | |||||
| * @param dataTypeC [IN] datatype of matrix C | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
| int k, const void *alpha, const void *matrixA, int lda, | |||||
| aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB, | |||||
| const void *beta, void *matrixC, int ldc, aclDataType dataTypeC, | |||||
| aclComputeType type, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param dataTypeA [IN] datatype of matrix A | |||||
| * @param dataTypeB [IN] datatype of matrix B | |||||
| * @param dataTypeC [IN] datatype of matrix C | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
| int m, int n, int k, aclDataType dataTypeA, | |||||
| aclDataType dataTypeB, aclDataType dataTypeC, | |||||
| aclComputeType type, aclopHandle **handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param alpha [IN] pointer to scalar used for multiplication | |||||
| * @param matrixA [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store the matrix A | |||||
| * @param matrixB [IN] pointer to matrix B | |||||
| * @param ldb [IN] leading dimension used to store the matrix B | |||||
| * @param beta [IN] pointer to scalar used for multiplication. | |||||
| * If beta value == 0, | |||||
| * then matrixC does not have to be a valid input | |||||
| * @param matrixC [IN|OUT] pointer to matrix C | |||||
| * @param ldc [IN] leading dimension used to store the matrix C | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
| int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda, | |||||
| const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta, | |||||
| aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
| int m, int n, int k, aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param alpha [IN] pointer to scalar used for multiplication | |||||
| * @param matrixA [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store the matrix A | |||||
| * @param matrixB [IN] pointer to matrix B | |||||
| * @param ldb [IN] leading dimension used to store the matrix B | |||||
| * @param beta [IN] pointer to scalar used for multiplication. | |||||
| * If beta value == 0, | |||||
| * then matrixC does not have to be a valid input | |||||
| * @param matrixC [IN|OUT] pointer to matrix C | |||||
| * @param ldc [IN] leading dimension used to store the matrix C | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
| int k, const int32_t *alpha, const int8_t *matrixA, int lda, | |||||
| const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC, | |||||
| int ldc, aclComputeType type, aclrtStream stream); | |||||
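The int8 variant follows the same call shape; only the scalar and matrix element types change (int8 inputs, int32 accumulation). A sketch under the same caveats as above, with device buffers prepared by the caller and assumed row-major leading dimensions; the NZ transpose formats would need platform-specific layouts:

```c
#include "acl/ops/acl_cblas.h"

/* Sketch: C = 1*A*B + 0*C with int8 inputs and int32 output. */
static aclError s8gemm_sketch(int m, int n, int k, const int8_t *devA,
                              const int8_t *devB, int32_t *devC,
                              aclrtStream stream) {
    const int32_t alpha = 1;
    const int32_t beta = 0;  /* beta == 0: C need not be valid input */
    return aclblasS8gemm(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N, m, n, k,
                         &alpha, devA, k,  /* lda = k, row-major assumption */
                         devB, n,          /* ldb = n */
                         &beta, devC, n,   /* ldc = n */
                         ACL_COMPUTE_LOW_PRECISION, stream);
}
```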
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
| int m, int n, int k, aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
| @@ -0,0 +1,353 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
| #define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
| #include "acl/acl.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| typedef struct aclfvInitPara aclfvInitPara; | |||||
| typedef struct aclfvFeatureInfo aclfvFeatureInfo; | |||||
| typedef struct aclfvRepoRange aclfvRepoRange; | |||||
| typedef struct aclfvQueryTable aclfvQueryTable; | |||||
| typedef struct aclfvSearchInput aclfvSearchInput; | |||||
| typedef struct aclfvSearchResult aclfvSearchResult; | |||||
| // search operation type | |||||
| enum aclfvSearchType { | |||||
| SEARCH_1_N, // 1:N operation type | |||||
| SEARCH_N_M // N:M operation type | |||||
| }; | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create fv init param. | |||||
| * | |||||
| * @param fsNum [IN] The feature num | |||||
| * | |||||
| * @retval null for failed. | |||||
| * @retval OtherValues success. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvInitPara *aclfvCreateInitPara(uint64_t fsNum); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy fv init param. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy fv init param information created | |||||
| * through aclfvCreateInitPara interface. | |||||
| * | |||||
| * @param initPara [IN] fv init param. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateInitPara | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroyInitPara(aclfvInitPara *initPara); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Set the maxTopNumFor1N value in the fv init param. | |||||
| * | |||||
| * @param initPara [IN|OUT] fv init param. | |||||
| * @param maxTopNumFor1N [IN] maxTopNumFor1N value for init param. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvSet1NTopNum(aclfvInitPara *initPara, uint32_t maxTopNumFor1N); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Set the maxTopNumForNM value in the fv init param. | |||||
| * | |||||
| * @param initPara [IN|OUT] fv init param. | |||||
| * @param maxTopNumForNM [IN] maxTopNumForNM value for init param. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t maxTopNumForNM); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create fv feature info. | |||||
| * | |||||
| * @param id0 [IN] The first level library id0 | |||||
| * @param id1 [IN] Secondary library id1 | |||||
| * @param offset [IN] The offset of the first feature in the library | |||||
| * @param featureLen [IN] Single feature length | |||||
| * @param featureCount [IN] Single feature count | |||||
| * @param featureData [IN] Feature value list | |||||
| * @param featureDataLen [IN] Feature value list length | |||||
| * | |||||
| * @retval null for failed. | |||||
| * @retval OtherValues success. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, | |||||
| uint32_t featureLen, uint32_t featureCount, | |||||
| uint8_t *featureData, uint32_t featureDataLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy fv feature info. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy fv feature info information created | |||||
| * through aclfvCreateFeatureInfo interface. | |||||
| * | |||||
| * @param featureInfo [IN] fv feature info. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateFeatureInfo | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroyFeatureInfo(aclfvFeatureInfo *featureInfo); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create fv repo range. | |||||
| * | |||||
| * @param id0Min [IN] id0 start value | |||||
| * @param id0Max [IN] id0 max value | |||||
| * @param id1Min [IN] id1 start value | |||||
| * @param id1Max [IN] id1 max | |||||
| * | |||||
| * @retval null for failed. OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvRepoRange *aclfvCreateRepoRange(uint32_t id0Min, uint32_t id0Max, uint32_t id1Min, | |||||
| uint32_t id1Max); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy fv repo range. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy fv repo range information created | |||||
| * through aclfvCreateRepoRange interface. | |||||
| * | |||||
| * @param repoRange [IN] fv repo range. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateRepoRange | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroyRepoRange(aclfvRepoRange *repoRange); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create query table. | |||||
| * | |||||
| * @param queryCnt [IN] Number of tables; the maximum is 6 | |||||
| * @param tableLen [IN] Single table length; each table is 32 KB | |||||
| * @param tableData [IN] Feature value list | |||||
| * @param tableDataLen [IN] The length of memory requested by the tableData pointer | |||||
| * | |||||
| * @retval null for failed. OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvQueryTable *aclfvCreateQueryTable(uint32_t queryCnt, uint32_t tableLen, uint8_t *tableData, | |||||
| uint32_t tableDataLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy query table. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy query table information created | |||||
| * through aclfvCreateQueryTable interface. | |||||
| * | |||||
| * @param queryTable [IN] query table. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateQueryTable | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroyQueryTable(aclfvQueryTable *queryTable); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create search input. | |||||
| * | |||||
| * @param queryTable [IN] query table | |||||
| * @param repoRange [IN] query repo range | |||||
| * @param topk [IN] query topk | |||||
| * | |||||
| * @retval null for failed. OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvSearchInput *aclfvCreateSearchInput(aclfvQueryTable *queryTable, aclfvRepoRange *repoRange, | |||||
| uint32_t topk); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy search input. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy search input information created | |||||
| * through aclfvCreateSearchInput interface. | |||||
| * | |||||
| * @param searchInput [IN] search input. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateSearchInput | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInput); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create search result. | |||||
| * | |||||
| * @param queryCnt [IN] Retrieve the number of features | |||||
| * @param resultNum [IN] The number of search results for each feature; the array length is queryCnt | |||||
| * @param resultNumDataLen [IN] resultNum memory length | |||||
| * @param id0 [IN] Level 1 library id0 | |||||
| * @param id1 [IN] Secondary library id1 | |||||
| * @param resultOffset [IN] The offset of the bottom library corresponding | |||||
| * to each feature retrieval result, total length topK * queryCnt | |||||
| * @param resultDistance [IN] Distance, total length topK * queryCnt | |||||
| * @param dataLen [IN] The memory size requested by | |||||
| * id0\id1\resultOffset\resultDistance | |||||
| * | |||||
| * @retval null for failed. OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, | |||||
| uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, | |||||
| uint32_t *resultOffset, float *resultDistance, | |||||
| uint32_t dataLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy search result. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy search result information created | |||||
| * through aclfvCreateSearchResult interface. | |||||
| * | |||||
| * @param searchResult [IN] search result. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateSearchResult | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroySearchResult(aclfvSearchResult *searchResult); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv IP initialize. | |||||
| * | |||||
| * @param initPara [IN] fv init param. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvInit(aclfvInitPara *initPara); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief release fv resources. | |||||
| * | |||||
| * @par Function | |||||
| * Can only release fv resources created | |||||
| * through aclfvInit interface. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| * | |||||
| * @see aclfvInit | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvRelease(); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv repo add. | |||||
| * | |||||
| * @param type [IN] repo add type | |||||
| * @param featureInfo [IN] add feature information | |||||
| * @param stream [IN] stream of task execute | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv repo del. | |||||
| * | |||||
| * @param type [IN] repo delete type | |||||
| * @param repoRange [IN] repo range information | |||||
| * @param stream [IN] stream of task execute | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv accurate del. | |||||
| * | |||||
| * @param featureInfo [IN] accurate delete feature information | |||||
| * @param stream [IN] stream of task execute | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv accurate modify. | |||||
| * | |||||
| * @param featureInfo [IN] accurate modify feature information | |||||
| * @param stream [IN] stream of task execute | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv search. | |||||
| * | |||||
| * @param type [IN] search type | |||||
| * @param searchInput [IN] search input | |||||
| * @param searchRst [OUT] search result | |||||
| * @param stream [IN] stream of task execute | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput *searchInput, | |||||
| aclfvSearchResult *searchRst, aclrtStream stream); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
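The calls above compose into an init, repo add, search, release flow. The sketch below wires them together for the 1:N case; the top-N cap of 32, the zeroed id/offset arguments, and the assumption that the search input/result objects were built beforehand with aclfvCreateSearchInput/aclfvCreateSearchResult are all illustrative:

```c
#include "acl/ops/acl_retr.h"

/* Sketch: 1:N retrieval lifecycle; error handling collapsed to early exits. */
static aclError fv_flow_sketch(uint8_t *featureData, uint32_t featureDataLen,
                               uint32_t featureLen, uint32_t featureCount,
                               aclfvSearchInput *searchInput,
                               aclfvSearchResult *searchResult,
                               aclrtStream stream) {
    aclfvInitPara *para = aclfvCreateInitPara(featureCount);
    if (para == NULL) return 1;        /* any nonzero aclError */
    (void)aclfvSet1NTopNum(para, 32);  /* 32: illustrative top-N cap */
    aclError ret = aclfvInit(para);
    if (ret == ACL_SUCCESS) {
        aclfvFeatureInfo *info = aclfvCreateFeatureInfo(
            0, 0, 0, featureLen, featureCount, featureData, featureDataLen);
        if (info != NULL) {
            ret = aclfvRepoAdd(SEARCH_1_N, info, stream);
            if (ret == ACL_SUCCESS) {
                ret = aclfvSearch(SEARCH_1_N, searchInput, searchResult, stream);
            }
            (void)aclfvDestroyFeatureInfo(info);
        } else {
            ret = 1;
        }
        (void)aclfvRelease();
    }
    (void)aclfvDestroyInitPara(para);
    return ret;
}
```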
| @@ -0,0 +1,134 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| /** | |||||
| * @file hccl.h | |||||
| * @brief HCCL API | |||||
| */ | |||||
| #ifndef HCCL_H_ | |||||
| #define HCCL_H_ | |||||
| #include <hccl/hccl_types.h> | |||||
| #include <acl/acl.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif // __cplusplus | |||||
| /** | |||||
| * @brief Initialize HCCL. | |||||
| * | |||||
| * @param clusterInfo A string identifying the cluster info file path, including the file name. | |||||
| * @param rank An integer identifying the rank. | |||||
| * @param comm A pointer identifying the initialized communication resource. | |||||
| * @return HcclResult | |||||
| * @see HcclCommDestroy() | |||||
| */ | |||||
| extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); | |||||
| /** | |||||
| * @brief Get hccl root info. | |||||
| * | |||||
| * @param rootInfo A pointer identifying the hccl root info. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); | |||||
| /** | |||||
| * @brief Initialize HCCL with root info. | |||||
| * | |||||
| * @param nRanks An integer identifying the rank size of the cluster. | |||||
| * @param rootInfo A struct identifying the hccl root info. | |||||
| * @param rank An integer identifying the rank. | |||||
| * @param comm A pointer identifying the initialized communication resource. | |||||
| * @return HcclResult | |||||
| * @see HcclCommDestroy() | |||||
| */ | |||||
| extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); | |||||
| /** | |||||
| * @brief AllReduce operator. | |||||
| * | |||||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the output data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, | |||||
| * float32. | |||||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, | |||||
| HcclComm comm, aclrtStream stream); | |||||
| /** | |||||
| * @brief Broadcast operator. | |||||
| * | |||||
| * @param buf A pointer identifying the data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param root An integer(u32) identifying the root rank in the operator. | |||||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @brief ReduceScatter operator. | |||||
| * | |||||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||||
| * @param recvCount An integer(u64) identifying the number of the output data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||||
| HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
| /** | |||||
| * @brief AllGather operator. | |||||
| * | |||||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||||
| * @param sendCount An integer(u64) identifying the number of the input data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @brief Destroy HCCL comm | |||||
| * | |||||
| * @param comm A pointer identifying the communication resource to be destroyed. | |||||
| * @return HcclResult | |||||
| * @see HcclCommInitClusterInfo() | |||||
| */ | |||||
| extern HcclResult HcclCommDestroy(HcclComm comm); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif // __cplusplus | |||||
| #endif // HCCL_H_ | |||||
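A single-rank view of the root-info flow declared above: rank 0 generates the root info, every process receives it through some out-of-band channel (MPI, sockets, and so on, outside HCCL's scope), then each rank initializes its communicator and issues collectives. A sketch for one rank:

```c
#include <hccl/hccl.h>

/* Sketch: init with shared root info, sum-allreduce fp32, tear down.
 * sendBuf/recvBuf are device buffers of `count` floats prepared by the caller. */
static HcclResult allreduce_sketch(uint32_t nRanks, uint32_t rank,
                                   const HcclRootInfo *rootInfo,
                                   void *sendBuf, void *recvBuf,
                                   uint64_t count, aclrtStream stream) {
    HcclComm comm = NULL;
    HcclResult ret = HcclCommInitRootInfo(nRanks, rootInfo, rank, &comm);
    if (ret != HCCL_SUCCESS) return ret;
    ret = HcclAllReduce(sendBuf, recvBuf, count, HCCL_DATA_TYPE_FP32,
                        HCCL_REDUCE_SUM, comm, stream);
    /* the collective is queued on `stream`; synchronize before reading recvBuf */
    HcclResult destroyRet = HcclCommDestroy(comm);
    return (ret != HCCL_SUCCESS) ? ret : destroyRet;
}
```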
| @@ -0,0 +1,101 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| /** | |||||
| * @file hccl_types.h | |||||
| * @brief HCCL data type definition | |||||
| * | |||||
| */ | |||||
| #ifndef HCCL_TYPES_H_ | |||||
| #define HCCL_TYPES_H_ | |||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif // __cplusplus | |||||
| /** | |||||
| * @brief HCCL functions return value definition | |||||
| */ | |||||
| typedef enum { | |||||
| HCCL_SUCCESS = 0, /**< success */ | |||||
| HCCL_E_PARA = 1, /**< parameter error */ | |||||
| HCCL_E_PTR = 2, /**< empty pointer */ | |||||
| HCCL_E_MEMORY = 3, /**< memory error */ | |||||
| HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
| HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
| HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
| HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
| HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
| HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
| HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
| HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
| HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
| HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
| HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
| HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
| HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
| HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
| HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
| HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
| HCCL_E_RESERVED /**< reserved */ | |||||
| } HcclResult; | |||||
| /** | |||||
| * @brief handle to HCCL communicator | |||||
| */ | |||||
| typedef void *HcclComm; | |||||
| /** | |||||
| * @brief HCCL reduction operation | |||||
| */ | |||||
| typedef enum { | |||||
| HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
| HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
| HCCL_REDUCE_MAX = 2, /**< max */ | |||||
| HCCL_REDUCE_MIN = 3, /**< min */ | |||||
| HCCL_REDUCE_RESERVED /**< reserved */ | |||||
| } HcclReduceOp; | |||||
| /** | |||||
| * @brief HCCL data type | |||||
| */ | |||||
| typedef enum { | |||||
| HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
| HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
| HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
| HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
| HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
| HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||||
| HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||||
| HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
| } HcclDataType; | |||||
| const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
| /** | |||||
| * @brief HCCL root info | |||||
| */ | |||||
| typedef struct HcclRootInfoDef { | |||||
| char internal[HCCL_ROOT_INFO_BYTES]; | |||||
| } HcclRootInfo; | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif // __cplusplus | |||||
| #endif // HCCL_TYPES_H_ | |||||
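For logging, the HcclResult enum maps naturally onto the comment strings above. A minimal pretty-printer sketch covering the most common values:

```c
#include <hccl/hccl_types.h>

static const char *hccl_result_str(HcclResult r) {
    switch (r) {
        case HCCL_SUCCESS:    return "success";
        case HCCL_E_PARA:     return "parameter error";
        case HCCL_E_PTR:      return "empty pointer";
        case HCCL_E_MEMORY:   return "memory error";
        case HCCL_E_INTERNAL: return "internal error";
        case HCCL_E_TIMEOUT:  return "timeout";
        default:              return "other HCCL error";
    }
}
```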
| @@ -0,0 +1,101 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| #define __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
| static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
| static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
| static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
| static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
| static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
| static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
| static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
| static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
| static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
| static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
| static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
| static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore overflow | |||||
| static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
| static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
| static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
| static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
| static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
| static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
| static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
| static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
| static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
| static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
| static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
| static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
| static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
| static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
| static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
| static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
| static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
| static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
| static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
| static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
| static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
| static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
| static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
| static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
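The numeric prefixes above group related failures: the 107xxx codes cover invalid parameters, the 207xxx codes cover unsupported features and exhausted resources, and the 507xxx codes cover internal runtime and driver errors. A minimal sketch of dispatching on these ranges (AclRtErrorCategory is a hypothetical helper, not part of the ACL API):

    #include <cstdint>
    #include <cstdio>

    // Hypothetical helper: classify an ACL runtime error code by its numeric range.
    static const char *AclRtErrorCategory(int32_t code) {
        if (code >= 107000 && code < 108000) return "invalid parameter";
        if (code >= 207000 && code < 208000) return "unsupported feature or exhausted resource";
        if (code >= 507000 && code <= 507899) return "internal runtime/driver error";
        return "unknown";
    }

    int main() {
        const int32_t rc = 507011;  // ACL_ERROR_RT_MODEL_EXECUTE
        std::printf("error %d: %s\n", rc, AclRtErrorCategory(rc));
        return 0;
    }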
| @@ -1,101 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| /** | |||||
| * @file hccl_types.h | |||||
| * @brief HCCL data type definition | |||||
| * | |||||
| */ | |||||
| #ifndef HCCL_TYPES_H_ | |||||
| #define HCCL_TYPES_H_ | |||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif // __cplusplus | |||||
| /** | |||||
| * @brief HCCL functions return value definition | |||||
| */ | |||||
| typedef enum { | |||||
| HCCL_SUCCESS = 0, /**< success */ | |||||
| HCCL_E_PARA = 1, /**< parameter error */ | |||||
| HCCL_E_PTR = 2, /**< empty pointer */ | |||||
| HCCL_E_MEMORY = 3, /**< memory error */ | |||||
| HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
| HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
| HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
| HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
| HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
| HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
| HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
| HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
| HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
| HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
| HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
| HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
| HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
| HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
| HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
| HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
| HCCL_E_RESERVED /**< reserved */ | |||||
| } HcclResult; | |||||
| /** | |||||
| * @brief handle to HCCL communicator | |||||
| */ | |||||
| typedef void *HcclComm; | |||||
| /** | |||||
| * @brief HCCL reduction operation | |||||
| */ | |||||
| typedef enum { | |||||
| HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
| HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
| HCCL_REDUCE_MAX = 2, /**< max */ | |||||
| HCCL_REDUCE_MIN = 3, /**< min */ | |||||
| HCCL_REDUCE_RESERVED /**< reserved */ | |||||
| } HcclReduceOp; | |||||
| /** | |||||
| * @brief HCCL data type | |||||
| */ | |||||
| typedef enum { | |||||
| HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
| HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
| HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
| HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
| HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
| HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||||
| HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||||
| HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
| } HcclDataType; | |||||
| const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
| /** | |||||
| * @brief HCCL root info | |||||
| */ | |||||
| typedef struct HcclRootInfoDef { | |||||
| char internal[HCCL_ROOT_INFO_BYTES]; | |||||
| } HcclRootInfo; | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif // __cplusplus | |||||
| #endif // HCCL_TYPES_H_ | |||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -65,6 +65,8 @@ in aipp config file, framework will auto add one input node to graph at last. \n | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with the TensorFlow operator AippData. | *Compatible with the TensorFlow operator AippData. | ||||
| *@par Restrictions: | |||||
| *Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. | |||||
| */ | */ | ||||
| REG_OP(AippData) | REG_OP(AippData) | ||||
| .INPUT(data, TensorType::ALL()) | .INPUT(data, TensorType::ALL()) | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -39,6 +39,7 @@ | |||||
| #include "image_ops.h" | #include "image_ops.h" | ||||
| #include "internal_ops.h" | #include "internal_ops.h" | ||||
| #include "linalg_ops.h" | #include "linalg_ops.h" | ||||
| #include "list_ops.h" | |||||
| #include "logging_ops.h" | #include "logging_ops.h" | ||||
| #include "lookup_ops.h" | #include "lookup_ops.h" | ||||
| #include "math_ops.h" | #include "math_ops.h" | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1153,6 +1153,79 @@ REG_OP(EditDistance) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT})) | .OUTPUT(output, TensorType({DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(EditDistance) | .OP_END_FACTORY_REG(EditDistance) | ||||
| /** | |||||
| * @brief Sorts the input tensor along the given axis (SortV2). | |||||
| * @par Inputs: | |||||
| * @li x: An ND tensor of type float16, float32 or double. | |||||
| * @par Attributes: | |||||
| * @li axis: An optional int. The dimension to sort along. Defaults to -1. | |||||
| * @li descending: An optional bool. Controls the sorting order (ascending or descending). Defaults to False. | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor of the same type as "x". | |||||
| * @attention Constraints: | |||||
| * @li "axis" must select the last dimension. | |||||
| * @li This TBE operator is recommended when the amount of data to sort is below 150K elements; | |||||
| descending order performs better than ascending. | |||||
| * @li The upper limit of data on Ascend910 is 2000K elements. | |||||
| */ | |||||
| REG_OP(SortV2) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .ATTR(axis, Int, -1) | |||||
| .ATTR(descending, Bool, false) | |||||
| .OP_END_FACTORY_REG(SortV2) | |||||
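For intuition, a host-side sketch of the SortV2 semantics on a contiguous (rows x cols) buffer, sorting each row (the last axis) with the "descending" attribute mirrored as a flag; this is illustrative only, not the TBE kernel:

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <vector>

    // Sort each row of a (rows x cols) buffer in place along the last axis.
    void SortV2LastAxis(std::vector<float> &data, int rows, int cols, bool descending) {
        for (int r = 0; r < rows; ++r) {
            auto first = data.begin() + static_cast<std::ptrdiff_t>(r) * cols;
            auto last = first + cols;
            if (descending) {
                std::sort(first, last, std::greater<float>());
            } else {
                std::sort(first, last);
            }
        }
    }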
| /** | |||||
| * @brief Expands the input tensor to a compatible shape. \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32, int32, int8, uint8. \n | |||||
| * @li shape: A Tensor specifying the shape that the input tensor is expanded to. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x" and the shape specified by the "shape" input. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the ONNX operator Expand. | |||||
| */ | |||||
| REG_OP(Expand) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .OP_END_FACTORY_REG(Expand) | |||||
| /** | |||||
| * @brief Expands the input tensor to a compatible shape. \n | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32, int32, int8, uint8. \n | |||||
| * @par Attributes: | |||||
| * @li shape: A required ListInt specifying the shape that the input tensor is expanded to. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x" and the shape specified by the "shape" attribute. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the ONNX operator Expand. | |||||
| */ | |||||
| REG_OP(ExpandD) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .REQUIRED_ATTR(shape, ListInt) | |||||
| .OP_END_FACTORY_REG(ExpandD) | |||||
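Expand takes the target shape as a tensor input, while ExpandD fixes it as a compile-time attribute; both follow ONNX Expand broadcasting. A sketch of the simplest case, broadcasting a length-C row vector to a (rows, C) output (full multidirectional broadcasting omitted):

    #include <cstddef>
    #include <vector>

    // Broadcast a length-C vector x to a (rows x C) output, row by row.
    std::vector<float> ExpandRowVector(const std::vector<float> &x, int rows) {
        std::vector<float> y;
        y.reserve(static_cast<std::size_t>(rows) * x.size());
        for (int r = 0; r < rows; ++r) {
            y.insert(y.end(), x.begin(), x.end());
        }
        return y;
    }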
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -908,7 +908,7 @@ REG_OP(TensorArray) | |||||
| .OUTPUT(handle, TensorType({DT_RESOURCE})) | .OUTPUT(handle, TensorType({DT_RESOURCE})) | ||||
| .OUTPUT(flow, TensorType({DT_FLOAT})) | .OUTPUT(flow, TensorType({DT_FLOAT})) | ||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE) | |||||
| .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) | |||||
| .ATTR(dynamic_size, Bool, false) | .ATTR(dynamic_size, Bool, false) | ||||
| .ATTR(clear_after_read, Bool, true) | .ATTR(clear_after_read, Bool, true) | ||||
| .ATTR(identical_element_shapes, Bool, false) | .ATTR(identical_element_shapes, Bool, false) | ||||
| @@ -963,7 +963,7 @@ REG_OP(TensorArrayConcat) | |||||
| DT_QUINT8, DT_QINT32})) | DT_QUINT8, DT_QINT32})) | ||||
| .OUTPUT(lengths, TensorType({DT_INT64})) | .OUTPUT(lengths, TensorType({DT_INT64})) | ||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_SHAPE) | |||||
| .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_RANK) | |||||
| .OP_END_FACTORY_REG(TensorArrayConcat) | .OP_END_FACTORY_REG(TensorArrayConcat) | ||||
| /** | /** | ||||
| @@ -999,7 +999,7 @@ REG_OP(TensorArrayGather) | |||||
| DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, | DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, | ||||
| DT_QUINT8, DT_QINT32})) | DT_QUINT8, DT_QINT32})) | ||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE) | |||||
| .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) | |||||
| .OP_END_FACTORY_REG(TensorArrayGather) | .OP_END_FACTORY_REG(TensorArrayGather) | ||||
| /** | /** | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -122,7 +122,8 @@ REG_OP(MinimumGrad) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *One input: | *One input: | ||||
| *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | ||||
| int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n | |||||
| int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | |||||
| For the float32 type, the actual on-chip calculation is performed in float16. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *dst_type: An required attribute of type int32, specifying the dst data type. \n | *dst_type: An required attribute of type int32, specifying the dst data type. \n | ||||
| @@ -611,6 +612,15 @@ REG_OP(Log1p) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor. Has the same type as "x1". | *y: A Tensor. Has the same type as "x1". | ||||
| *@attention Constraints: | |||||
| *@li x2: The input data must not contain 0. | |||||
| *@li When the element count exceeds 2048, the operator's accuracy cannot | |||||
| *guarantee the double-thousandths requirement in the mini form. | |||||
| *@li Due to architectural differences, the results of this operator on NPU | |||||
| *and CPU may be inconsistent. | |||||
| *@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8. | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with the TensorFlow operator Mod. | *Compatible with the TensorFlow operator Mod. | ||||
| */ | */ | ||||
| @@ -2042,6 +2052,15 @@ REG_OP(FloorDiv) | |||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: Result remainder. | *y: Result remainder. | ||||
| *@attention Constraints: | |||||
| *@li x2: The input data must not contain 0. | |||||
| *@li When the element count exceeds 2048, the operator's accuracy cannot | |||||
| *guarantee the double-thousandths requirement in the mini form. | |||||
| *@li Due to architectural differences, the results of this operator on NPU | |||||
| *and CPU may be inconsistent. | |||||
| *@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8. | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with the TensorFlow operator FloorMod. | * Compatible with the TensorFlow operator FloorMod. | ||||
| */ | */ | ||||
| @@ -2168,6 +2187,14 @@ REG_OP(Tan) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor. Has the same type as "x1". \n | *y: A Tensor. Has the same type as "x1". \n | ||||
| *@attention Constraints: | |||||
| *@li x2: The input data must not contain 0. | |||||
| *@li When the element count exceeds 2048, the operator's accuracy cannot | |||||
| *guarantee the double-thousandths requirement in the mini form. | |||||
| *@li Due to architectural differences, the results of this operator on NPU | |||||
| *and CPU may be inconsistent. | |||||
| *@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8. | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *@li Compatible with the TensorFlow operator TruncateMod. | *@li Compatible with the TensorFlow operator TruncateMod. | ||||
| */ | */ | ||||
| @@ -2829,9 +2856,9 @@ REG_OP(AdamApplyOneAssign) | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
| */ | */ | ||||
| REG_OP(LambApplyOptimizerAssign) | REG_OP(LambApplyOptimizerAssign) | ||||
| .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2842,6 +2869,8 @@ REG_OP(LambApplyOptimizerAssign) | |||||
| .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) | .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .OUTPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(LambApplyOptimizerAssign) | .OP_END_FACTORY_REG(LambApplyOptimizerAssign) | ||||
| /** | /** | ||||
| @@ -2873,7 +2902,8 @@ REG_OP(LambApplyWeightAssign) | |||||
| .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(LambApplyWeightAssign) | .OP_END_FACTORY_REG(LambApplyWeightAssign) | ||||
| /** | /** | ||||
| @@ -3329,8 +3359,297 @@ REG_OP(TensorRedirect) | |||||
| .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | ||||
| DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) | DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) | ||||
| .OP_END_FACTORY_REG(TensorRedirect) | .OP_END_FACTORY_REG(TensorRedirect) | ||||
| } // namespace ge | |||||
| /** | |||||
| * @brief Performs the element-wise division of tensor x1 by tensor x2, | |||||
| * multiplies the result by the scalar value, and adds it to tensor input_data. | |||||
| * @par Inputs: | |||||
| * Four inputs, including: | |||||
| * @li input_data: A mutable input Tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * @li x1: A mutable input Tensor of the same type as input_data. | |||||
| * @li x2: A mutable input Tensor of the same type as input_data. | |||||
| * @li value: A mutable input Tensor. Must be one of the following types: | |||||
| * float16, float32, int32. \n | |||||
| * @par Outputs: | |||||
| * @li y: A mutable Tensor. Has the same type as "input_data". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Addcdiv. | |||||
| */ | |||||
| REG_OP(Addcdiv) | |||||
| .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32 })) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(Addcdiv) | |||||
| /** | |||||
| * @brief Performs the element-wise multiplication of tensor x1 by tensor x2, | |||||
| * multiplies the result by the scalar value, and adds it to tensor input_data. | |||||
| * @par Inputs: | |||||
| * Four inputs, including: | |||||
| * @li input_data: A mutable input Tensor. Must be one of the following types: | |||||
| * float16, float32, int8, int32, uint8. | |||||
| * @li x1: A mutable input Tensor of the same type as input_data. | |||||
| * @li x2: A mutable input Tensor of the same type as input_data. | |||||
| * @li value: A tensor containing only one element, of the same type as input_data. \n | |||||
| * @par Outputs: | |||||
| * @li y: A mutable output Tensor. Has the same type as "input_data". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Addcmul. | |||||
| */ | |||||
| REG_OP(Addcmul) | |||||
| .INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
| .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
| .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
| .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
| .OP_END_FACTORY_REG(Addcmul) | |||||
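Element-wise, the two ops above compute y = input_data + value * (x1 / x2) (Addcdiv) and y = input_data + value * (x1 * x2) (Addcmul), matching the PyTorch operators. A sketch with "value" treated as a plain scalar (in the ops it is a one-element tensor):

    #include <cstddef>

    // Addcdiv: y[i] = input_data[i] + value * (x1[i] / x2[i])
    void AddcdivRef(const float *input_data, const float *x1, const float *x2,
                    float value, float *y, std::size_t n) {
        for (std::size_t i = 0; i < n; ++i) {
            y[i] = input_data[i] + value * (x1[i] / x2[i]);
        }
    }

    // Addcmul: y[i] = input_data[i] + value * (x1[i] * x2[i])
    void AddcmulRef(const float *input_data, const float *x1, const float *x2,
                    float value, float *y, std::size_t n) {
        for (std::size_t i = 0; i < n; ++i) {
            y[i] = input_data[i] + value * (x1[i] * x2[i]);
        }
    }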
| /** | |||||
| * @brief Computes the result of x2 * alpha + x1. | |||||
| * @par Inputs: | |||||
| * @li x1: An ND tensor of type float16, float32, int32. | |||||
| * @li x2: An ND tensor of type float16, float32, int32. | |||||
| * @li alpha: A scalar tensor of type float16, float32. \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same shape and type as "x1". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Axpy. | |||||
| */ | |||||
| REG_OP(AxpyV2) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(AxpyV2) | |||||
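AxpyV2 is the classic axpy update with "alpha" carried as a one-element tensor; treated as a scalar, the semantics reduce to:

    #include <cstddef>

    // AxpyV2: y[i] = x2[i] * alpha + x1[i]
    void AxpyV2Ref(const float *x1, const float *x2, float alpha,
                   float *y, std::size_t n) {
        for (std::size_t i = 0; i < n; ++i) {
            y[i] = x2[i] * alpha + x1[i];
        }
    }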
| /** | |||||
| * @brief Computes the result of x1 + x2. | |||||
| * @par Inputs: | |||||
| * @li x1: An ND tensor of type float16, float, int32. | |||||
| * @li x2: An ND tensor of type float16, float, int32. \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same type as "x1". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Add. | |||||
| */ | |||||
| REG_OP(PtAdd) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(PtAdd) | |||||
| /** | |||||
| * @brief Computes the result of x1 * x2. | |||||
| * @par Inputs: | |||||
| * @li x1: An ND tensor of type float16, float32, int32. | |||||
| * @li x2: An ND tensor of type float16, float32, int32. \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the broadcast shape of "x1" and "x2", and the same type. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator muls. | |||||
| */ | |||||
| REG_OP(PtMuls) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(PtMuls) | |||||
| /** | |||||
| * @brief Computes the result of x1 - x2. | |||||
| * @par Inputs: | |||||
| * @li x1: An ND tensor of type float16, float, int32. | |||||
| * @li x2: An ND tensor of type float16, float, int32. \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same type as "x1". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Sub. | |||||
| */ | |||||
| REG_OP(PtSub) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(PtSub) | |||||
| /** | |||||
| * @brief Adds partial C1 slices of two tensors in the NC1HWC0 format. | |||||
| * @par Inputs: | |||||
| * @li x1: A Tensor in 5HD, and must be one of the following types: float16, | |||||
| * float32. \n | |||||
| * @li x2: A Tensor of the same type as "x1", and the same shape as "x1", | |||||
| * except for the C1 value. \n | |||||
| * @par Attributes: | |||||
| * @li x1_c1_offset: A required int. Offset value of C1 in "x1". \n | |||||
| * @li x2_c1_offset: A required int. Offset value of C1 in "x2". \n | |||||
| * @li c1_len: A required int. C1 len of "y". The value must be less than | |||||
| * the difference between C1 and offset in "x1" and "x2". \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor of the same type as "x1", and the same shape as "x1", | |||||
| * except for the C1 value. Record the result after adding. \n | |||||
| */ | |||||
| REG_OP(StrideAdd) | |||||
| .INPUT(x1, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .INPUT(x2, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .REQUIRED_ATTR(x1_c1_offset, Int) | |||||
| .REQUIRED_ATTR(x2_c1_offset, Int) | |||||
| .REQUIRED_ATTR(c1_len, Int) | |||||
| .OP_END_FACTORY_REG(StrideAdd) | |||||
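A host-side sketch of the slice arithmetic, assuming densely laid-out NC1HWC0 buffers; c1_x1 and c1_x2 are the C1 extents of the inputs, and the output's C1 extent is c1_len:

    #include <cstddef>

    // y[b, c1, :, :, :] = x1[b, x1_off + c1, :, :, :] + x2[b, x2_off + c1, :, :, :]
    // for c1 in [0, c1_len).
    void StrideAddRef(const float *x1, const float *x2, float *y,
                      int n, int c1_x1, int c1_x2, int h, int w, int c0,
                      int x1_off, int x2_off, int c1_len) {
        const std::size_t plane = static_cast<std::size_t>(h) * w * c0;  // one C1 slice
        for (int b = 0; b < n; ++b) {
            for (int c1 = 0; c1 < c1_len; ++c1) {
                const float *s1 = x1 + (static_cast<std::size_t>(b) * c1_x1 + x1_off + c1) * plane;
                const float *s2 = x2 + (static_cast<std::size_t>(b) * c1_x2 + x2_off + c1) * plane;
                float *d = y + (static_cast<std::size_t>(b) * c1_len + c1) * plane;
                for (std::size_t i = 0; i < plane; ++i) {
                    d[i] = s1[i] + s2[i];
                }
            }
        }
    }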
| /** | |||||
| * @brief Compares whether two tensors are completely equal, producing a single bool value. | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li input_x: A Tensor. The first tensor. \n | |||||
| * @li input_y: A Tensor. The second tensor. \n | |||||
| * @par Outputs: | |||||
| * @li output_z: A Tensor of bool type, holding the comparison result of the two inputs. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch equal operator. \n | |||||
| */ | |||||
| REG_OP(TensorEqual) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .OUTPUT(output_z, TensorType({DT_BOOL})) | |||||
| .OP_END_FACTORY_REG(TensorEqual) | |||||
| /** | |||||
| * @brief Element-wise max of each of the input tensors (with Numpy-style broadcasting support). | |||||
| * All inputs and outputs must have the same data type. This operator supports multidirectional | |||||
| * (i.e., Numpy-style) broadcasting. | |||||
| * | |||||
| * @par Inputs: | |||||
| * One dynamic input, including: | |||||
| * @li x: A dynamic-input Tensor. Must be one of the following types: float32, float16, double, int32, int64. | |||||
| * | |||||
| * @par Outputs: | |||||
| * One output, including: | |||||
| * @li y: A Tensor of the same type as "x". | |||||
| * | |||||
| */ | |||||
| REG_OP(MaxN) | |||||
| .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) | |||||
| .OP_END_FACTORY_REG(MaxN) | |||||
| /** | |||||
| * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). | |||||
| * All inputs and outputs must have the same data type. This operator supports multidirectional | |||||
| * (i.e., Numpy-style) broadcasting. | |||||
| * | |||||
| * @par Inputs: | |||||
| * One dynamic input, including: | |||||
| * @li x: A dynamic-input Tensor. Must be one of the following types: float32, float16, double, int32, int64. | |||||
| * | |||||
| * @par Outputs: | |||||
| * One output, including: | |||||
| * @li y: A Tensor of the same type as "x". | |||||
| * | |||||
| */ | |||||
| REG_OP(MinN) | |||||
| .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, | |||||
| DT_INT32, DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, | |||||
| DT_INT32, DT_INT64})) | |||||
| .OP_END_FACTORY_REG(MinN) | |||||
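For same-shape inputs (Numpy-style broadcasting omitted), MaxN and MinN reduce element-wise across all dynamic inputs; a sketch of the max case:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Element-wise max across N same-shape inputs; MinN is identical with std::min.
    std::vector<float> MaxNRef(const std::vector<std::vector<float>> &inputs) {
        std::vector<float> y = inputs.front();
        for (std::size_t k = 1; k < inputs.size(); ++k) {
            for (std::size_t i = 0; i < y.size(); ++i) {
                y[i] = std::max(y[i], inputs[k][i]);
            }
        }
        return y;
    }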
| /** | |||||
| * @brief Calculates x * mask * value. | |||||
| * | |||||
| * @par Inputs: | |||||
| * @li x: A tensor of type float16 or float32, specifying the input to the data layer. | |||||
| * @li mask: A tensor of type int8, float16 or float32, with the same shape as x. \n | |||||
| * | |||||
| * @par Attributes: | |||||
| * value: A required float. \n | |||||
| * | |||||
| * @par Outputs: | |||||
| * y: A tensor of the same type and shape as x. | |||||
| * | |||||
| */ | |||||
| REG_OP(MaskedScale) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||||
| .INPUT(mask, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||||
| .REQUIRED_ATTR(value, Float) | |||||
| .OP_END_FACTORY_REG(MaskedScale) | |||||
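MaskedScale multiplies every element by its mask entry and by the constant "value" attribute; a sketch with an int8 mask:

    #include <cstddef>
    #include <cstdint>

    // MaskedScale: y[i] = x[i] * mask[i] * value
    void MaskedScaleRef(const float *x, const int8_t *mask, float value,
                        float *y, std::size_t n) {
        for (std::size_t i = 0; i < n; ++i) {
            y[i] = x[i] * static_cast<float>(mask[i]) * value;
        }
    }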
| /** | |||||
| * @brief Calculate the lerp function. \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li start: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li end: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li weight: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Outputs: | |||||
| * y: A Tensor with the same type and shape as "start". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Lerp. \n | |||||
| */ | |||||
| REG_OP(Lerp) | |||||
| .INPUT(start, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(end, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(Lerp) | |||||
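Like the PyTorch operator, Lerp interpolates linearly between "start" and "end": y = start + weight * (end - start). As a sketch:

    #include <cstddef>

    // Lerp: y[i] = start[i] + weight[i] * (end[i] - start[i])
    void LerpRef(const float *start, const float *end, const float *weight,
                 float *y, std::size_t n) {
        for (std::size_t i = 0; i < n; ++i) {
            y[i] = start[i] + weight[i] * (end[i] - start[i]);
        }
    }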
| /** | |||||
| *@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, | |||||
| *0 otherwise. The input does not need to be a 2D vector. The "axis" attribute indicates the dimension along | |||||
| *which Hardmax is performed. The output tensor has the same shape as the input and contains the Hardmax values | |||||
| *of the corresponding input. | |||||
| * | |||||
| *@par Inputs: | |||||
| *One input, including: | |||||
| *@li x: A Tensor. Must be one of the following types: float32, float16. | |||||
| * | |||||
| *@par Attributes: | |||||
| *@li axis: An optional int attribute selecting the dimension along which the hardmax is computed. Defaults to -1. | |||||
| * | |||||
| *@par Outputs: | |||||
| *One output, including: | |||||
| *@li y: A Tensor of the same type as "x". | |||||
| * | |||||
| */ | |||||
| REG_OP(HardMax) | |||||
| .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(axis, Int, -1) | |||||
| .OP_END_FACTORY_REG(HardMax) | |||||
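A row-wise sketch of the hardmax semantics for a (rows x cols) buffer with axis = -1; std::max_element returns the first maximum, matching the "first maximum value" rule above:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // HardMax along the last axis: one-hot of the first row-wise maximum.
    std::vector<float> HardMaxLastAxis(const std::vector<float> &x, int rows, int cols) {
        std::vector<float> y(x.size(), 0.0f);
        for (int r = 0; r < rows; ++r) {
            auto first = x.begin() + static_cast<std::ptrdiff_t>(r) * cols;
            auto arg = std::max_element(first, first + cols) - first;
            y[r * cols + arg] = 1.0f;
        }
        return y;
    }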
| } // namespace ge | |||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -45,8 +45,6 @@ REG_OP(HcomAllGather) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | ||||
| .REQUIRED_ATTR(rank_size, Int) | .REQUIRED_ATTR(rank_size, Int) | ||||
| .REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomAllGather) | .OP_END_FACTORY_REG(HcomAllGather) | ||||
| /** | /** | ||||
| @@ -77,8 +75,6 @@ REG_OP(HcomAllReduce) | |||||
| .REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
| .ATTR(fusion, Int, 1) | .ATTR(fusion, Int, 1) | ||||
| .ATTR(fusion_id, Int, -1) | .ATTR(fusion_id, Int, -1) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomAllReduce) | .OP_END_FACTORY_REG(HcomAllReduce) | ||||
| /** | /** | ||||
| @@ -91,7 +87,7 @@ REG_OP(HcomAllReduce) | |||||
| input of this rank will be broadcast to other ranks. | input of this rank will be broadcast to other ranks. | ||||
| * @li fusion: A required integer identifying if the op need to fusion,the | * @li fusion: A required integer identifying if the op need to fusion,the | ||||
| default value is none fusion | default value is none fusion | ||||
| * @li fusion: A required integer identifying the fusion id if para fusion | |||||
| * @li fusion_id: A required integer identifying the fusion id if para fusion | |||||
| is set. | is set. | ||||
| * @li group: A required string identifying the group name of ranks | * @li group: A required string identifying the group name of ranks | ||||
| participating in the op. | participating in the op. | ||||
| @@ -109,10 +105,39 @@ REG_OP(HcomBroadcast) | |||||
| .REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
| .ATTR(fusion, Int, 0) | .ATTR(fusion, Int, 0) | ||||
| .ATTR(fusion_id, Int, -1) | .ATTR(fusion_id, Int, -1) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomBroadcast) | .OP_END_FACTORY_REG(HcomBroadcast) | ||||
| /** | |||||
| * @brief Performs a reduction from the other ranks to the root rank; the | |||||
| reduction result is placed on the root rank. | |||||
| * @par Inputs: | |||||
| * x: A tensor. Must be one of the following types: int8, int16, int32, float16, | |||||
| float32. | |||||
| * @par Attributes: | |||||
| * @li root_rank: A required integer identifying the root rank of the op. | |||||
| * @li reduction: A required string identifying the reduction operation to | |||||
| perform. The supported operations are: "sum", "max", "min", "prod". | |||||
| * @li group: A required string identifying the group name of ranks | |||||
| participating in the op. | |||||
| * @li fusion: An optional integer identifying the fusion flag of the op. | |||||
| 0 (default): no fusion; 1: fusion; 2: fuse the ops by fusion id. | |||||
| * @li fusion_id: An optional integer identifying the fusion id of the op. | |||||
| * The HcomReduce ops with the same fusion id will be fused. | |||||
| * @par Outputs: | |||||
| * y: A Tensor. Has the same type as "x". | |||||
| * @attention Constraints: | |||||
| *"group" is limited to 128 characters. Use "hccl_world_group" | |||||
| as the name of a world group. | |||||
| */ | |||||
| REG_OP(HcomReduce) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(root_rank, Int) | |||||
| .REQUIRED_ATTR(reduction, String) | |||||
| .REQUIRED_ATTR(group, String) | |||||
| .ATTR(fusion, Int, 0) | |||||
| .ATTR(fusion_id, Int, -1) | |||||
| .OP_END_FACTORY_REG(HcomReduce) | |||||
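As a serial stand-in for the distributed semantics with reduction == "sum": every rank contributes one buffer and only the root rank receives the reduced result. The sketch ignores groups, fusion, and actual communication:

    #include <cstddef>
    #include <vector>

    // Serial model of HcomReduce (reduction == "sum"): buffers[r] is rank r's
    // input; the return value is what lands on root_rank.
    std::vector<float> ReduceSumToRoot(const std::vector<std::vector<float>> &buffers) {
        std::vector<float> out(buffers.front().size(), 0.0f);
        for (const auto &b : buffers) {
            for (std::size_t i = 0; i < b.size(); ++i) {
                out[i] += b[i];
            }
        }
        return out;
    }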
| /** | /** | ||||
| * @brief Performs reduction across all input tensors, scattering in equal | * @brief Performs reduction across all input tensors, scattering in equal | ||||
| blocks among ranks, each rank getting a chunk of data based on its rank | blocks among ranks, each rank getting a chunk of data based on its rank | ||||
| @@ -139,8 +164,6 @@ REG_OP(HcomReduceScatter) | |||||
| .REQUIRED_ATTR(reduction, String) | .REQUIRED_ATTR(reduction, String) | ||||
| .REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
| .REQUIRED_ATTR(rank_size, Int) | .REQUIRED_ATTR(rank_size, Int) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomReduceScatter) | .OP_END_FACTORY_REG(HcomReduceScatter) | ||||
| /** | /** | ||||
| @@ -167,8 +190,6 @@ REG_OP(HcomSend) | |||||
| .REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
| .REQUIRED_ATTR(sr_tag, Int) | .REQUIRED_ATTR(sr_tag, Int) | ||||
| .REQUIRED_ATTR(dest_rank, Int) | .REQUIRED_ATTR(dest_rank, Int) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomSend) | .OP_END_FACTORY_REG(HcomSend) | ||||
| /** | /** | ||||
| @@ -202,8 +223,6 @@ REG_OP(HcomReceive) | |||||
| .REQUIRED_ATTR(src_rank, Int) | .REQUIRED_ATTR(src_rank, Int) | ||||
| .REQUIRED_ATTR(shape, ListInt) | .REQUIRED_ATTR(shape, ListInt) | ||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomReceive) | .OP_END_FACTORY_REG(HcomReceive) | ||||
| /** | /** | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -31,11 +31,12 @@ namespace ge { | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
| interpreted as channels, and must be three. Inputs include: | interpreted as channels, and must be three. Inputs include: | ||||
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||||
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||||
| must be NHWC. | |||||
| *@li delta:A Tensor of type float. A float delta to add to the hue . \n | *@li delta:A Tensor of type float. A float delta to add to the hue . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor of type float . \n | |||||
| *y:A Tensor of type float. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
| @@ -57,11 +58,12 @@ REG_OP(AdjustHue) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
| interpreted as channels, and must be three. Inputs include: | interpreted as channels, and must be three. Inputs include: | ||||
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||||
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||||
| must be NHWC. | |||||
| *@li scale:A Tensor of type float. A float scale to add to the saturation . \n | *@li scale:A Tensor of type float. A float scale to add to the saturation . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor of type float . \n | |||||
| *y:A Tensor of type float. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
| @@ -83,11 +85,12 @@ REG_OP(AdjustSaturation) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images is a tensor of at least 3 dimensions. The last 3 dimensions are | *Input images is a tensor of at least 3 dimensions. The last 3 dimensions are | ||||
| interpreted as '[height, width, channels]'. Inputs include: | interpreted as '[height, width, channels]'. Inputs include: | ||||
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||||
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||||
| must be NHWC. | |||||
| *@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n | *@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor of type float . \n | |||||
| *y:A Tensor of type float. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
| @@ -112,7 +115,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n | |||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, | *@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, | ||||
| int16, int32, int64, float16, float, double. A 4-D tensor of shape | int16, int32, int64, float16, float, double. A 4-D tensor of shape | ||||
| [batch, image_height, image_width, depth]. | |||||
| [batch, image_height, image_width, depth]. The format must be NHWC. | |||||
| *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | ||||
| *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with | *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with | ||||
| int32 values in [0, batch). | int32 values in [0, batch). | ||||
| @@ -127,7 +130,7 @@ extrapolation, when applicable. | |||||
| NearestNeighbor . \n | NearestNeighbor . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor of type float . \n | |||||
| *y:A Tensor of type float. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images must be a 4-D tensor . \n | *Input images must be a 4-D tensor . \n | ||||
| @@ -193,7 +196,9 @@ boxes tensor . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images and grads must be a 4-D tensor. Inputs include: | *Input images and grads must be a 4-D tensor. Inputs include: | ||||
| *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | ||||
| The format must be NHWC. | |||||
| *@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. | *@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. | ||||
| The format must be NHWC. | |||||
| Both image_height and image_width need to be positive. | Both image_height and image_width need to be positive. | ||||
| *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | ||||
| specifies the coordinates of a box in the box_ind[i] image and is specified in | specifies the coordinates of a box in the box_ind[i] image and is specified in | ||||
| @@ -233,6 +238,7 @@ images tensor . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
| *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | ||||
| The format must be NHWC. | |||||
| *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | ||||
| specifies the coordinates of a box in the box_ind[i] image and is specified | specifies the coordinates of a box in the box_ind[i] image and is specified | ||||
| in normalized coordinates [y1, x1, y2, x2]. | in normalized coordinates [y1, x1, y2, x2]. | ||||
| @@ -248,7 +254,8 @@ method: A string specifying the interpolation method. Only 'bilinear' is | |||||
| supported for now . \n | supported for now . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A 4-D tensor of shape [batch, image_height, image_width, depth] . \n | |||||
| *y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format | |||||
| must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input grads must be a 4-D tensor . \n | *Input grads must be a 4-D tensor . \n | ||||
| @@ -273,6 +280,7 @@ REG_OP(CropAndResizeGradImage) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input x must be a 4-D tensor. Inputs include: | *Input x must be a 4-D tensor. Inputs include: | ||||
| *@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. | *@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. | ||||
| The format must be NHWC. | |||||
| *@li size: A 1-D tensor of 2 elements containing the size of the glimpses to | *@li size: A 1-D tensor of 2 elements containing the size of the glimpses to | ||||
| extract. The glimpse height must be specified first, following by the glimpse | extract. The glimpse height must be specified first, following by the glimpse | ||||
| width. | width. | ||||
| @@ -293,7 +301,7 @@ uniform_noise . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A tensor representing the glimpses [batch_size, glimpse_height, | *y:A tensor representing the glimpses [batch_size, glimpse_height, | ||||
| glimpse_width, channels] . \n | |||||
| glimpse_width, channels]. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input x must be a 4-D tensor . \n | *Input x must be a 4-D tensor . \n | ||||
| @@ -340,7 +348,8 @@ REG_OP(HSVToRGB) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li images: 4-D with shape [batch, height, width, channels]. | |||||
| *@li images: 4-D with shape [batch, height, width, channels]. The format must | |||||
| be NHWC. | |||||
| *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | ||||
| size for the images. | size for the images. | ||||
| *@li min: A Tensor of type float. | *@li min: A Tensor of type float. | ||||
| @@ -354,6 +363,7 @@ the values at the corner pixels. Defaults to false. | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. | *@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. | ||||
| The format must be NHWC. | |||||
| *@li y_min: A Tensor of type float. | *@li y_min: A Tensor of type float. | ||||
| *@li y_max: A Tensor of type float . \n | *@li y_max: A Tensor of type float . \n | ||||
| @@ -381,7 +391,8 @@ REG_OP(QuantizedResizeBilinear) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li images: 4-D with shape [batch, height, width, channels]. | |||||
| *@li images: 4-D with shape [batch, height, width, channels]. The format must | |||||
| be NHWC. | |||||
| *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | ||||
| The new size for the images . \n | The new size for the images . \n | ||||
| @@ -391,7 +402,8 @@ output tensors are aligned, preserving the values at the corner pixels. | |||||
| Defaults to false . \n | Defaults to false . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||||
| *y: 4-D with shape [batch, new_height, new_width, channels]. The format must | |||||
| be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images can be of different types but output images are always float . \n | *Input images can be of different types but output images are always float . \n | ||||
| @@ -414,10 +426,10 @@ REG_OP(ResizeArea) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
| *@li grads: A Tensor of type float. 4-D with shape [batch, height, width, | *@li grads: A Tensor of type float. 4-D with shape [batch, height, width, | ||||
| channels]. | |||||
| channels]. The format must be NHWC. | |||||
| *@li original_image: A Tensor. Must be one of the following types: float, | *@li original_image: A Tensor. Must be one of the following types: float, | ||||
| double. 4-D with shape [batch, orig_height, orig_width, channels], The image | double. 4-D with shape [batch, orig_height, orig_width, channels], The image | ||||
| tensor that was resized . \n | |||||
| tensor that was resized. The format must be NHWC. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li align_corners: An optional bool. Defaults to False. If true, the centers | *@li align_corners: An optional bool. Defaults to False. If true, the centers | ||||
| @@ -426,10 +438,10 @@ false. | |||||
| *@li half_pixel_centers: An optional bool. Defaults to False . \n | *@li half_pixel_centers: An optional bool. Defaults to False . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor. Has the same type as original_image . \n | |||||
| *y: A Tensor. Has the same type as original_image. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images can be of different types but output images are always float . \n | |||||
| *Input images can be of different types but output images are always float . | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with tensorflow ResizeBicubicGrad operator. | *Compatible with tensorflow ResizeBicubicGrad operator. | ||||
| @@ -448,7 +460,8 @@ REG_OP(ResizeBicubicGrad) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li images: 4-D with shape [batch, height, width, channels]. | |||||
| *@li images: 4-D with shape [batch, height, width, channels]. The format | |||||
| must be NHWC. | |||||
| *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | ||||
| size for the images . \n | size for the images . \n | ||||
| @@ -459,10 +472,11 @@ Defaults to false. | |||||
| *@li half_pixel_centers: An optional bool. Defaults to False . \n | *@li half_pixel_centers: An optional bool. Defaults to False . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||||
| *y: 4-D with shape [batch, new_height, new_width, channels]. The format | |||||
| must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images can be of different types but output images are always float . \n | |||||
| *Input images can be of different types but output images are always float . | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with tensorflow ResizeBicubic operator. | *Compatible with tensorflow ResizeBicubic operator. | ||||
| @@ -483,7 +497,7 @@ REG_OP(ResizeBicubic) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
| *@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, | *@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, | ||||
| float16, float, double. 4-D with shape [batch, height, width, channels]. | |||||
| float16, float, double. Must set the format; supported format list: ["NCHW", "NHWC"]. | |||||
| *@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. | *@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. | ||||
| The original input size . \n | The original input size . \n | ||||
| @@ -550,9 +564,8 @@ REG_OP(ResizeNearestNeighborV2GradD) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
| *@li grads: A Tensor of type float32. 4-D with shape [batch, height, width, | |||||
| channels]. | |||||
| *@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width, | |||||
| *@li grads: A Tensor of type float32. 4-D with shape [batch, height, width, channels]. Must set the format; supported format list: ["NCHW", "NHWC"]. | |||||
| *@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width, | |||||
| channels], the image tensor that was resized. Must set the format; supported format list: ["NCHW", "NHWC"] . \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -583,7 +596,7 @@ REG_OP(ResizeBilinearV2Grad) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li x: 4-D with shape [batch, height, width, channels]. | |||||
| *@li x: A 4-D tensor. Must set the format; supported format list: ["NCHW", "NHWC"]. | |||||
| *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | ||||
| size for the images . \n | size for the images . \n | ||||
| @@ -697,7 +710,7 @@ REG_OP(SampleDistortedBoundingBoxExt2) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input x must be a 4-D tensor. Inputs include: | *Input x must be a 4-D tensor. Inputs include: | ||||
| *@li x: 4-D with shape [batch, height, width, channels]. | |||||
| *@li x: A 4-D tensor. Must set the format; supported format list: ["NCHW", "NHWC"]. | |||||
| *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | ||||
| The new size for the images . \n | The new size for the images . \n | ||||
| @@ -729,12 +742,12 @@ REG_OP(ResizeNearestNeighborV2) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li images: A Tensor. Must be one of the following types: float. 4-D with | *@li images: A Tensor. Must be one of the following types: float. 4-D with | ||||
| shape [batch, height, width, depth]. A batch of images. | |||||
| shape [batch, height, width, depth]. A batch of images. The format must be NHWC. | |||||
| *@li boxes: A Tensor of type float32. 3-D with shape [batch, | *@li boxes: A Tensor of type float32. 3-D with shape [batch, | ||||
| num_bounding_boxes, 4] containing bounding boxes . \n | num_bounding_boxes, 4] containing bounding boxes . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *A Tensor. Has the same type as images . \n | |||||
| *A Tensor. Has the same type as images. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images must be a 4-D tensor . \n | *Input images must be a 4-D tensor . \n | ||||
| @@ -1342,6 +1355,129 @@ REG_OP(SpatialTransformerD) | |||||
| .ATTR(use_default_theta, ListBool, {}) | .ATTR(use_default_theta, ListBool, {}) | ||||
| .OP_END_FACTORY_REG(SpatialTransformerD) | .OP_END_FACTORY_REG(SpatialTransformerD) | ||||
| } // namespace ge | |||||
| /** | |||||
| * @brief Resize the input tensor. \n | |||||
| Currently, resizing image tensors is supported only with nearest-neighbor and linear interpolation. | |||||
| * @par Inputs: | |||||
| * Input x must be a 4-D tensor. Inputs include: \n | |||||
| * @li x: A Tensor. Must be one of the following types: uint8, int8, int16, uint16, \n | |||||
| int32, int64, float16, float, double. 4-D with shape [batch, height, width, channels] \n | |||||
| or shape [batch, channels, height, width]. | |||||
| * @li roi: A 1-D float Tensor. Only takes effect when attr coordinate_transformation_mode \n | |||||
| is "tf_crop_and_resize". | |||||
| * @li scales: A 1-D float Tensor, the scale array along each dimension. Only one of \n | |||||
| 'scales' and 'sizes' can be specified (see the shape sketch after this registration). | |||||
| * @li sizes: A 1-D int64 Tensor, the size of the output tensor. Only one of \n | |||||
| 'scales' and 'sizes' can be specified. If 'sizes' is specified, then set 'scales' \n | |||||
| to empty data (zero shape) in this operator's input list. | |||||
| * @par Attributes: | |||||
| * @li coordinate_transformation_mode: String. Defaults to half_pixel. Specifies how to transform \n | |||||
| the coordinate in the resized tensor to the coordinate in the original tensor. \n | |||||
| Other options: pytorch_half_pixel, align_corners, asymmetric, tf_half_pixel_for_nn, \n | |||||
| tf_crop_and_resize. | |||||
| * @li cubic_coeff_a: Float. Defaults to -0.75, only used in cubic interpolation. \n | |||||
| Other option: -0.5. | |||||
| * @li exclude_outside: Int. Defaults to 0. If set to 1, the weights of sampling \n | |||||
| locations outside the tensor will be set to 0 and the weights will be renormalized \n | |||||
| so that they sum to 1.0. | |||||
| * @li extrapolation_value: Float. Defaults to 0.0f. When coordinate_transformation_mode \n | |||||
| is "tf_crop_and_resize" and x_original is outside the range [0, length_original - 1], \n | |||||
| this value is used as the corresponding output value. | |||||
| * @li mode: String. Defaults to nearest. Three interpolation modes: nearest (default), \n | |||||
| linear and cubic. | |||||
| * @li nearest_mode: String. Defaults to round_prefer_floor. Four modes: round_prefer_floor, \n | |||||
| round_prefer_ceil, floor, ceil. Only used by nearest interpolation. | |||||
| * @par Outputs: | |||||
| * y: A Tensor. Has the same type as x. | |||||
| * @attention Constraints: \n | |||||
| * Input x must be a 4-D tensor. | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the ONNX Resize operator. | |||||
| */ | |||||
| REG_OP(Resize) | |||||
| .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, | |||||
| DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(scales, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(sizes, TensorType({DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, | |||||
| DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .ATTR(coordinate_transformation_mode, String, "half_pixel") | |||||
| .ATTR(cubic_coeff_a, Float, -0.75) | |||||
| .ATTR(exclude_outside, Int, 0) | |||||
| .ATTR(extrapolation_value, Float, 0) | |||||
| .ATTR(mode, String, "nearest") | |||||
| .ATTR(nearest_mode, String, "round_prefer_floor") | |||||
| .OP_END_FACTORY_REG(Resize) | |||||
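To make the 'scales' versus 'sizes' contract above concrete, here is a minimal C++ sketch; the helper name and the floor-based rounding are assumptions following the usual ONNX Resize semantics, not something this patch defines.

#include <cmath>
#include <cstdint>
#include <vector>

// Hypothetical helper: a non-empty 'sizes' fully determines the output
// shape (and 'scales' must then be an empty, zero-shape tensor); otherwise
// each output dim is floor(input_dim * scale).
std::vector<int64_t> ResizeOutputShape(const std::vector<int64_t> &in_shape,
                                       const std::vector<float> &scales,
                                       const std::vector<int64_t> &sizes) {
  if (!sizes.empty()) {
    return sizes;
  }
  std::vector<int64_t> out_shape;
  for (size_t i = 0; i < in_shape.size(); ++i) {
    out_shape.push_back(
        static_cast<int64_t>(std::floor(in_shape[i] * scales[i])));
  }
  return out_shape;
}

For example, in_shape {1, 3, 224, 224} with scales {1.0, 1.0, 0.5, 0.5} and an empty sizes yields {1, 3, 112, 112}.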
| /** | |||||
| *@brief Decodes a JPEG-encoded string into a uint8 image tensor. \n | |||||
| *@par Inputs: | |||||
| *@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n | |||||
| *@par Attributes: | |||||
| *@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. | |||||
| *@li ratio: An optional int. Defaults to 1. Downscaling ratio. | |||||
| *@li fancy_upscaling: An optional bool. Defaults to True. If true, use a slower but nicer upscaling of the chroma planes. | |||||
| *@li try_recover_truncated: An optional bool. Defaults to False. If true, try to recover an image from truncated input. | |||||
| *@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted. | |||||
| *@li dct_method: An optional string. Defaults to "". A string specifying a hint about the algorithm used for decompression. \n | |||||
| *@par Outputs: | |||||
| *image: A Tensor of type uint8. | |||||
| */ | |||||
| REG_OP(DecodeJpeg) | |||||
| .INPUT(contents, TensorType({DT_STRING})) | |||||
| .OUTPUT(image, TensorType({DT_UINT8})) | |||||
| .ATTR(channels, Int, 0) | |||||
| .ATTR(ratio, Int, 1) | |||||
| .ATTR(fancy_upscaling, Bool, true) | |||||
| .ATTR(try_recover_truncated, Bool, false) | |||||
| .ATTR(acceptable_fraction, Float, 1.0) | |||||
| .ATTR(dct_method, String, "") | |||||
| .OP_END_FACTORY_REG(DecodeJpeg) | |||||
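A minimal graph-construction sketch using the wrapper class that REG_OP generates for this operator; the Data/Graph plumbing follows the public GE graph API, and the attribute values are purely illustrative.

#include "all_ops.h"      // generated wrappers, e.g. ge::op::DecodeJpeg
#include "graph/graph.h"

// Decodes a 0-D string tensor to a 3-channel uint8 image.
ge::Graph BuildDecodeJpegGraph() {
  auto contents = ge::op::Data("contents").set_attr_index(0);
  auto decode = ge::op::DecodeJpeg("decode")
                    .set_input_contents(contents)
                    .set_attr_channels(3)                 // force RGB output
                    .set_attr_try_recover_truncated(true);
  ge::Graph graph("decode_jpeg_graph");
  graph.SetInputs({contents}).SetOutputs({decode});
  return graph;
}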
| /** | |||||
| *@brief Image warping using per-pixel flow vectors. \n | |||||
| *@par Inputs: | |||||
| *@li image: 4-D Tensor with shape `[batch, height, width, channels]`. | |||||
| *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n | |||||
| *@par Outputs: | |||||
| *y: Returns 4-D with the same shape and dtype as `image`. \n | |||||
| */ | |||||
| REG_OP(DenseImageWarp) | |||||
| .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OP_END_FACTORY_REG(DenseImageWarp) | |||||
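For intuition, a scalar reference sketch of the warp, assuming the TensorFlow-Addons dense_image_warp convention (each output pixel bilinearly samples the input at the location displaced by the negated flow); single batch and channel, helper name illustrative.

#include <algorithm>
#include <cmath>
#include <vector>

// Samples img (h x w, row-major) at (y - flow_y, x - flow_x) with
// bilinear interpolation; corners are clamped to stay in bounds.
float WarpPixel(const std::vector<float> &img, int h, int w,
                int y, int x, float flow_y, float flow_x) {
  const float qy = static_cast<float>(y) - flow_y;
  const float qx = static_cast<float>(x) - flow_x;
  const int y0 = std::clamp(static_cast<int>(std::floor(qy)), 0, h - 2);
  const int x0 = std::clamp(static_cast<int>(std::floor(qx)), 0, w - 2);
  const float dy = qy - y0;
  const float dx = qx - x0;
  auto at = [&](int yy, int xx) { return img[yy * w + xx]; };
  return (1 - dy) * ((1 - dx) * at(y0, x0) + dx * at(y0, x0 + 1)) +
         dy * ((1 - dx) * at(y0 + 1, x0) + dx * at(y0 + 1, x0 + 1));
}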
| /** | |||||
| *@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n | |||||
| *@par Inputs: | |||||
| *@li grad: Gradients with respect to the DenseImageWarp output. | |||||
| *@li image: 4-D Tensor with shape `[batch, height, width, channels]`. | |||||
| *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n | |||||
| *@par Outputs: | |||||
| *grad_image: Returns 4-D with the same shape and dtype as `image`. | |||||
| *grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n | |||||
| */ | |||||
| REG_OP(DenseImageWarpGrad) | |||||
| .INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OP_END_FACTORY_REG(DenseImageWarpGrad) | |||||
| } // namespace ge | |||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -0,0 +1,230 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| /*! | |||||
| * \file list_ops.h | |||||
| * \brief | |||||
| */ | |||||
| #ifndef OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ | |||||
| #define OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ | |||||
| #include <algorithm> | |||||
| #include "graph/operator_reg.h" | |||||
| #include "graph/operator.h" | |||||
| namespace ge { | |||||
| /** | |||||
| *@brief Creates and returns an empty tensor list. \n | |||||
| *@par Inputs: | |||||
| *@li element_shape: A shape compatible with that of elements in the list. | |||||
| *@li max_num_elements: The maximum number of elements. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li handle: An empty tensor list . \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow EmptyTensorList operator. | |||||
| */ | |||||
| REG_OP(EmptyTensorList) | |||||
| .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .INPUT(max_num_elements, TensorType({DT_INT32})) | |||||
| .OUTPUT(handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(EmptyTensorList) | |||||
| /** | |||||
| *@brief Returns a list which has the passed-in `Tensor` as last element | |||||
| and the other elements of the given list in `input_handle`. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The old list. | |||||
| *@li tensor: The tensor to put on the list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle: A list with the elements of the old list followed by tensor. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListPushBack operator. | |||||
| */ | |||||
| REG_OP(TensorListPushBack) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListPushBack) | |||||
| /** | |||||
| *@brief Returns the last element of the input list, as well as a | |||||
| list with all but that element. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. | |||||
| *@li element_shape: A shape compatible with that of elements in the list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle: A list with all but the last element of the old list. | |||||
| *@li tensor: The withdrawn last element of the list. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListPopBack operator. | |||||
| */ | |||||
| REG_OP(TensorListPopBack) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(element_shape, TensorType({DT_INT32})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListPopBack) | |||||
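The three list operators above compose naturally; a wiring sketch using the generated wrapper classes (the upstream Const/Data operators are passed in, and the set_input_*/set_attr_* names follow the REG_OP code-generation convention):

#include "all_ops.h"  // generated wrappers (ge::op::EmptyTensorList, ...)

// empty list -> push one tensor -> pop it back off.
void BuildPushPopNodes(const ge::Operator &element_shape,
                       const ge::Operator &max_num_elements,
                       const ge::Operator &tensor) {
  auto list = ge::op::EmptyTensorList("empty_list")
                  .set_input_element_shape(element_shape)
                  .set_input_max_num_elements(max_num_elements)
                  .set_attr_element_dtype(ge::DT_FLOAT);
  auto pushed = ge::op::TensorListPushBack("push")
                    .set_input_input_handle(list)
                    .set_input_tensor(tensor)
                    .set_attr_element_dtype(ge::DT_FLOAT);
  auto popped = ge::op::TensorListPopBack("pop")
                    .set_input_input_handle(pushed)
                    .set_input_element_shape(element_shape)
                    .set_attr_element_dtype(ge::DT_FLOAT);
  (void)popped;  // outputs: output_handle (all but last) and tensor (last)
}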
| /** | |||||
| *@brief The number of tensors in the input tensor list. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. \n | |||||
| *@par Outputs: | |||||
| *@li length:The number of tensors in the list. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListLength operator. | |||||
| */ | |||||
| REG_OP(TensorListLength) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .OUTPUT(length, TensorType({DT_INT32})) | |||||
| .OP_END_FACTORY_REG(TensorListLength) | |||||
| /** | |||||
| *@brief The shape of elements in the input tensor list. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. \n | |||||
| *@par Attributes: | |||||
| *@li shape_type: The type of shape in the list. \n | |||||
| *@par Outputs: | |||||
| *@li element_shape:A shape compatible with that of elements in the list. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListElementShape operator. | |||||
| */ | |||||
| REG_OP(TensorListElementShape) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .OUTPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .ATTR(shape_type, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListElementShape) | |||||
| /** | |||||
| *@brief Creates a list of the given size with empty elements. \n | |||||
| *@par Inputs: | |||||
| *@li element_shape: A shape compatible with that of elements in the list. | |||||
| *@li num_elements: The number of elements to reserve. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. | |||||
| *@li shape_type: The type of shape in the list. \n | |||||
| *@par Outputs: | |||||
| *@li handle: An output tensor list . \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListReserve operator. | |||||
| */ | |||||
| REG_OP(TensorListReserve) | |||||
| .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .INPUT(num_elements, TensorType({DT_INT32})) | |||||
| .OUTPUT(handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .ATTR(shape_type, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListReserve) | |||||
| /** | |||||
| *@brief Gets the element at the given index position in the input tensor list. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. | |||||
| *@li index: A tensor of position. | |||||
| *@li element_shape: A shape compatible with that of elements in the list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li item: The output tensor at the given index position . \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListGetItem operator. | |||||
| */ | |||||
| REG_OP(TensorListGetItem) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(index, TensorType({DT_INT32})) | |||||
| .INPUT(element_shape, TensorType({DT_INT32})) | |||||
| .OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListGetItem) | |||||
| /** | |||||
| *@brief Sets the index-th position of the list to contain the given tensor. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. | |||||
| *@li index: The position in the list to which the tensor will be assigned. | |||||
| *@li item: The element to be assigned to that position. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle: An output tensor list . \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListSetItem operator. | |||||
| */ | |||||
| REG_OP(TensorListSetItem) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(index, TensorType({DT_INT32})) | |||||
| .INPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListSetItem) | |||||
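The reserve/set/get trio chains the same way; a sketch under the same assumptions as the push/pop example above:

// Reserve a fixed-length list, write into one slot, then read it back.
void BuildReserveSetGet(const ge::Operator &element_shape,
                        const ge::Operator &num_elements,
                        const ge::Operator &index,
                        const ge::Operator &item) {
  auto reserved = ge::op::TensorListReserve("reserve")
                      .set_input_element_shape(element_shape)
                      .set_input_num_elements(num_elements)
                      .set_attr_element_dtype(ge::DT_FLOAT);
  auto written = ge::op::TensorListSetItem("set_item")
                     .set_input_input_handle(reserved)
                     .set_input_index(index)
                     .set_input_item(item)
                     .set_attr_element_dtype(ge::DT_FLOAT);
  auto read = ge::op::TensorListGetItem("get_item")
                  .set_input_input_handle(written)
                  .set_input_index(index)
                  .set_input_element_shape(element_shape)
                  .set_attr_element_dtype(ge::DT_FLOAT);
  (void)read;  // the 'item' output holds the tensor written above
}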
| } // namespace ge | |||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ | |||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -365,6 +365,27 @@ REG_OP(GetNext) | |||||
| .ATTR(channel_name, String, "") | .ATTR(channel_name, String, "") | ||||
| .OP_END_FACTORY_REG(GetNext) | .OP_END_FACTORY_REG(GetNext) | ||||
| /** | |||||
| *@brief Get dynamic dims after GetNext. \n | |||||
| *@par Inputs: | |||||
| *input: A nested structure of Tensor objects, from GetNext's output. \n | |||||
| *@par Attributes: | |||||
| *@li shape_info: GE shape_info for each input; -1 means an unknown dim. | |||||
| *@li N: The number of inputs. \n | |||||
| *@par Outputs: | |||||
| *dims: The GE unknown dims, a vector of int64. \n | |||||
| */ | |||||
| REG_OP(GetDynamicDims) | |||||
| .DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64})) | |||||
| .OUTPUT(dims, TensorType({DT_INT32, DT_INT64})) | |||||
| .REQUIRED_ATTR(shape_info, ListInt) | |||||
| .REQUIRED_ATTR(N, Int) | |||||
| .OP_END_FACTORY_REG(GetDynamicDims) | |||||
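A wiring sketch; note the flattened shape_info encoding shown here is an assumption (only the "-1 marks an unknown dim" convention is stated above), and the create_dynamic_input_* name follows the DYNAMIC_INPUT code-generation convention:

#include "all_ops.h"

// Two upstream GetNext outputs with compile-time shapes [3] and [-1, 16];
// the -1 entry is the dim this op reports at runtime.
ge::op::GetDynamicDims MakeGetDynamicDims() {
  auto op = ge::op::GetDynamicDims("get_dynamic_dims");
  op.create_dynamic_input_input(2);
  op.set_attr_shape_info({3, -1, 16});  // assumed per-input flattening
  op.set_attr_N(2);
  return op;
}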
| /** | /** | ||||
| *@brief End of sequence . \n | *@brief End of sequence . \n | ||||
| @@ -710,6 +731,9 @@ REG_OP(IFMR) | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with mindspore | *Compatible with mindspore | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(WtsARQ) | REG_OP(WtsARQ) | ||||
| @@ -741,6 +765,9 @@ REG_OP(WtsARQ) | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with mindspore | *Compatible with mindspore | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ActsULQ) | REG_OP(ActsULQ) | ||||
| @@ -768,6 +795,9 @@ REG_OP(ActsULQ) | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with mindspore | *Compatible with mindspore | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ActsULQInputGrad) | REG_OP(ActsULQInputGrad) | ||||
| @@ -790,6 +820,9 @@ REG_OP(ActsULQInputGrad) | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with mindspore | *Compatible with mindspore | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ActULQClampMaxGrad) | REG_OP(ActULQClampMaxGrad) | ||||
| @@ -812,6 +845,9 @@ REG_OP(ActULQClampMaxGrad) | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with mindspore | *Compatible with mindspore | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ActULQClampMinGrad) | REG_OP(ActULQClampMinGrad) | ||||
| @@ -821,6 +857,33 @@ REG_OP(ActULQClampMinGrad) | |||||
| .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(ActULQClampMinGrad) | .OP_END_FACTORY_REG(ActULQClampMinGrad) | ||||
| /** | |||||
| * @brief Computes Lp norm. | |||||
| * @par Inputs: | |||||
| * @li x: An ND tensor of type float16, float32. \n | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li p: Int, the order of the norm ("inf" or "-inf" in the Pytorch API); default value is 2. | |||||
| * @li axes: ListInt; an empty list {} means the norm is computed over all axes. | |||||
| * @li keepdim: Bool, default is false. | |||||
| * @li epsilon: Float, default is 1e-12. \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor of type float16, float32. The shape of y depends | |||||
| * on axes and keepdim. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator LpNorm. | |||||
| */ | |||||
| REG_OP(LpNorm) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(p, Int, 2) | |||||
| .ATTR(axes, ListInt, {}) | |||||
| .ATTR(keepdim, Bool, false) | |||||
| .ATTR(epsilon, Float, 1e-12) | |||||
| .OP_END_FACTORY_REG(LpNorm) | |||||
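A scalar reference of the reduction for the empty-axes case (all elements), reading epsilon as a lower bound on the result; both the helper name and that epsilon interpretation are assumptions:

#include <algorithm>
#include <cmath>
#include <vector>

// y = max(epsilon, (sum_i |x_i|^p)^(1/p)) over every element of x.
float LpNormAllAxes(const std::vector<float> &x, int p, float epsilon) {
  double acc = 0.0;
  for (float v : x) {
    acc += std::pow(std::fabs(v), p);
  }
  return static_cast<float>(
      std::max(std::pow(acc, 1.0 / p), static_cast<double>(epsilon)));
}

For example, LpNormAllAxes({3.0f, 4.0f}, 2, 1e-12f) returns 5.0f.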
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -38,8 +38,8 @@ namespace ge { | |||||
| * float32, int32. Has format [ND, NHWC] . \n | * float32, int32. Has format [ND, NHWC] . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
| *@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
| *@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
| *@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | ||||
| @@ -70,8 +70,8 @@ REG_OP(MatMul) | |||||
| * float32, int32. Has format [ND, NHWC] . \n | * float32, int32. Has format [ND, NHWC] . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
| *@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
| *@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
| *@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | ||||
| @@ -156,8 +156,8 @@ REG_OP(GEMM) | |||||
| * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||||
| *@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||||
| *@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||||
| *@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | ||||
| @@ -175,6 +175,41 @@ REG_OP(BatchMatMul) | |||||
| .ATTR(adj_x2, Bool, false) | .ATTR(adj_x2, Bool, false) | ||||
| .OP_END_FACTORY_REG(BatchMatMul) | .OP_END_FACTORY_REG(BatchMatMul) | ||||
| /** | |||||
| * @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li x1: A matrix Tensor. Must be one of the following types: float16, | |||||
| * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. | |||||
| * @li x2: A matrix Tensor. Must be one of the following types: float16, | |||||
| * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||||
| * @li bias: A matrix Tensor. Must be one of the following types: float16, | |||||
| * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||||
| * @par Attributes: | |||||
| * @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||||
| * @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||||
| * @par Outputs: | |||||
| * y: The result matrix Tensor. Must be one of the following types: float16, | |||||
| * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator BatchMatmul. | |||||
| */ | |||||
| REG_OP(BatchMatMulV2) | |||||
| .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .ATTR(adj_x1, Bool, false) | |||||
| .ATTR(adj_x2, Bool, false) | |||||
| .OP_END_FACTORY_REG(BatchMatMulV2) | |||||
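The adjoint flags only re-interpret the trailing two dimensions; a shape-only C++ sketch (helper name illustrative, single batch dim for brevity):

#include <array>
#include <cstdint>

// With adj_x1, x1 is read as [B, K, M]; with adj_x2, x2 as [B, N, K].
std::array<int64_t, 3> BatchMatMulV2Shape(const std::array<int64_t, 3> &x1,
                                          const std::array<int64_t, 3> &x2,
                                          bool adj_x1, bool adj_x2) {
  const int64_t m = adj_x1 ? x1[2] : x1[1];
  const int64_t n = adj_x2 ? x2[1] : x2[2];
  return {x1[0], m, n};  // batch dims of x1 and x2 must match
}

For example, x1 = {8, 5, 16} and x2 = {8, 16, 7} with both flags false give y = {8, 5, 7}; bias, when present, is broadcast along the last dim.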
| /** | /** | ||||
| *@brief Computes half the L2 norm of a tensor without the sqrt . \n | *@brief Computes half the L2 norm of a tensor without the sqrt . \n | ||||
| @@ -979,6 +1014,14 @@ REG_OP(MatrixDiagV2) | |||||
| .OUTPUT(output, TensorType::BasicType()) | .OUTPUT(output, TensorType::BasicType()) | ||||
| .OP_END_FACTORY_REG(MatrixDiagV2) | .OP_END_FACTORY_REG(MatrixDiagV2) | ||||
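IndexAdd is registered below without a doc comment; the following description is inferred from the signature and the operator name (presumably Pytorch index_add_ semantics), not taken from the patch:

/**
*@brief Adds the slices of "updates" into "var" along dimension "axis" at the
positions given by "indices". \n

*@par Inputs:
*@li var: The tensor to be updated.
*@li indices: A 1-D Tensor of type int32; positions along "axis".
*@li updates: A Tensor of the same type as "var". \n

*@par Attributes:
*@li axis: The dimension along which to index. Defaults to 0. \n

*@par Outputs:
*@li var_out: The updated tensor, with the same type as "var".
*/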
| REG_OP(IndexAdd) | |||||
| .INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
| .INPUT(indices, TensorType({DT_INT32})) | |||||
| .INPUT(updates, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
| .OUTPUT(var_out, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
| .ATTR(axis, Int, 0) | |||||
| .OP_END_FACTORY_REG(IndexAdd) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -365,6 +365,25 @@ REG_OP(BiasAddGrad) | |||||
| * 4-D with shape [batch, out_height, out_width, out_channels] | * 4-D with shape [batch, out_height, out_width, out_channels] | ||||
| * or [batch, out_channels, out_height, out_width]. | * or [batch, out_channels, out_height, out_width]. | ||||
| * Gradients with respect to the output of the convolution. | * Gradients with respect to the output of the convolution. | ||||
| *\n | |||||
| *\n | |||||
| * The following are the supported data types and data formats: | |||||
| *@verbatim | |||||
| | Tensor | out_backprop | filter | y | |||||
| ------------|-------------|---------|-------- | |||||
| | Data Type | float16 | float16 | float16 | |||||
| | |-------------|---------|-------- | |||||
| | | float32 | float32 | float32 | |||||
| | |-------------|---------|-------- | |||||
| | | float64 | float64 | float64 | |||||
| ------------|-------------|---------|-------- | |||||
| | Format | NCHW | NCHW | NCHW | |||||
| | | NHWC | HWCN | NHWC | |||||
| @endverbatim | |||||
| * For float32 and float64 types, the actual calculation on the chip is based on | |||||
| * float16. | |||||
| *\n | |||||
| * | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Five attributes: | * Five attributes: | ||||
| * @li strides: A tuple/list of 4 integers. The stride of the sliding window | * @li strides: A tuple/list of 4 integers. The stride of the sliding window | ||||
| @@ -377,8 +396,52 @@ REG_OP(BiasAddGrad) | |||||
| * channels. | * channels. | ||||
| * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | ||||
| * "NHWC". Specify the data format of the input and output data. | * "NHWC". Specify the data format of the input and output data. | ||||
| *\n | |||||
| *\n | |||||
| * The following value range restrictions must be met: | |||||
| *@verbatim | |||||
| | Name | Field | Scope | |||||
| -------------------|----------|-------------- | |||||
| | input_size | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Filter | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | out_backprop | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | y(fmap) | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Stride | H | [1, 63] | |||||
| | | W | [1, 63] | |||||
| -------------------|----------|-------------- | |||||
| | Padding | Top | [0, 255] | |||||
| | | Bottom | [0, 255] | |||||
| | | Left | [0, 255] | |||||
| | | Right | [0, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| @endverbatim | |||||
| * In Ascend910, fmap or out_backprop's H and W do not support 1 when | |||||
| * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||||
| *\n | |||||
| * | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type as filter, and has the same format as input_size. | * y: A Tensor. Has the same type as filter, and has the same format as input_size. | ||||
| *\n | |||||
| * out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||||
| * (dilation_h * (filter_height - 1) + 1)) | |||||
| * / stride_h + 1 | |||||
| *\n | |||||
| * out_backprop_width = (fmap_width + pad_left + pad_right - | |||||
| * (dilation_w * (filter_width - 1) + 1)) | |||||
| * / stride_w + 1 | |||||
| *\n | |||||
| * | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv2d_backprop_input | * Compatible with Tensorflow's conv2d_backprop_input | ||||
| */ | */ | ||||
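A quick numeric check of the size relation above (hypothetical helper; the division is integer, i.e. floor, and the same relation recurs in the Deconvolution and Conv2DBackpropFilter notes below):

// out = (fmap + pad_a + pad_b - (dilation * (filter - 1) + 1)) / stride + 1
int OutBackpropDim(int fmap, int pad_a, int pad_b,
                   int filter, int dilation, int stride) {
  return (fmap + pad_a + pad_b - (dilation * (filter - 1) + 1)) / stride + 1;
}

// e.g. fmap_h = 224, pads = 1/1, filter_h = 3, dilation_h = 1, stride_h = 2:
// (224 + 1 + 1 - 3) / 2 + 1 = 112.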
| @@ -454,6 +517,21 @@ REG_OP(Conv2DBackpropInputD) | |||||
| * @li bias: An optional tensor. Must have the same type as "y". | * @li bias: An optional tensor. Must have the same type as "y". | ||||
| * @li offset_w: An optional 1D tensor for quantized deconvolution. | * @li offset_w: An optional 1D tensor for quantized deconvolution. | ||||
| * Type is int8. Reserved.\n | * Type is int8. Reserved.\n | ||||
| *\n | |||||
| *\n | |||||
| * The following are the supported data types and data formats: | |||||
| *@verbatim | |||||
| | Tensor | x | filter | bias | y | |||||
| ------------|---------|---------|---------|-------- | |||||
| | Data Type | float16 | float16 | float16 | float16 | |||||
| | |---------|---------|---------|-------- | |||||
| | | int8 | int8 | int32 | int32 | |||||
| ------------|---------|---------|---------|-------- | |||||
| | Format | NCHW | NCHW | ND | NCHW | |||||
| @endverbatim | |||||
| * For int8, a dequant or requant operator must follow. | |||||
| *\n | |||||
| * | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Six attributes: | * Six attributes: | ||||
| * @li strides: A tuple or list of 2 integers. The stride of the sliding window | * @li strides: A tuple or list of 2 integers. The stride of the sliding window | ||||
| @@ -468,8 +546,51 @@ REG_OP(Conv2DBackpropInputD) | |||||
| Specify the data format of the input and output data. | Specify the data format of the input and output data. | ||||
| * @li offset_x: An optional integer for quantized deconvolution. | * @li offset_x: An optional integer for quantized deconvolution. | ||||
| * Defaults to "0". | * Defaults to "0". | ||||
| *\n | |||||
| *\n | |||||
| * The following value range restrictions must be met: | |||||
| *@verbatim | |||||
| | Name | Field | Scope | |||||
| -------------------|----------|-------------- | |||||
| | x (out_backprop) | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Filter | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | y (fmap) | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Stride | H | [1, 63] | |||||
| | | W | [1, 63] | |||||
| -------------------|----------|-------------- | |||||
| | Padding | Top | [0, 255] | |||||
| | | Bottom | [0, 255] | |||||
| | | Left | [0, 255] | |||||
| | | Right | [0, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Offset_x | | [-128, 127] | |||||
| @endverbatim | |||||
| * In Ascend910, fmap or out_backprop's H and W do not support 1 when | |||||
| * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||||
| *\n | |||||
| * | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. 4D tensor with shape [batch, channels, height, width]. | * y: A Tensor. 4D tensor with shape [batch, channels, height, width]. | ||||
| *\n | |||||
| * out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||||
| * (dilation_h * (filter_height - 1) + 1)) | |||||
| * / stride_h + 1 | |||||
| *\n | |||||
| * out_backprop_width = (fmap_width + pad_left + pad_right - | |||||
| * (dilation_w * (filter_width - 1) + 1)) | |||||
| * / stride_w + 1 | |||||
| *\n | |||||
| * | |||||
| * When type of x is float16, the type of y must be float16. | * When type of x is float16, the type of y must be float16. | ||||
| * When type of x is int8, the type of y must be int32. | * When type of x is int8, the type of y must be int32. | ||||
| */ | */ | ||||
| @@ -502,6 +623,25 @@ REG_OP(Deconvolution) | |||||
| * [batch, out_height, out_width, out_channels] or [batch, out_channels, | * [batch, out_height, out_width, out_channels] or [batch, out_channels, | ||||
| * out_height, out_width]. Gradients with respect to the output of the | * out_height, out_width]. Gradients with respect to the output of the | ||||
| * convolution. | * convolution. | ||||
| *\n | |||||
| *\n | |||||
| * The following are the supported data types and data formats: | |||||
| *@verbatim | |||||
| | Tensor | x | out_backprop | y | |||||
| ------------|---------|--------------|--------- | |||||
| | Data Type | float16 | float16 | float16 | |||||
| | |---------|--------------|--------- | |||||
| | | float32 | float32 | float32 | |||||
| | |---------|--------------|--------- | |||||
| | | float64 | float64 | float64 | |||||
| |-----------|---------|--------------|--------- | |||||
| | Format | NCHW | NCHW | NCHW | |||||
| | | NHWC | NHWC | HWCN | |||||
| @endverbatim | |||||
| * For float32 and float64 types of x and out_backprop, the actual calculation on the chip | |||||
| * is based on float16. | |||||
| *\n | |||||
| * | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Five attributes: | * Five attributes: | ||||
| * @li strides: A tuple/list of 4 integers. The stride of the sliding window | * @li strides: A tuple/list of 4 integers. The stride of the sliding window | ||||
| @@ -514,8 +654,52 @@ REG_OP(Deconvolution) | |||||
| * channels. | * channels. | ||||
| * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | ||||
| * "NHWC". Specify the data format of the input and output data. | * "NHWC". Specify the data format of the input and output data. | ||||
| *\n | |||||
| *\n | |||||
| * The following value range restrictions must be met: | |||||
| *@verbatim | |||||
| | Name | Field | Scope | |||||
| -------------------|----------|-------------- | |||||
| | x(fmap) | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Filter Size | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | out_backprop | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | y | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Stride | H | [1, 63] | |||||
| | | W | [1, 63] | |||||
| -------------------|----------|-------------- | |||||
| | Padding | Top | [0, 255] | |||||
| | | Bottom | [0, 255] | |||||
| | | Left | [0, 255] | |||||
| | | Right | [0, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| @endverbatim | |||||
| * In Ascend910, out_backprop's H and W do not support 1 when | |||||
| * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||||
| *\n | |||||
| * | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type as x, has the same format as filter_size. | * y: A Tensor. Has the same type as x, has the same format as filter_size. | ||||
| *\n | |||||
| * out_backprop_height = (in_height + pad_top + pad_bottom - | |||||
| * (dilation_h * (filter_height - 1) + 1)) | |||||
| * / stride_h + 1 | |||||
| *\n | |||||
| * out_backprop_width = (in_width + pad_left + pad_right - | |||||
| * (dilation_w * (filter_width - 1) + 1)) | |||||
| * / stride_w + 1 | |||||
| *\n | |||||
| * | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv2d_backprop_filter | * Compatible with Tensorflow's conv2d_backprop_filter | ||||
| */ | */ | ||||
| @@ -617,8 +801,7 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| * (top, bottom, left, right) side of the input. | * (top, bottom, left, right) side of the input. | ||||
| *@li dilations: Optional. A list of 4 integers. The dilation factor for each | *@li dilations: Optional. A list of 4 integers. The dilation factor for each | ||||
| * dimension of input. The dimension order is determined by the data format of | * dimension of input. The dimension order is determined by the data format of | ||||
| * "x". The N and C dimensions must be set to 1. The H and W dimensions must be | |||||
| * set to 1 for int8 type. Defaults to [1, 1, 1, 1]. | |||||
| * "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1]. | |||||
| *@li groups: Optional. An integer of type int32. The number of blocked | *@li groups: Optional. An integer of type int32. The number of blocked | ||||
| * connections from input channels to output channels. In_channels and | * connections from input channels to output channels. In_channels and | ||||
| * out_channels must both be divisible by "groups". Defaults to 1. | * out_channels must both be divisible by "groups". Defaults to 1. | ||||
| @@ -652,6 +835,8 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| | Offset_x | | [-128, 127] | | Offset_x | | [-128, 127] | ||||
| @endverbatim | @endverbatim | ||||
| * The W dimension of the input image supports cases exceeding 4096, but it may | |||||
| * cause compilation errors. | |||||
| *\n | *\n | ||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| @@ -666,21 +851,6 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| * out_width = (in_width + pad_left + pad_right - | * out_width = (in_width + pad_left + pad_right - | ||||
| * (dilation_w * (filter_width - 1) + 1)) | * (dilation_w * (filter_width - 1) + 1)) | ||||
| * / stride_w + 1 | * / stride_w + 1 | ||||
| * | |||||
| *@attention Constraints: | |||||
| *@li The following restrictions on the output must be met: | |||||
| *@verbatim | |||||
| | Output | Restrictions | |||||
| ----------|-------------------------------- | |||||
| | H == 1 | H * W(input) == H * W(filter) | |||||
| | W == 1 | | |||||
| ----------|-------------------------------- | |||||
| | H != 1 | W(input) == W(filter) | |||||
| | W == 1 | Only for Ascend310 Hi3796V300CS | |||||
| @endverbatim | |||||
| * "H * W (input)" indicates the image size after padding and "H * W (filter)" | |||||
| * indicates the filter size after dilation."W(input)" and W(filter) indicate | |||||
| * the same rule on the W dimension. | |||||
| *\n | *\n | ||||
| * | * | ||||
| *@par Quantization supported or not | *@par Quantization supported or not | ||||
| @@ -778,7 +948,7 @@ REG_OP(Conv2DCompress) | |||||
| * With the format "HWCN" , the data is stored in the order of: [filter_height, | * With the format "HWCN" , the data is stored in the order of: [filter_height, | ||||
| * filter_width, in_channels / groups, out_channels]. | * filter_width, in_channels / groups, out_channels]. | ||||
| *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format | *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format | ||||
| * "NHWC", the data is stored in the order of: [batch, in_height, in_width, | |||||
| * "NHWC", the data is stored in the order of: [batch, out_height, out_width, | |||||
| * deformable_groups * filter_height * filter_width * 3]. | * deformable_groups * filter_height * filter_width * 3]. | ||||
| *@li bias: An optional 1D tensor of additive biases to the filter outputs. | *@li bias: An optional 1D tensor of additive biases to the filter outputs. | ||||
| * The data is stored in the order of: [out_channels]. | * The data is stored in the order of: [out_channels]. | ||||
| @@ -822,25 +992,12 @@ REG_OP(Conv2DCompress) | |||||
| *@verbatim | *@verbatim | ||||
| | Name | Field | Scope | | Name | Field | Scope | ||||
| --------------------|--------|---------------------------- | --------------------|--------|---------------------------- | ||||
| | Input Image Size | H | [1, 100000] | |||||
| | | W | [1, 4096] | |||||
| | Input Image Size | H | [1, 100000 / filter_height] | |||||
| | | W | [1, 4096 / filter_width] | |||||
| --------------------|--------|---------------------------- | --------------------|--------|---------------------------- | ||||
| | Filter Size | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| --------------------|--------|---------------------------- | |||||
| | Stride | H | [1, 63] | |||||
| | Filter Size | H | [1, 63] | |||||
| | | W | [1, 63] | | | W | [1, 63] | ||||
| --------------------|--------|---------------------------- | |||||
| | Padding | Top | [0, 255] | |||||
| | | Bottom | [0, 255] | |||||
| | | Left | [0, 255] | |||||
| | | Right | [0, 255] | |||||
| ------------ -------|--------|---------------------------- | |||||
| | Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| @endverbatim | @endverbatim | ||||
| * "W(input)" indicate the image width after padding and W(filter) indicates the | |||||
| * filter width after dilation. | |||||
| *\n | *\n | ||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| @@ -855,21 +1012,7 @@ REG_OP(Conv2DCompress) | |||||
| * out_width = (in_width + pad_left + pad_right - | * out_width = (in_width + pad_left + pad_right - | ||||
| * (dilation_w * (filter_width - 1) + 1)) | * (dilation_w * (filter_width - 1) + 1)) | ||||
| * / stride_w + 1 | * / stride_w + 1 | ||||
| * | |||||
| *@attention Constraints: | |||||
| *@li The following restrictions on the output must be met: | |||||
| *@verbatim | |||||
| | Output | Restrictions | |||||
| ----------|-------------------------------- | |||||
| | H == 1 | H * W(input) == H * W(filter) | |||||
| | W == 1 | | |||||
| ----------|-------------------------------- | |||||
| | H != 1 | W(input) == W(filter) | |||||
| | W == 1 | Only for Ascend310 Hi3796V300CS | |||||
| @endverbatim | |||||
| * "H * W(input)" indicates the image size after padding and "H * W(filter)" | |||||
| * indicates the filter size after dilation. "W(input)" and W(filter) indicate | |||||
| * the same rule on the W dimension. | |||||
| *\n | |||||
| * | * | ||||
| *@par Quantization supported or not | *@par Quantization supported or not | ||||
| *@li No | *@li No | ||||
| @@ -920,8 +1063,8 @@ REG_OP(DeformableConv2D) | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li dilations: A list of 5 integers. Specifies the dilation factor for each | * @li dilations: A list of 5 integers. Specifies the dilation factor for each | ||||
| * dimension of "x", now only support [1,1,1,1,1] | |||||
| * The N and C dimensions must be 1. Has the same format as "x". | |||||
| * dimension of "x". | |||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | |||||
| * @li offset_x: An optional int. Input offset, used for quantized inference. | * @li offset_x: An optional int. Input offset, used for quantized inference. | ||||
| * Defaults to 0. Reserved . \n | * Defaults to 0. Reserved . \n | ||||
| @@ -967,8 +1110,8 @@ REG_OP(Conv3D) | |||||
| *@par Required Attributes: | *@par Required Attributes: | ||||
| * @li strides: A list of 5 integers. Specifies the stride of the sliding window | * @li strides: A list of 5 integers. Specifies the stride of the sliding window | ||||
| * for each dimension of "x". | |||||
| * The N and C dimensions must be 1. Has the same format as "x". | |||||
| * for each dimension of "out_backprop". | |||||
| * The N and C dimensions must be 1. Has the same format as "out_backprop". | |||||
| * @li pads: A list of 6 integers. | * @li pads: A list of 6 integers. | ||||
| * Supports only padding along the D, H and W dimensions in sequence of head, | * Supports only padding along the D, H and W dimensions in sequence of head, | ||||
| * tail, top, bottom, left and right . \n | * tail, top, bottom, left and right . \n | ||||
| @@ -980,10 +1123,11 @@ REG_OP(Conv3D) | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| * dimension of the input, now only support [1,1,1,1,1] | |||||
| * dimension of the input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "out_backprop". | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type as filter,and has same format as input_size | |||||
| * y: A Tensor. Has the same type as filter, and has the same format as "input_size". | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv3d_backprop_input | * Compatible with Tensorflow's conv3d_backprop_input | ||||
| @@ -1011,8 +1155,8 @@ REG_OP(Conv3DBackpropInput) | |||||
| *@par Required Attributes: | *@par Required Attributes: | ||||
| * @li strides: A list of 5 integers. Specifies the stride of the sliding window | * @li strides: A list of 5 integers. Specifies the stride of the sliding window | ||||
| * for each dimension of "x". | |||||
| * The N and C dimensions must be 1. Has the same format as "x". | |||||
| * for each dimension of "out_backprop". | |||||
| * The N and C dimensions must be 1. Has the same format as "out_backprop". | |||||
| * @li pads: A list of 6 integers. Supports only padding along the D, H and W | * @li pads: A list of 6 integers. Supports only padding along the D, H and W | ||||
| * dimensions in sequence of head, tail, top, bottom, left and right. | * dimensions in sequence of head, tail, top, bottom, left and right. | ||||
| * @li input_size: A tuple/list of type int32, int64. An integer vector | * @li input_size: A tuple/list of type int32, int64. An integer vector | ||||
| @@ -1027,9 +1171,10 @@ REG_OP(Conv3DBackpropInput) | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| * dimension of input, now only support [1,1,1,1,1] | |||||
| * dimension of input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "out_backprop". | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type and data format as out_backprop. | |||||
| * y: A Tensor. Has the same type and data format as "out_backprop". | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv3d_backprop_input | * Compatible with Tensorflow's conv3d_backprop_input | ||||
| @@ -1072,9 +1217,7 @@ REG_OP(Conv3DBackpropInputD) | |||||
| * @li c_t: An optional Tensor of type float16, float32. The cell state at time t . \n | * @li c_t: An optional Tensor of type float16, float32. The cell state at time t . \n | ||||
| *@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
| * Compatible with the Pytorch operator adds. | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| * Compatible with the Caffe operator LSTM. | |||||
| */ | */ | ||||
| REG_OP(LSTM) | REG_OP(LSTM) | ||||
| .INPUT(x, TensorType({DT_FLOAT16})) | .INPUT(x, TensorType({DT_FLOAT16})) | ||||
| @@ -1121,14 +1264,15 @@ REG_OP(LSTM) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Three attributes: | * Three attributes: | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| * dimension of input, now only support [1,1,1,1,1]. | |||||
| * dimension of input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | |||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | * channels. Reserved. | ||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor that has the same type as x | |||||
| * y: A Tensor that has the same type as "x" | |||||
| * and the format is NDHWC, NCDHW or DHWCN. | * and the format is NDHWC, NCDHW or DHWCN. | ||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv3d_backprop_filter | * Compatible with Tensorflow's conv3d_backprop_filter | ||||
| @@ -1172,7 +1316,8 @@ REG_OP(Conv3DBackpropFilter) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Three attributes: | * Three attributes: | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| * dimension of input, now only support [1,1,1,1,1]. | |||||
| * dimension of input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | |||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | * channels. Reserved. | ||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| @@ -1226,13 +1371,14 @@ REG_OP(Conv3DBackpropFilterD) | |||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | * channels. Reserved. | ||||
| * @li dilations: A tuple/list of 5 integers, | * @li dilations: A tuple/list of 5 integers, | ||||
| * The dilation factor for each dimension of input, now only support [1,1,1,1,1] | |||||
| * The dilation factor for each dimension of input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li output_padding: The size will be added in the output shape. | * @li output_padding: The size will be added in the output shape. | ||||
| * @li offset_x: Input offset_x value. Reserved. | * @li offset_x: Input offset_x value. Reserved. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type and format as x. | |||||
| * y: A Tensor. Has the same type and format as "x". | |||||
| */ | */ | ||||
| REG_OP(Conv3DTranspose) | REG_OP(Conv3DTranspose) | ||||
| .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | ||||
| @@ -1273,7 +1419,8 @@ REG_OP(Conv3DTranspose) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Five attributes: | * Five attributes: | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| * dimension of input, now only support [1,1,1,1,1] | |||||
| * dimension of input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | |||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | * channels. Reserved. | ||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| @@ -1281,7 +1428,7 @@ REG_OP(Conv3DTranspose) | |||||
| * @li output_padding: The size will be added in the output shape. | * @li output_padding: The size will be added in the output shape. | ||||
| * @li offset_x: Input offset_x value. Reserved. | * @li offset_x: Input offset_x value. Reserved. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type and format as x. | |||||
| * y: A Tensor. Has the same type and format as "x". | |||||
| *@par Restrictions: | *@par Restrictions: | ||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. | ||||
| */ | */ | ||||
| @@ -1316,6 +1463,22 @@ REG_OP(Conv3DTransposeD) | |||||
| * or [out_channels, in_channel, filter_height, filter_width]. | * or [out_channels, in_channel, filter_height, filter_width]. | ||||
| * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". | * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". | ||||
| * @li offset_w: An optional 1D tensor for quantized inference. Reserved. | * @li offset_w: An optional 1D tensor for quantized inference. Reserved. | ||||
| *\n | |||||
| *\n | |||||
| * The following are the supported data types and data formats: | |||||
| *@verbatim | |||||
| | Tensor | x | filter | bias | y | |||||
| ------------|---------|---------|---------|-------- | |||||
| | Data Type | float16 | float16 | float16 | float16 | |||||
| | |---------|---------|---------|-------- | |||||
| | | int8 | int8 | int32 | int32 | |||||
| ------------|---------|---------|---------|-------- | |||||
| | Format | NCHW | NCHW | ND | NCHW | |||||
| | | NHWC | HWCN | | NHWC | |||||
| @endverbatim | |||||
| * For int8, the output must be followed by a dequant or requant operator. | |||||
| *\n | |||||
| * | |||||
| *@par Required Attributes: | *@par Required Attributes: | ||||
| * @li strides: A required tuple/list of 4 integers. The stride of the sliding | * @li strides: A required tuple/list of 4 integers. The stride of the sliding | ||||
| * window for H/W dimension. The index of H/W is same as data_format. | * window for H/W dimension. The index of H/W is same as data_format. | ||||
| @@ -1334,9 +1497,55 @@ REG_OP(Conv3DTransposeD) | |||||
| * to [0, 0, 0, 0]. | * to [0, 0, 0, 0]. | ||||
| * @li offset_x: An optional int. Input offset, used for quantized inference. | * @li offset_x: An optional int. Input offset, used for quantized inference. | ||||
| * Defaults to "0". | * Defaults to "0". | ||||
| *\n | |||||
| *\n | |||||
| * The following value range restrictions must be met: | |||||
| *@verbatim | |||||
| | Name | Field | Scope | |||||
| -------------------|----------|-------------- | |||||
| | input_size | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | x (out_backprop) | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | filter | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | y (fmap) | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Stride | H | [1, 63] | |||||
| | | W | [1, 63] | |||||
| -------------------|----------|-------------- | |||||
| | Padding | Top | [0, 255] | |||||
| | | Bottom | [0, 255] | |||||
| | | Left | [0, 255] | |||||
| | | Right | [0, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Offset_x | | [-128, 127] | |||||
| @endverbatim | |||||
| * On Ascend910, the H and W dimensions of fmap and out_backprop do not support the value 1 when | |||||
| * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||||
| *\n | |||||
| * | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. A Tensor of type float16 or int32, and has same format as | * y: A Tensor. A Tensor of type float16 or int32, and has same format as | ||||
| * input_size. | * input_size. | ||||
| *\n | |||||
| * out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||||
| * (dilation_h * (filter_height - 1) + 1)) | |||||
| * / stride_h + 1 | |||||
| *\n | |||||
| * out_backprop_width = (fmap_width + pad_left + pad_right - | |||||
| * (dilation_w * (filter_width - 1) + 1)) | |||||
| * / stride_w + 1 | |||||
| *\n | |||||
| * | |||||
| */ | */ | ||||
| REG_OP(Conv2DTranspose) | REG_OP(Conv2DTranspose) | ||||
| .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | ||||
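For a quick sanity check of the two output-extent formulas above, a minimal C++ sketch (the helper name and the example values are illustrative, not part of this header):

    #include <cstdint>

    // Mirrors the quoted formula: effective filter extent first, then the
    // strided window count. Applies to both the H and the W dimension.
    int64_t OutBackpropExtent(int64_t fmap, int64_t pad_begin, int64_t pad_end,
                              int64_t filter, int64_t dilation, int64_t stride) {
      const int64_t effective_filter = dilation * (filter - 1) + 1;
      return (fmap + pad_begin + pad_end - effective_filter) / stride + 1;
    }

    // Example: fmap_h = 14, pad_top = pad_bottom = 1, filter_h = 3,
    // dilation_h = 1, stride_h = 2  ->  (14 + 1 + 1 - 3) / 2 + 1 = 7.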
| @@ -1405,13 +1614,13 @@ REG_OP(Conv2DTransposeD) | |||||
| /** | /** | ||||
| *@brief Computes the deformed convolution output with the expected input | *@brief Computes the deformed convolution output with the expected input | ||||
| *@par Inputs: | *@par Inputs: | ||||
| * Four inputs: | |||||
| * Two inputs: | |||||
| * @li x: A Tensor of type float16, float32. | * @li x: A Tensor of type float16, float32. | ||||
| * @li offsets: A Tensor of type float16, float32. Deformation offset parameter. | * @li offsets: A Tensor of type float16, float32. Deformation offset parameter. | ||||
| *@par Required Attributes: | *@par Required Attributes: | ||||
| * @li strides: A tuple/list of 4 integers. The stride of the sliding window for | * @li strides: A tuple/list of 4 integers. The stride of the sliding window for | ||||
| * the H/W dimensions. | * the H/W dimensions. | ||||
| * @li pads: A tuple/list of 4 integers.Padding added to each dimension | |||||
| * @li pads: A tuple/list of 4 integers. Padding added to the H/W dimension | |||||
| * of the input. | * of the input. | ||||
| * @li ksize: A tuple/list of 2 integers. Kernel size. | * @li ksize: A tuple/list of 2 integers. Kernel size. | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -1420,6 +1629,7 @@ REG_OP(Conv2DTransposeD) | |||||
| * of input. Defaults to [1, 1, 1, 1] | * of input. Defaults to [1, 1, 1, 1] | ||||
| * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | ||||
| * @li deformable_groups: Specify the c-axis grouping number of input x. | * @li deformable_groups: Specify the c-axis grouping number of input x. | ||||
| * @li modulated: Specifies the version of DeformableConv2D; true means v2, false means v1. | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. A Tensor of type float16, float32. | * y: A Tensor. A Tensor of type float16, float32. | ||||
| */ | */ | ||||
| @@ -1433,7 +1643,69 @@ REG_OP(DeformableOffsets) | |||||
| .ATTR(dilations, ListInt, {1, 1, 1, 1}) | .ATTR(dilations, ListInt, {1, 1, 1, 1}) | ||||
| .ATTR(data_format, String, "NCHW") | .ATTR(data_format, String, "NCHW") | ||||
| .ATTR(deformable_groups, Int, 1) | .ATTR(deformable_groups, Int, 1) | ||||
| .ATTR(modulated, Bool, true) | |||||
| .OP_END_FACTORY_REG(DeformableOffsets) | .OP_END_FACTORY_REG(DeformableOffsets) | ||||
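As a usage sketch, the class generated by REG_OP for DeformableOffsets can be wired into a graph through the usual set_input_*/set_attr_* accessors. The include paths, tensor shapes, and attribute values below are assumptions for illustration, not taken from this header:

    #include "all_ops.h"      // generated operator classes (path per typical CANN samples)
    #include "graph/graph.h"  // ge::Graph

    ge::Graph BuildDeformableOffsetsGraph() {
      // Two graph inputs: the feature map and the offset tensor.
      auto x = ge::op::Data("x").set_attr_index(0);
      auto offsets = ge::op::Data("offsets").set_attr_index(1);

      auto deform = ge::op::DeformableOffsets("deformable_offsets")
                        .set_input_x(x)
                        .set_input_offsets(offsets)
                        .set_attr_strides({1, 1, 1, 1})
                        .set_attr_pads({1, 1, 1, 1})
                        .set_attr_ksize({3, 3})
                        .set_attr_data_format("NCHW")
                        .set_attr_deformable_groups(1)
                        .set_attr_modulated(true);  // v2 behavior, per the attribute doc

      ge::Graph graph("deformable_offsets_graph");
      graph.SetInputs({x, offsets}).SetOutputs({deform});
      return graph;
    }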
| /** | |||||
| *@brief Computes the gradients of DeformableOffsets with respect to input and offsets | |||||
| *@par Inputs: | |||||
| * Three inputs: | |||||
| * @li grad: A Tensor of type float16, float32. Gradients with respect to the DeformableOffsets output. | |||||
| * @li x: A Tensor of type float16, float32. | |||||
| * @li offsets: A Tensor of type float16, float32. Deformation offset parameter. | |||||
| *@par Required Attributes: | |||||
| * @li strides: A tuple/list of 4 integers. The stride of the sliding window for | |||||
| * the H/W dimensions. | |||||
| * @li pads: A tuple/list of 4 integers. Padding added to the H/W dimension | |||||
| * of the input. | |||||
| * @li ksize: A tuple/list of 2 integers. Kernel size. | |||||
| *@par Attributes: | |||||
| * Four attributes: | |||||
| * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | |||||
| * of input. Defaults to [1, 1, 1, 1] | |||||
| * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | |||||
| * @li deformable_groups: Specify the c-axis grouping number of input x. | |||||
| * @li modulated: Specifies the version of DeformableConv2D; true means v2, false means v1. | |||||
| *@par Outputs: | |||||
| * grad_x: A Tensor of type float16, float32. Gradients with respect to the input "x". | |||||
| * grad_offsets: A Tensor of type float16, float32. Gradients with respect to the input "offsets". | |||||
| */ | |||||
| REG_OP(DeformableOffsetsGrad) | |||||
| .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(grad_offsets, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||||
| .ATTR(data_format, String, "NCHW") | |||||
| .ATTR(deformable_groups, Int, 1) | |||||
| .ATTR(modulated, Bool, true) | |||||
| .OP_END_FACTORY_REG(DeformableOffsetsGrad) | |||||
| /** | |||||
| *@brief Computes the deformed dilation output with the expected input | |||||
| *@par Inputs: | |||||
| * One input: | |||||
| * @li x: A Tensor of type int8, float16, float32. | |||||
| *@par Required Attributes: | |||||
| * @li dilations: A tuple/list of integers. | |||||
| *@par Attributes: | |||||
| * Two attributes: | |||||
| * @li padding_value: An optional float. The value used to fill the blank positions. Defaults to 0.0. | |||||
| * @li pads: A tuple/list of integers. | |||||
| *@par Outputs: | |||||
| * y: A Tensor. A Tensor of type int8, float16, float32. | |||||
| */ | |||||
| REG_OP(Dilation) | |||||
| .INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(dilations, ListInt) | |||||
| .ATTR(pads, ListInt, {}) | |||||
| .ATTR(padding_value, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(Dilation) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1383,6 +1383,7 @@ REG_OP(DecodeWheelsTarget) | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| * Only computation of float16 data is supported. | * Only computation of float16 data is supported. | ||||
| * Note: when (class num per image) * max_size_per_class is too large, compilation fails with an insufficient-memory error. | |||||
| */ | */ | ||||
| REG_OP(BatchMultiClassNonMaxSuppression) | REG_OP(BatchMultiClassNonMaxSuppression) | ||||
| .INPUT(boxes, TensorType({DT_FLOAT16})) | .INPUT(boxes, TensorType({DT_FLOAT16})) | ||||
| @@ -1485,7 +1486,10 @@ REG_OP(DecodeBboxV2) | |||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * @li y1: A Tensor. Must have the same type as x. | * @li y1: A Tensor. Must have the same type as x. | ||||
| * @li y2: A Tensor. Indices of y1 in x.Dtype must be int32. | |||||
| * @li y2: A Tensor. Indices of y1 in x. Dtype must be int32. | |||||
| * | |||||
| *@attention Constraints: | |||||
| * The upper limit of the data volume along the sort axis is 7040. | |||||
| */ | */ | ||||
| REG_OP(Sort) | REG_OP(Sort) | ||||
| .INPUT(x, TensorType({ DT_FLOAT16 })) | .INPUT(x, TensorType({ DT_FLOAT16 })) | ||||
| @@ -1495,6 +1499,111 @@ REG_OP(Sort) | |||||
| .ATTR(descending, Bool, false) | .ATTR(descending, Bool, false) | ||||
| .OP_END_FACTORY_REG(Sort) | .OP_END_FACTORY_REG(Sort) | ||||
| /** | |||||
| *@brief Computes the IoU (intersection over union) of the input bounding | |||||
| boxes "bboxes" against the ground-truth boxes "gtboxes" . \n | |||||
| *@par Inputs: | |||||
| *@li bboxes: A Tensor of type float16 or float32. | |||||
| *@li gtboxes: A Tensor of type float16 or float32 . \n | |||||
| *@par Attributes: | |||||
| *mode: An optional string, specifying the overlap mode. Defaults to "iou" . \n | |||||
| *@par Outputs: | |||||
| *overlap: A Tensor with the same type as "bboxes", holding the computed overlaps. | |||||
| */ | |||||
| REG_OP(PtIou) | |||||
| .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(mode, String, "iou") | |||||
| .OP_END_FACTORY_REG(PtIou) | |||||
| /** | |||||
| *@brief Greedily selects a subset of bounding boxes in descending order of | |||||
| score . \n | |||||
| *@par Inputs: | |||||
| *Input boxes and scores must be float16 type. Inputs include: | |||||
| *@li boxes: An input tensor with shape [num_batches, spatial_dimension, 4]. | |||||
| The single box data format is indicated by center_point_box. | |||||
| *@li scores: An input tensor with shape [num_batches, num_classes, spatial_dimension]. | |||||
| *@li max_output_size: A scalar integer tensor representing the maximum number | |||||
| of boxes to be selected by non max suppression. | |||||
| *@li iou_threshold: A 0-D float tensor representing the threshold for deciding | |||||
| whether boxes overlap too much with respect to IOU. | |||||
| *@li score_threshold: A 0-D float tensor representing the threshold for | |||||
| deciding when to remove boxes based on score . \n | |||||
| *@par Attributes: | |||||
| *center_point_box: An integer indicating the format of the box data. | |||||
| The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] | |||||
| where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair | |||||
| of box corners and the coordinates can be provided as normalized | |||||
| (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models. | |||||
| 1 - the box data is supplied as [x_center, y_center, width, height]. | |||||
| Mostly used for Pytorch models. \n | |||||
| *@par Outputs: | |||||
| *@li selected_indices: A 2-D integer tensor of shape [M, 3] representing the | |||||
| selected indices from the boxes tensor, where M <= max_output_size. \n | |||||
| *@attention Constraints: | |||||
| *Input boxes and scores must be float16 type . \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the ONNX NonMaxSuppression operator. | |||||
| */ | |||||
| REG_OP(NonMaxSuppressionV6) | |||||
| .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(selected_indices, TensorType({DT_INT32})) | |||||
| .ATTR(center_point_box, Int, 0) | |||||
| .ATTR(max_boxes_size, Int, 0) | |||||
| .OP_END_FACTORY_REG(NonMaxSuppressionV6) | |||||
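Since center_point_box only changes the box layout, a conversion between the two layouts documented above is straightforward; a hedged sketch (the helper name is hypothetical):

    #include <array>

    // Converts one box from the center_point_box = 1 layout
    // ([x_center, y_center, width, height], PyTorch style) to the
    // center_point_box = 0 layout ([y1, x1, y2, x2], TF style).
    std::array<float, 4> CenterToCorners(const std::array<float, 4>& box) {
      const float xc = box[0], yc = box[1], w = box[2], h = box[3];
      return {yc - h / 2.0f, xc - w / 2.0f,   // y1, x1
              yc + h / 2.0f, xc + w / 2.0f};  // y2, x2
    }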
| /** | |||||
| *@brief Greedily selects a subset of bounding boxes in descending order of | |||||
| score . \n | |||||
| *@par Inputs: | |||||
| *Input boxes and scores must be float16 type. Inputs include: | |||||
| *@li boxes: An input tensor with shape [num_batches, spatial_dimension, 4]. | |||||
| The single box data format is indicated by center_point_box. | |||||
| *@li scores: An input tensor with shape [num_batches, num_classes, spatial_dimension]. | |||||
| *@li max_output_size: A scalar integer tensor representing the maximum number | |||||
| of boxes to be selected by non max suppression. | |||||
| *@li iou_threshold: A 0-D float tensor representing the threshold for deciding | |||||
| whether boxes overlap too much with respect to IOU. | |||||
| *@li score_threshold: A 0-D float tensor representing the threshold for | |||||
| deciding when to remove boxes based on score . \n | |||||
| *@li index_id: An input tensor with shape [num_batches, num_classes, spatial_dimension, 3], | |||||
| the last dimension representing (batch_id, class_id, index_id) . \n | |||||
| *@par Attributes: | |||||
| *center_point_box: An integer indicating the format of the box data. | |||||
| The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] | |||||
| where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair | |||||
| of box corners and the coordinates can be provided as normalized | |||||
| (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models. | |||||
| 1 - the box data is supplied as [x_center, y_center, width, height]. | |||||
| Mostly used for Pytorch models. \n | |||||
| *@par Outputs: | |||||
| *@li selected_indices: A 2-D integer tensor of shape [M, 3] representing the | |||||
| selected indices from the boxes tensor, where M <= max_output_size. \n | |||||
| *@attention Constraints: | |||||
| *Input boxes and scores must be float16 type . \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the ONNX NonMaxSuppression operator. | |||||
| */ | |||||
| REG_OP(NonMaxSuppressionV7) | |||||
| .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(selected_indices, TensorType({DT_INT32})) | |||||
| .ATTR(center_point_box, Int, 0) | |||||
| .ATTR(max_boxes_size, Int, 0) | |||||
| .OP_END_FACTORY_REG(NonMaxSuppressionV7) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -160,20 +160,20 @@ REG_OP(SigmoidCrossEntropyWithLogits) | |||||
| .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) | .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) | ||||
| /** | /** | ||||
| *@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n | |||||
| *@brief Computes the sigmoid cross entropy loss of "predict" and "target". | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * four inputs, including: | * four inputs, including: | ||||
| *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. | *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. | ||||
| *@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n | |||||
| *@li weight: An multi-dimensional Tensor, specifying the weight value. \n | |||||
| *@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. | |||||
| *@li weight: A multi-dimensional Tensor, specifying the weight value. | |||||
| *@li pos_weight: A multi-dimensional Tensor, specifying the pos weight value. \n | *@li pos_weight: A multi-dimensional Tensor, specifying the pos weight value. \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean" . \n | |||||
| *reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean". \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n | |||||
| *loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict". \n | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with PyTorch operator BCEWithLogitsLoss. | * Compatible with PyTorch operator BCEWithLogitsLoss. | ||||
| @@ -978,6 +978,261 @@ REG_OP(InHost) | |||||
| .OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) | .OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) | ||||
| .ATTR(epsilon, Float, 0.00001) | .ATTR(epsilon, Float, 0.00001) | ||||
| .OP_END_FACTORY_REG(InHost) | .OP_END_FACTORY_REG(InHost) | ||||
| /** | |||||
| * @brief Performs instance normalization on "x". \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li x: A Tensor. Must be one of the following types: float16, float32, format is NC1HWC0. | |||||
| * @li gamma: A Tensor. Must be one of the following types: float16, float32, format is ND. | |||||
| * @li beta: A Tensor. Must be one of the following types: float16, float32, format is ND. | |||||
| * @par Attributes: | |||||
| * @li data_format: An attribute of type String. \n | |||||
| * @li epsilon: An attribute of type Float. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x", format is NC1HWC0. \n | |||||
| * @li mean: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n | |||||
| * @li variance: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Can be used by the ONNX InstanceNormalization operator. | |||||
| */ | |||||
| REG_OP(InstanceNorm) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(data_format, String) | |||||
| .REQUIRED_ATTR(epsilon, Float) | |||||
| .OP_END_FACTORY_REG(InstanceNorm) | |||||
| /** | |||||
| * @brief Computes the gradient of the KL divergence loss. \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li grad: A Tensor. Must be one of the following types: float16, float32. | |||||
| * @li input: A Tensor. Has the same type as "grad". | |||||
| * @li target: A Tensor. Has the same type as "grad". \n | |||||
| * @par Attributes: | |||||
| * @li reduction: An optional attribute of type String. Defaults to "mean". | |||||
| * @li log_target: An optional attribute of type Bool. Specifies whether | |||||
| * "target" is passed in log space. Defaults to "false". \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "grad". \n | |||||
| */ | |||||
| REG_OP(KlDivLossGrad) | |||||
| .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .ATTR(log_target, Bool, false) | |||||
| .OP_END_FACTORY_REG(KlDivLossGrad) | |||||
| /** | |||||
| * @brief Computes l1_loss_grad or l1_loss_backward. \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li grads: A Tensor. Must be one of the following types: float16, float32. | |||||
| * Required. | |||||
| * @li predict: A Tensor. Has the same type as "grads". Required. | |||||
| * @li label: A Tensor. Has the same type as "grads". Required. \n | |||||
| * @par Attributes: | |||||
| * @li reduction: An optional attribute of type String. Defaults to "mean". \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator L1LossGrad. | |||||
| */ | |||||
| REG_OP(L1LossGrad) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(L1LossGrad) | |||||
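A host-side reference of the documented L1LossGrad semantics, assuming sign(predict - label) as the subgradient and a 1/N scale when reduction == "mean"; scalar broadcasting of "grads" is not modeled here:

    #include <cstddef>
    #include <string>
    #include <vector>

    std::vector<float> L1LossGradRef(const std::vector<float>& grads,
                                     const std::vector<float>& predict,
                                     const std::vector<float>& label,
                                     const std::string& reduction) {
      const float scale =
          reduction == "mean" ? 1.0f / static_cast<float>(predict.size()) : 1.0f;
      std::vector<float> y(predict.size());
      for (std::size_t i = 0; i < y.size(); ++i) {
        // Subgradient of |p - l| with respect to p: -1, 0 or +1.
        const float sign =
            predict[i] > label[i] ? 1.0f : (predict[i] < label[i] ? -1.0f : 0.0f);
        y[i] = grads[i] * sign * scale;
      }
      return y;
    }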
| /** | |||||
| * @brief Computes loss of lp, p=1,2,3.... | |||||
| * @par Inputs: | |||||
| * @li predict: An ND tensor of type float16, float32. | |||||
| * @li label: An ND tensor of type float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li p: A required int attribute that decides which loss to compute. Currently, p can only be 1, which computes the L1 loss. | |||||
| * @li reduction: An optional string. Defaults to "mean". \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same shape and type as "predict". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator LpLoss. | |||||
| */ | |||||
| REG_OP(LpLoss) | |||||
| .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(p, Int) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(LpLoss) | |||||
| /** | |||||
| * @brief Computes gradients of mse loss. | |||||
| * @par Inputs: | |||||
| * @li predict: An ND tensor of type float16, float32. | |||||
| * @li label: An ND tensor of type float16, float32. | |||||
| * @li dout: An ND tensor of type float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li reduction: An optional string. Defaults to "mean". \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same shape and type as "predict". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator MseLossGrad. | |||||
| */ | |||||
| REG_OP(MseLossGrad) | |||||
| .INPUT(predict, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
| .INPUT(label, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
| .INPUT(dout, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(MseLossGrad) | |||||
| /** | |||||
| * @brief Computes mse loss. | |||||
| * @par Inputs: | |||||
| * two inputs, including: | |||||
| * @li predict: An ND Tensor of dtype float16 or float32. | |||||
| * @li label: An ND Tensor of dtype float16 or float32.\n | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li reduction: An optional string from "sum", "none", and "mean". Defaults to "mean".\n | |||||
| * | |||||
| * @par Outputs: | |||||
| * @li y: When reduction is "sum" or "mean", y is a scalar. When reduction is "none", y has the | |||||
| * same type and shape as "predict".\n | |||||
| */ | |||||
| REG_OP(MseLoss) | |||||
| .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(MseLoss) | |||||
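A host-side reference for the reduction behavior described above (illustrative helper, not the device kernel):

    #include <cstddef>
    #include <string>
    #include <vector>

    std::vector<float> MseLossRef(const std::vector<float>& predict,
                                  const std::vector<float>& label,
                                  const std::string& reduction) {
      // Per-element squared error.
      std::vector<float> se(predict.size());
      for (std::size_t i = 0; i < se.size(); ++i) {
        const float d = predict[i] - label[i];
        se[i] = d * d;
      }
      if (reduction == "none") return se;  // full tensor
      float acc = 0.0f;
      for (float v : se) acc += v;
      if (reduction == "mean") acc /= static_cast<float>(se.size());
      return {acc};  // scalar for "sum" / "mean"
    }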
| /** | |||||
| * @brief Calculates the backward (gradient) outputs of the function "smooth_l1_loss_v2". \n | |||||
| * @par Inputs: | |||||
| * Three Inputs, including: | |||||
| * @li predict: A Tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * @li label: A Tensor. Has the same type as "predict". | |||||
| * @li dout: A Tensor. Has the same type as "predict". \n | |||||
| * @par Attributes: | |||||
| * Two Attributes, including: | |||||
| * @li sigma: An optional float. Defaults to 1.0. \n | |||||
| * @li reduction: An optional string. Defaults to "mean", | |||||
| * Must be one of the following: "none", "mean", "sum". \n | |||||
| * @par Outputs: | |||||
| * @li gradient: A Tensor. Has the same type as "predict". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator SmoothL1LossBackward. | |||||
| */ | |||||
| REG_OP(SmoothL1LossGradV2) | |||||
| .INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(gradient, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .ATTR(sigma, Float, 1.0) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(SmoothL1LossGradV2) | |||||
| /** | |||||
| * @brief Creates a criterion that uses a squared term if the absolute | |||||
| * element-wise error falls below beta and an L1 term otherwise. It is | |||||
| * less sensitive to outliers than the MSELoss and in some cases prevents | |||||
| * exploding gradients. | |||||
| * @par Inputs: | |||||
| * @li predict: A multi-dimensional Tensor of type float16 or float32, | |||||
| * specifying the predictive value. \n | |||||
| * @li label: A multi-dimensional Tensor of type float16 or float32, | |||||
| * specifying the target value. \n | |||||
| * @par Attributes: | |||||
| * @li sigma: An optional float. Specifies the threshold of the loss. Defaults | |||||
| * to "1.0". \n | |||||
| * @li reduction: An optional str. Specifies the reduction to apply to | |||||
| * the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, | |||||
| * 'mean': the sum of the output will be divided by the number of elements in | |||||
| * the output,'sum': the output will be summed. Default: 'mean'. \n | |||||
| * @par Outputs: | |||||
| * @li loss: Indicates the loss between the predictive value and target value. | |||||
| * Has the same dimensions as "predict". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator smooth_l1_loss. \n | |||||
| */ | |||||
| REG_OP(SmoothL1LossV2) | |||||
| .INPUT(predict, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .INPUT(label, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .OUTPUT(loss, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .ATTR(sigma, Float, 1.0) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(SmoothL1LossV2) | |||||
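A sketch of the elementwise rule from the brief, assuming the common smooth-L1 form in which "sigma" plays the role of PyTorch's beta; the kernel's exact scaling is not guaranteed by this header:

    #include <cmath>

    // Squared term below the threshold, L1 term above it.
    float SmoothL1Elem(float predict, float label, float sigma) {
      const float diff = std::fabs(predict - label);
      return diff < sigma ? 0.5f * diff * diff / sigma : diff - 0.5f * sigma;
    }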
| /** | |||||
| * @brief Computes Centralization. result = x - mean(x, axes) | |||||
| * @par Inputs: | |||||
| * @li x: An ND tensor of type float16, float32. | |||||
| * @par Attributes: | |||||
| * @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. | |||||
| * Must be in the range [-rank(x), rank(x)). | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * custom operator \n | |||||
| */ | |||||
| REG_OP(Centralization) | |||||
| .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .ATTR(axes, ListInt, {-1}) | |||||
| .OP_END_FACTORY_REG(Centralization) | |||||
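For axes = {-1} on a 2-D row-major buffer, the formula result = x - mean(x, axes) reduces to the following sketch (the helper name is illustrative):

    #include <cstddef>
    #include <vector>

    void CentralizeLastAxis(std::vector<float>& x, std::size_t rows, std::size_t cols) {
      for (std::size_t r = 0; r < rows; ++r) {
        float mean = 0.0f;
        for (std::size_t c = 0; c < cols; ++c) mean += x[r * cols + c];
        mean /= static_cast<float>(cols);
        // Subtract the per-row mean from every element of the row.
        for (std::size_t c = 0; c < cols; ++c) x[r * cols + c] -= mean;
      }
    }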
| /** | |||||
| * @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2. | |||||
| * @par Inputs: | |||||
| * @li predict: An ND tensor of type float16, float32. | |||||
| * @li target: An ND tensor of type float16, float32. | |||||
| * @li dout: An ND tensor of type float16, float32. | |||||
| * @li weight: An optional ND tensor of type float16, float32. | |||||
| * @li pos_weight: An optional ND tensor of type float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li reduction: An optional string. Defaults to "mean". \n | |||||
| * @par Outputs: | |||||
| * @li gradient: An ND tensor with the same shape and type as "predict". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad. | |||||
| */ | |||||
| REG_OP(SigmoidCrossEntropyWithLogitsGradV2) | |||||
| .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -182,6 +182,125 @@ REG_OP(AvgPool3D) | |||||
| .ATTR(data_format, String, "NDHWC") | .ATTR(data_format, String, "NDHWC") | ||||
| .OP_END_FACTORY_REG(AvgPool3D) | .OP_END_FACTORY_REG(AvgPool3D) | ||||
| /** | |||||
| *@brief Performs average pooling on the input. | |||||
| *@par Inputs: | |||||
| *@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. | |||||
| *@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. | |||||
| *@li multiplier: An optional tensor of float16, float32, double. | |||||
| *@par Attributes: | |||||
| *@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. | |||||
| *@li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. | |||||
| *@li pads: List of ints, implicit zero paddings on both sides of the input. | |||||
| *@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||||
| *@li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||||
| *@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||||
| *@li data_format: A string, format of input data . \n | |||||
| *@par Outputs: | |||||
| *y: The average pooled output tensor . \n | |||||
| *@attention Constraints: | |||||
| *@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator AvgPool3D. | |||||
| */ | |||||
| REG_OP(AvgPool3DD) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .ATTR(count_include_pad, Bool, true) | |||||
| .ATTR(divisor_override, Int, 0) | |||||
| .ATTR(data_format, String, "NDHWC") | |||||
| .OP_END_FACTORY_REG(AvgPool3DD) | |||||
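The ceil_mode attribute switches the rounding used when counting pooling windows. A one-dimensional sketch of the usual convention (names are illustrative; the kernel may clamp further at the borders):

    #include <cstdint>

    int64_t PooledExtent(int64_t in, int64_t pad_begin, int64_t pad_end,
                         int64_t ksize, int64_t stride, bool ceil_mode) {
      const int64_t span = in + pad_begin + pad_end - ksize;
      // ceil_mode rounds the window count up instead of down.
      return (ceil_mode ? (span + stride - 1) / stride : span / stride) + 1;
    }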
| /** | |||||
| * @brief Computes AvgPool3DGrad function. | |||||
| * @par Inputs: | |||||
| * @li orig_input_shape: A 1-D tensor of type int32, containing the shape of the original input. | |||||
| * @li grads: An NDHWC tensor of type float16, float32, or double. | |||||
| * @par Attributes: | |||||
| * @li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. | |||||
| * @li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. | |||||
| * @li pads: List of ints, implicit zero paddings on both sides of the input. | |||||
| * @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||||
| * @li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||||
| * @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||||
| * @li data_format: A string, format of input data . | |||||
| * @par Outputs: | |||||
| * @output: A mutable tensor with the same shape and type as "orig_input". | |||||
| * @par Third-party framework compatibility | |||||
| * @li Compatible with the TensorFlow operator AvgPool3DGrad. | |||||
| */ | |||||
| REG_OP(AvgPool3DGrad) | |||||
| .INPUT(orig_input_shape, TensorType({DT_INT32})) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .ATTR(count_include_pad, Bool, true) | |||||
| .ATTR(divisor_override, Int, 0) | |||||
| .ATTR(data_format, String, "NDHWC") | |||||
| .OP_END_FACTORY_REG(AvgPool3DGrad) | |||||
| /** | |||||
| * @brief Computes gradients of the AvgPool3D function. | |||||
| * @par Inputs: | |||||
| * @li grads: An NDHWC tensor of type float16. | |||||
| * @li filter: An optional tensor of type float16, fractal_z_3d layout. | |||||
| * @li multiplier: An optional tensor of float16. | |||||
| * @par Attributes: | |||||
| * @li orig_input_shape: List of ints that has length 5. The shape of the original input tensor. | |||||
| * @li ksize: List of ints that has length 3. The size of the window for each dimension of the input tensor. | |||||
| * @li strides: List of ints that has length 3. The stride of the sliding window for each dimension of the input tensor. | |||||
| * @li pads: List of ints, implicit zero paddings on both sides of the input. | |||||
| * @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||||
| * @li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||||
| * @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||||
| * @li data_format: A string, format of input data . \n | |||||
| * @par Outputs: | |||||
| * @output: The average pooled output tensor . \n | |||||
| * @attention Constraints: | |||||
| * @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator AvgPool3DGrad. | |||||
| */ | |||||
| REG_OP(AvgPool3DGradD) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16})) | |||||
| .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16})) | |||||
| .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .REQUIRED_ATTR(orig_input_shape, ListInt) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .ATTR(count_include_pad, Bool, true) | |||||
| .ATTR(divisor_override, Int, 0) | |||||
| .ATTR(data_format, String, "NDHWC") | |||||
| .OP_END_FACTORY_REG(AvgPool3DGradD) | |||||
| /** | /** | ||||
| *@brief Performs max_pool_ext2 on the input . \n | *@brief Performs max_pool_ext2 on the input . \n | ||||
| @@ -308,6 +427,31 @@ REG_OP(MaxPool3D) | |||||
| .ATTR(data_format, String, "NDHWC") | .ATTR(data_format, String, "NDHWC") | ||||
| .OP_END_FACTORY_REG(MaxPool3D) | .OP_END_FACTORY_REG(MaxPool3D) | ||||
| /** | |||||
| *@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n | |||||
| * The output is of size H x W, for any input size. | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: A Tensor. Must be one of the following data types: | |||||
| * float16, float32, float64. \n | |||||
| * @par Attributes: | |||||
| * @li output_size: A required list of 2 ints | |||||
| * specifying the size (H,W) of the output tensor. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same data type as "x" \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator AdaptiveMaxPool2d. | |||||
| */ | |||||
| REG_OP(AdaptiveMaxPool2d) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OUTPUT(argmax, TensorType::IndexNumberType()) | |||||
| .REQUIRED_ATTR(output_size, ListInt) | |||||
| .OP_END_FACTORY_REG(AdaptiveMaxPool2d) | |||||
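Adaptive pooling yields a fixed H x W output by recomputing the window per output index. A sketch of the PyTorch-style boundaries this op mirrors (an assumption; the header does not spell them out):

    #include <cstdint>

    // Window for output index i over an axis of length `in` split into `out`
    // bins; `start` is inclusive, `end` exclusive.
    void AdaptiveWindow(int64_t i, int64_t in, int64_t out,
                        int64_t& start, int64_t& end) {
      start = (i * in) / out;                // floor(i * in / out)
      end = ((i + 1) * in + out - 1) / out;  // ceil((i + 1) * in / out)
    }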
| /** | /** | ||||
| * @brief Computes second-order gradients of the maxpooling3d function . \n | * @brief Computes second-order gradients of the maxpooling3d function . \n | ||||
| @@ -477,8 +621,9 @@ REG_OP(MaxPoolV2) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * One input: | * One input: | ||||
| *x: An NC1HWC0 Tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64 . \n | |||||
| *x: A 4-D Tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||||
| * Must set the format; the supported format list is ["NCHW", "NHWC"]. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li ksize: A required list of int8, int16, int32, or int64 values, | *@li ksize: A required list of int8, int16, int32, or int64 values, | ||||
| @@ -517,10 +662,12 @@ REG_OP(MaxPoolWithArgmax) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Three inputs, including: | * Three inputs, including: | ||||
| *@li x: An NC1HWC0 tensor. Supported type: float, double, int32, | |||||
| *@li x: A 4-D tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64. | * uint8, int16, int8, int64, uint16, half, uint32, uint64. | ||||
| *@li grad: An NC1HWC0 tensor. Supported type: float, double, int32, | |||||
| * Must set the format; the supported format list is ["NCHW", "NHWC"]. | |||||
| *@li grad: A 4-D tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64. | * uint8, int16, int8, int64, uint16, half, uint32, uint64. | ||||
| * Must set the format; the supported format list is ["NCHW", "NHWC"]. | |||||
| *@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n | *@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -1107,7 +1254,7 @@ REG_OP(AvgPool1DD) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * One input: | * One input: | ||||
| *x: An NC1HWC0 Tensor of type float16. | |||||
| *x: A 4-D Tensor of type float16. Must set the format; the supported format list is ["NCHW", "NHWC"]. | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | ||||
| * each dimension of the input tensor. No default value. | * each dimension of the input tensor. No default value. | ||||
| @@ -1148,9 +1295,9 @@ REG_OP(MaxPoolWithArgmaxV2) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Three inputs, including: | * Three inputs, including: | ||||
| *@li x: An NC1HWC0 tensor of type float16. | |||||
| *@li grad: An NC1HWC0 tensor of type float16. | |||||
| *@li argmx: An NC1HWC0 tensor of type uint16 or int64 . \n | |||||
| *@li x: A 4-D tensor of type float16. Must set the format; the supported format list is ["NCHW", "NHWC"]. | |||||
| *@li grad: A 4-D tensor of type float16. Must set the format; the supported format list is ["NCHW", "NHWC"]. | |||||
| *@li argmx: A 4-D tensor of type uint16 or int64. Must set the format; the supported format list is ["NCHW", "NHWC"]. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | ||||
| @@ -1291,5 +1438,171 @@ REG_OP(MaxPoolV3Grad) | |||||
| .ATTR(global_pooling, Bool, false) | .ATTR(global_pooling, Bool, false) | ||||
| .ATTR(ceil_mode, Bool, false) | .ATTR(ceil_mode, Bool, false) | ||||
| .OP_END_FACTORY_REG(MaxPoolV3Grad) | .OP_END_FACTORY_REG(MaxPoolV3Grad) | ||||
| /** | |||||
| *@brief Performs dilation2d on the input . \n | |||||
| *@par Inputs: | |||||
| *x: A 4-D tensor. Only the NHWC format is supported. | |||||
| *filter: A 3-D tensor of the same type as "x"; | |||||
| its C dimension must be the same as that of "x". \n | |||||
| *@par Attributes: | |||||
| *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. | |||||
| *@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1. | |||||
| *@li padding_mode: An optional string. Defaults to "SAME". Supported values are "SAME" and "VALID". | |||||
| *@li pads: An optional list of 4 ints. | |||||
| *@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". | |||||
| *@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n | |||||
| *@par Outputs: | |||||
| *y: The output tensor. Has the same type and format as input "x" . \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator Dilation2D. | |||||
| */ | |||||
| REG_OP(Dilation2D) | |||||
| .INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) | |||||
| .INPUT(filter,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) | |||||
| .OUTPUT(y,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(rates, ListInt) | |||||
| .ATTR(padding_mode, String, "SAME") | |||||
| .ATTR(pads, ListInt, {0,0,0,0}) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .ATTR(data_format, String, "NHWC") | |||||
| .OP_END_FACTORY_REG(Dilation2D) | |||||
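TensorFlow-style morphological dilation, with which this op is documented to be compatible, takes the maximum of input + filter over each window. A 1-D, single-channel, VALID-padding sketch (assumes x is long enough for at least one window):

    #include <algorithm>
    #include <limits>
    #include <vector>

    std::vector<float> Dilation1D(const std::vector<float>& x,
                                  const std::vector<float>& filter,
                                  int stride, int rate) {
      // Effective filter span once the rate (dilation) is applied.
      const int span = rate * (static_cast<int>(filter.size()) - 1) + 1;
      const int out_len = (static_cast<int>(x.size()) - span) / stride + 1;
      std::vector<float> y(out_len, std::numeric_limits<float>::lowest());
      for (int o = 0; o < out_len; ++o)
        for (int k = 0; k < static_cast<int>(filter.size()); ++k)
          y[o] = std::max(y[o], x[o * stride + k * rate] + filter[k]);
      return y;
    }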
| /** | |||||
| * @brief Applies a 2D adaptive average pooling over | |||||
| * an input signal composed of several input planes. \n | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: A Tensor. Must be one of the following data types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li output_size: A required list of 2 ints | |||||
| * specifying the size (H,W) of the output tensor. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same data type as "x" \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator AdaptiveAvgPool2d. | |||||
| */ | |||||
| REG_OP(AdaptiveAvgPool2d) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(output_size, ListInt) | |||||
| .OP_END_FACTORY_REG(AdaptiveAvgPool2d) | |||||
| /** | |||||
| * @brief Compute gradients of adaptive averagev2 pooling function. | |||||
| * @par Inputs: | |||||
| * @li input_grad: An NCHW Tensor. Must be one of the following data types: | |||||
| * float16, float32. | |||||
| * @par Attributes: | |||||
| * @li orig_input_shape: A required tuple or list of type int32. | |||||
| * @par Outputs: | |||||
| * @li output_grad: A tensor of shape "orig_input_shape", with the same type as "input_grad". | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator AdaptiveAvgPool2dGrad. | |||||
| */ | |||||
| REG_OP(AdaptiveAvgPool2dGrad) | |||||
| .INPUT(input_grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(output_grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(orig_input_shape, ListInt) | |||||
| .OP_END_FACTORY_REG(AdaptiveAvgPool2dGrad) | |||||
| /** | |||||
| * @brief Performs the backpropagation of MaxPoolWithGradArgmaxV1. | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li x: An NC1HWC0 tensor of type float16. | |||||
| * @li grad: An NC1HWC0 tensor of type float16. | |||||
| * @li argmax: An NC1HWC0 tensor of type uint16 or int64. \n | |||||
| * @par Attributes: | |||||
| * @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | |||||
| * each dimension of the input tensor. No default value. | |||||
| * @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for | |||||
| * each dimension of the input tensor. No default value. | |||||
| * @li pads: A required listint. \n | |||||
| * @par Outputs: | |||||
| * y: A Tensor. Has the same type and format as input "x". \n | |||||
| * @attention Constraints: | |||||
| * @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | |||||
| * @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 | |||||
| * @li "pads" is listint. | |||||
| * @li "ceil_mode" defaults to False. | |||||
| * @li "data_format" defaults to "NC1HWC0". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1. | |||||
| */ | |||||
| REG_OP(MaxPoolGradWithArgmaxV1) | |||||
| .INPUT(x, TensorType({DT_FLOAT16})) | |||||
| .INPUT(grad, TensorType({DT_FLOAT16})) | |||||
| .INPUT(argmax, TensorType({DT_UINT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(dtype, Int, 3) | |||||
| .ATTR(dilation, ListInt, {1, 1, 1, 1}) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV1) | |||||
| /** | |||||
| * @brief Performs max pooling on the input and outputs both max values and indices. | |||||
| * @par Inputs: | |||||
| * One input: | |||||
| * x: An NC1HWC0 Tensor of type float16. \n | |||||
| * @par Attributes: | |||||
| * @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | |||||
| * each dimension of the input tensor. No default value. | |||||
| * @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for | |||||
| * each dimension of the input tensor. No default value. | |||||
| * @li pads: A required list of ints. No default value. \n | |||||
| * @par Outputs: | |||||
| * y: A Tensor. Has the same type and format as input "x". | |||||
| * argmax: A Tensor. type:uint16, format:NC1HWC0. \n | |||||
| * @attention Constraints: | |||||
| * @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | |||||
| * @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, | |||||
| * strides[2] <= 63, strides[2] >= 1. | |||||
| * @li "pads" is listint. | |||||
| * @li "ceil_mode" defaults to False. | |||||
| * @li "data_format" defaults to "NC1HWC0". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator MaxPoolWithArgmaxV1. | |||||
| */ | |||||
| REG_OP(MaxPoolWithArgmaxV1) | |||||
| .INPUT(x, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(argmax, TensorType({DT_UINT16})) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(dtype, Int, 3) | |||||
| .ATTR(dilation, ListInt, {1, 1, 1, 1}) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -640,6 +640,208 @@ REG_OP(Mish) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 })) | .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 })) | ||||
| .OP_END_FACTORY_REG(Mish) | .OP_END_FACTORY_REG(Mish) | ||||
| /** | |||||
| * @brief PyTorch hardtanh_backward operator. | |||||
| * | |||||
| * @par Inputs: | |||||
| * 2 inputs, including: | |||||
| * @li result, the tensor from the forward hardtanh computation, | |||||
| * datatype: float16/float32, format: ND/5HD. | |||||
| * @li grad, the gradient tensor propagated from the downstream layer, | |||||
| * datatype: float16/float32, format: ND/5HD. \n | |||||
| * @par Attributes: | |||||
| * 2 attributes, including: | |||||
| * @li min_val, minimum value of the linear region range, datatype:float. | |||||
| * @li max_val, maximum value of the linear region range, datatype:float. \n | |||||
| * @par Outputs: | |||||
| * 1 output, including: | |||||
| * @li y, hardtanh_backward output tensor, datatype and format are the same as | |||||
| * input "result". \n | |||||
| * @attention Constraints: | |||||
| * This operator only supports dataType: float16/float32, format: ND/5HD. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator HardtanhGrad. | |||||
| */ | |||||
| REG_OP(HardtanhGrad) | |||||
| .INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */ | |||||
| .INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Second operand." */ | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Result, has same element type as two inputs" */ | |||||
| .ATTR(min_val, Float, -1.0) | |||||
| .ATTR(max_val, Float, 1.0) | |||||
| .OP_END_FACTORY_REG(HardtanhGrad) | |||||
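| // Reference sketch (not part of the op registry): a host-side scalar model, | |||||
| // assuming HardtanhGrad matches PyTorch's hardtanh_backward, where the | |||||
| // gradient passes through only inside the linear region (min_val, max_val). | |||||
| inline float HardtanhGradRef(float result, float grad, | |||||
|                              float min_val = -1.0f, float max_val = 1.0f) { | |||||
|   // Inside the linear region the forward pass is the identity, so the | |||||
|   // incoming gradient flows through unchanged; elsewhere it is zeroed. | |||||
|   return (result > min_val && result < max_val) ? grad : 0.0f; | |||||
| } | |||||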
| /** | |||||
| * @brief Calculates the softplus activation function with attributes "beta" and "threshold". \n | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: A mutable Tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li beta: An optional float. Defaults to "1.0". \n | |||||
| * @li threshold: An optional float. Defaults to "20.0". \n | |||||
| * @par Outputs: | |||||
| * @li y: A mutable Tensor. Has the same type as "x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Softplus. | |||||
| */ | |||||
| REG_OP(SoftplusV2) | |||||
| .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .ATTR(beta, Float, 1.0) | |||||
| .ATTR(threshold, Float, 20.0) | |||||
| .OP_END_FACTORY_REG(SoftplusV2) | |||||
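| // Reference sketch (assumption): SoftplusV2 follows PyTorch's Softplus, | |||||
| // y = (1/beta) * log(1 + exp(beta * x)), reverting to the identity once | |||||
| // beta * x exceeds "threshold" for numerical stability. | |||||
| // (Requires <cmath>, assumed included at the top of this file.) | |||||
| inline float SoftplusV2Ref(float x, float beta = 1.0f, float threshold = 20.0f) { | |||||
|   const float bx = beta * x; | |||||
|   return (bx > threshold) ? x : std::log1p(std::exp(bx)) / beta; | |||||
| } | |||||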
| /** | |||||
| * @brief Calculates the gradients of the function "softplus_v2". \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li input_gradients: A mutable Tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * @li input_features: A mutable Tensor of the same type as "input_gradients". \n | |||||
| * @par Attributes: | |||||
| * @li beta: An optional float. Defaults to "1.0". \n | |||||
| * @li threshold: An optional float. Defaults to "20.0". \n | |||||
| * @par Outputs: | |||||
| * @li output_backprops: A mutable Tensor. Has the same type as "input_gradients". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator SoftplusGrad. | |||||
| */ | |||||
| REG_OP(SoftplusV2Grad) | |||||
| .INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .ATTR(beta, Float, 1.0) | |||||
| .ATTR(threshold, Float, 20.0) | |||||
| .OP_END_FACTORY_REG(SoftplusV2Grad) | |||||
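| // Reference sketch (assumption): the derivative of softplus_v2 is the | |||||
| // logistic sigmoid of beta * x, with the same "threshold" shortcut as the | |||||
| // forward op. (Requires <cmath>, assumed included at the top of this file.) | |||||
| inline float SoftplusV2GradRef(float dy, float x, | |||||
|                                float beta = 1.0f, float threshold = 20.0f) { | |||||
|   const float bx = beta * x; | |||||
|   // Past the threshold the forward op is the identity, so the slope is 1. | |||||
|   return (bx > threshold) ? dy : dy / (1.0f + std::exp(-bx)); | |||||
| } | |||||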
| /** | |||||
| * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) | |||||
| * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. | |||||
| * | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: A Tensor. Must be one of the following types: float32, float16. | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li alpha: An optional float. Defaults to 1.0. | |||||
| * | |||||
| * @par Outputs: | |||||
| * One output, including: | |||||
| * @li y: A Tensor of the same type as "x". | |||||
| * | |||||
| */ | |||||
| REG_OP(ThresholdedRelu) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(alpha, Float, 1.0) | |||||
| .OP_END_FACTORY_REG(ThresholdedRelu) | |||||
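| // Reference sketch of the elementwise rule stated above: | |||||
| // y = x for x > alpha, y = 0 otherwise. | |||||
| inline float ThresholdedReluRef(float x, float alpha = 1.0f) { | |||||
|   return (x > alpha) ? x : 0.0f; | |||||
| } | |||||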
| /** | |||||
| * @brief Calculates the hard shrinkage function. \n | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li lambd: An optional float. Defaults to 0.5. \n | |||||
| * @par Outputs: | |||||
| * output_y: A Tensor with the same dtype and shape as "input_x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Hardshrink. \n | |||||
| */ | |||||
| REG_OP(HardShrink) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(lambd, Float, 0.5) | |||||
| .OP_END_FACTORY_REG(HardShrink) | |||||
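| // Reference sketch (assumption): HardShrink follows PyTorch's Hardshrink, | |||||
| // y = x where |x| > lambd, and 0 inside the band [-lambd, lambd]. | |||||
| inline float HardShrinkRef(float x, float lambd = 0.5f) { | |||||
|   return (x > lambd || x < -lambd) ? x : 0.0f; | |||||
| } | |||||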
| /** | |||||
| * @brief Calculates the hard sigmoid function. \n | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32, int32. \n | |||||
| * @par Attributes: | |||||
| * @li alpha: An optional float. Defaults to 0.16666666. \n | |||||
| * @li beta: An optional float. Defaults to 0.5. \n | |||||
| * @par Outputs: | |||||
| * output_y: A Tensor with the same dtype and shape as "input_x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Hardsigmoid. \n | |||||
| */ | |||||
| REG_OP(HardSigmoid) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .ATTR(alpha, Float, 0.16666666) | |||||
| .ATTR(beta, Float, 0.5) | |||||
| .OP_END_FACTORY_REG(HardSigmoid) | |||||
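| // Reference sketch (assumption): HardSigmoid is the clamped line | |||||
| // y = clamp(alpha * x + beta, 0, 1), as in PyTorch's Hardsigmoid | |||||
| // generalized by the two attributes. | |||||
| inline float HardSigmoidRef(float x, float alpha = 0.16666666f, | |||||
|                             float beta = 0.5f) { | |||||
|   const float y = alpha * x + beta; | |||||
|   return (y < 0.0f) ? 0.0f : ((y > 1.0f) ? 1.0f : y); | |||||
| } | |||||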
| /** | |||||
| * @brief Calculates the soft shrinkage function. \n | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li lambd: An optional float. Defaults to 0.5. \n | |||||
| * @par Outputs: | |||||
| * output_y: A Tensor with the same dtype and shape as "input_x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Softshrink. \n | |||||
| */ | |||||
| REG_OP(SoftShrink) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(lambd, Float, 0.5) | |||||
| .OP_END_FACTORY_REG(SoftShrink) | |||||
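| // Reference sketch (assumption): SoftShrink follows PyTorch's Softshrink, | |||||
| // shrinking values toward zero by lambd and zeroing the band [-lambd, lambd]. | |||||
| inline float SoftShrinkRef(float x, float lambd = 0.5f) { | |||||
|   if (x > lambd) return x - lambd; | |||||
|   if (x < -lambd) return x + lambd; | |||||
|   return 0.0f; | |||||
| } | |||||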
| /** | |||||
| * @brief Calculates the gradients of the function "soft_shrink". \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li input_grad: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li input_x: A tensor of the same dtype as "input_grad". \n | |||||
| * @par Attributes: | |||||
| * @li lambd: An optional float. Defaults to 0.5. \n | |||||
| * @par Outputs: | |||||
| * output_y: A Tensor of the same dtype and shape as "input_grad". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator SoftShrinkGrad. \n | |||||
| */ | |||||
| REG_OP(SoftShrinkGrad) | |||||
| .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(lambd, Float, 0.5) | |||||
| .OP_END_FACTORY_REG(SoftShrinkGrad) | |||||
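| // Reference sketch (assumption): soft shrinkage has unit slope outside | |||||
| // [-lambd, lambd], so the incoming gradient passes through there and is | |||||
| // zeroed inside the band. | |||||
| inline float SoftShrinkGradRef(float dy, float x, float lambd = 0.5f) { | |||||
|   return (x > lambd || x < -lambd) ? dy : 0.0f; | |||||
| } | |||||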
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -161,7 +161,7 @@ REG_OP(Pad) | |||||
| *@brief Pads a tensor . \n | *@brief Pads a tensor . \n | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n | |||||
| *x: A Tensor. Must be one of the following types: float16, float32, int32 . \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *paddings: An optional "vector<vector<int>>". Defaults to "{}". | *paddings: An optional "vector<vector<int>>". Defaults to "{}". | ||||
| @@ -180,8 +180,8 @@ REG_OP(Pad) | |||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. | ||||
| */ | */ | ||||
| REG_OP(PadD) | REG_OP(PadD) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .REQUIRED_ATTR(paddings, ListListInt) | .REQUIRED_ATTR(paddings, ListListInt) | ||||
| .OP_END_FACTORY_REG(PadD) | .OP_END_FACTORY_REG(PadD) | ||||
| @@ -213,7 +213,7 @@ REG_OP(PadV2) | |||||
| *@brief Pads a tensor . \n | *@brief Pads a tensor . \n | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n | |||||
| *x: A Tensor. Must be one of the following types: float16, float32, int32 . \n | |||||
| *constant_values: A Tensor. Must have the same type as input. | *constant_values: A Tensor. Must have the same type as input. | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -227,10 +227,7 @@ REG_OP(PadV2) | |||||
| *y: A Tensor of the same type as "x" . \n | *y: A Tensor of the same type as "x" . \n | ||||
| *@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
| * Compatible with TensorFlow operator Pad. | |||||
| * | |||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. | |||||
| * Compatible with TensorFlow operator PadV2. | |||||
| */ | */ | ||||
| REG_OP(PadV2D) | REG_OP(PadV2D) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | ||||
| @@ -403,5 +400,46 @@ REG_OP(EmbeddingRankId) | |||||
| .ATTR(mode, String, "mod") | .ATTR(mode, String, "mod") | ||||
| .OP_END_FACTORY_REG(EmbeddingRankId) | .OP_END_FACTORY_REG(EmbeddingRankId) | ||||
| /** | |||||
| * @brief Fills a tensor of the specified shape with a given value. | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li dims: A Tensor specifying the shape of the output to fill. | |||||
| * @par Attributes: | |||||
| * @li value: An optional float value. Defaults to 0.0. | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the shape specified by input "dims", filled with the value specified by attribute "value". | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the ONNX operator ConstantOfShape. | |||||
| */ | |||||
| REG_OP(FillV2) | |||||
| .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||||
| .ATTR(value, Float, 0) | |||||
| .OP_END_FACTORY_REG(FillV2) | |||||
| /** | |||||
| * @brief Fills a tensor of the specified shape with a given value. | |||||
| * @par Attributes: | |||||
| * @li value: An optional float value. Defaults to 0.0. | |||||
| * @li dims: A required list of ints specifying the shape of the output to fill. | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the shape specified by attribute "dims", filled with the value specified by attribute "value". | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the ONNX operator ConstantOfShape. | |||||
| */ | |||||
| REG_OP(FillV2D) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64})) | |||||
| .ATTR(value, Float, 0) | |||||
| .REQUIRED_ATTR(dims, ListInt) | |||||
| .OP_END_FACTORY_REG(FillV2D) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -495,6 +495,60 @@ REG_OP(ShuffleChannel) | |||||
| DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) | DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) | ||||
| .ATTR(group, Int, 1) | .ATTR(group, Int, 1) | ||||
| .OP_END_FACTORY_REG(ShuffleChannel) | .OP_END_FACTORY_REG(ShuffleChannel) | ||||
| /** | |||||
| * @brief Generates a tensor of samples from a multinomial | |||||
| * distribution according to the probabilities of each of | |||||
| * the possible outcomes. | |||||
| * | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: Input tensor with shape [batch_size, class_size], | |||||
| * where class_size is the number of all possible outcomes. | |||||
| * Each value along the axis zero represents the unnormalized | |||||
| * log-probability of each corresponding outcome in a batch. | |||||
| * | |||||
| * @par Outputs: | |||||
| * One output, including: | |||||
| * @li y: Output tensor with shape [batch_size, sample_size], | |||||
| * where sample_size is the number of times to sample. | |||||
| * Each value along the axis zero represents the outcome of | |||||
| * the corresponding sample in a batch. | |||||
| * | |||||
| */ | |||||
| REG_OP(MultinomialFuss) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64})) | |||||
| .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) | |||||
| .ATTR(dtype, Int, 6) | |||||
| .ATTR(sample_size, Int, 1) | |||||
| .ATTR(seed, Float, 0) | |||||
| .OP_END_FACTORY_REG(MultinomialFuss) | |||||
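| // Reference sketch (assumption): one multinomial draw for a single batch row, | |||||
| // implemented as a numerically stable softmax over the unnormalized | |||||
| // log-probabilities followed by inverse-CDF sampling. (Requires <algorithm>, | |||||
| // <cmath>, <random>, and <vector>, assumed included at the top of this file.) | |||||
| inline int MultinomialDrawRef(const std::vector<float> &logits, | |||||
|                               std::mt19937 &rng) { | |||||
|   // Subtract the max logit before exponentiating to avoid overflow. | |||||
|   const float max_logit = *std::max_element(logits.begin(), logits.end()); | |||||
|   std::vector<float> weights(logits.size()); | |||||
|   float total = 0.0f; | |||||
|   for (size_t i = 0; i < logits.size(); ++i) { | |||||
|     weights[i] = std::exp(logits[i] - max_logit); | |||||
|     total += weights[i]; | |||||
|   } | |||||
|   // Draw u in [0, total) and walk the cumulative sum to pick an outcome. | |||||
|   std::uniform_real_distribution<float> uni(0.0f, 1.0f); | |||||
|   float u = uni(rng) * total; | |||||
|   for (size_t i = 0; i < weights.size(); ++i) { | |||||
|     u -= weights[i]; | |||||
|     if (u <= 0.0f) return static_cast<int>(i); | |||||
|   } | |||||
|   return static_cast<int>(weights.size()) - 1;  // guard against rounding | |||||
| } | |||||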
| /** | |||||
| * @brief During training, randomly zeroes some of the elements of the input tensor | |||||
| * with probability "p". | |||||
| * | |||||
| * @par Inputs: | |||||
| * @li x: An ND Tensor. Must be one of the following data types: float16, float32. | |||||
| * @li seed: An ND Tensor. Must be of the following data type: float32. | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li p: A required float, specifying the probability of an element to be zeroed. | |||||
| * | |||||
| * @par Outputs: | |||||
| * @li y: A tensor with the same shape and type as "x". | |||||
| * @li mask: A tensor with the same shape as "x", of type float32. | |||||
| * @li seed: A tensor with the same shape and type as "seed". | |||||
| */ | |||||
| REG_OP(DropoutV2) | |||||
| .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||||
| .INPUT(seed, TensorType({ DT_FLOAT })) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||||
| .OUTPUT(mask, TensorType({ DT_FLOAT })) | |||||
| .OUTPUT(seed, TensorType({ DT_FLOAT })) | |||||
| .REQUIRED_ATTR(p, Float) | |||||
| .OP_END_FACTORY_REG(DropoutV2) | |||||
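| // Reference sketch (assumptions): each element is dropped with probability p, | |||||
| // and kept values are rescaled by 1/(1-p) as in inverted dropout; the exact | |||||
| // rescaling and seed-update behavior of DropoutV2 are not confirmed by this | |||||
| // header. (Requires <random> and <vector>, assumed included at the top.) | |||||
| inline void DropoutV2Ref(const std::vector<float> &x, float p, | |||||
|                          std::mt19937 &rng, std::vector<float> &y, | |||||
|                          std::vector<float> &mask) { | |||||
|   std::uniform_real_distribution<float> uni(0.0f, 1.0f); | |||||
|   y.resize(x.size()); | |||||
|   mask.resize(x.size()); | |||||
|   const float scale = (p < 1.0f) ? 1.0f / (1.0f - p) : 0.0f; | |||||
|   for (size_t i = 0; i < x.size(); ++i) { | |||||
|     mask[i] = (uni(rng) >= p) ? 1.0f : 0.0f;  // 1 = keep, 0 = drop | |||||
|     y[i] = x[i] * mask[i] * scale;            // inverted-dropout rescaling | |||||
|   } | |||||
| } | |||||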
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -635,8 +635,8 @@ REG_OP(ReduceMin) | |||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. | ||||
| */ | */ | ||||
| REG_OP(ReduceMinD) | REG_OP(ReduceMinD) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) | |||||
| .REQUIRED_ATTR(axes, ListInt) | .REQUIRED_ATTR(axes, ListInt) | ||||
| .ATTR(keep_dims, Bool, false) | .ATTR(keep_dims, Bool, false) | ||||
| .OP_END_FACTORY_REG(ReduceMinD) | .OP_END_FACTORY_REG(ReduceMinD) | ||||
| @@ -821,7 +821,7 @@ Defaults to "0.00001" . \n | |||||
| *batch_ variance: A Tensor of type float32 for the result variance . \n | *batch_ variance: A Tensor of type float32 for the result variance . \n | ||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
| *For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. | |||||
| */ | */ | ||||
| REG_OP(INInferV2) | REG_OP(INInferV2) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -882,7 +882,7 @@ REG_OP(INTrainingReduceV2) | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | ||||
| * This operator is used in conjunction with INTrainingReduceV2. | * This operator is used in conjunction with INTrainingReduceV2. | ||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
| */ | */ | ||||
| REG_OP(INTrainingUpdateV2) | REG_OP(INTrainingUpdateV2) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -965,7 +965,7 @@ for the updated variance. | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | ||||
| * This operator is used in conjunction with GNTrainingUpdate. | * This operator is used in conjunction with GNTrainingUpdate. | ||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
| */ | */ | ||||
| REG_OP(GNTrainingUpdate) | REG_OP(GNTrainingUpdate) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -982,6 +982,41 @@ REG_OP(GNTrainingUpdate) | |||||
| .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(GNTrainingUpdate) | .OP_END_FACTORY_REG(GNTrainingUpdate) | ||||
| /** | |||||
| * @brief Calculates the standard deviation and average value of Tensors. | |||||
| * @par Inputs: | |||||
| * @li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * Three Attributes, including: | |||||
| * @li dim: An optional list of ints specifying the dimensions to reduce. Defaults to "{}". \n | |||||
| * @li unbiased: An optional bool. Defaults to "True". | |||||
| * If "True", Use Bessel Correction. | |||||
| * If "False", Do not use Bessel Correction. \n | |||||
| * @li keepdim: An optional bool. Defaults to "False". | |||||
| * If "True", Keep the original tensor dimension. | |||||
| * If "False", Do not keep the original tensor dimension. \n | |||||
| * @par Outputs: | |||||
| * Two Outputs, including: | |||||
| * @li y1: A Tensor holding the standard deviation. Has the same type as "x". | |||||
| * @li y2: A Tensor holding the mean. Has the same type as "x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator ReduceStd. | |||||
| */ | |||||
| REG_OP(ReduceStd) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .ATTR(dim, ListInt, {}) | |||||
| .ATTR(unbiased, Bool, true) | |||||
| .ATTR(keepdim, Bool, false) | |||||
| .OP_END_FACTORY_REG(ReduceStd) | |||||
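| // Reference sketch over a flat vector (assumption: y1 is the standard | |||||
| // deviation and y2 the mean, matching PyTorch's std_mean; "dim" and | |||||
| // "keepdim" are omitted for brevity). (Requires <cmath> and <vector>.) | |||||
| inline void ReduceStdRef(const std::vector<float> &x, bool unbiased, | |||||
|                          float &y1_std, float &y2_mean) { | |||||
|   const size_t n = x.size(); | |||||
|   float sum = 0.0f; | |||||
|   for (float v : x) sum += v; | |||||
|   y2_mean = sum / static_cast<float>(n); | |||||
|   float sq_diff = 0.0f; | |||||
|   for (float v : x) sq_diff += (v - y2_mean) * (v - y2_mean); | |||||
|   // Bessel's correction divides by n - 1 instead of n when "unbiased" is set. | |||||
|   const float denom = static_cast<float>(unbiased ? n - 1 : n); | |||||
|   y1_std = std::sqrt(sq_diff / denom); | |||||
| } | |||||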
| } //namespace ge | } //namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -187,16 +187,16 @@ REG_OP(DynamicRNNGrad) | |||||
| *@brief: DynamicRNN calculation. | *@brief: DynamicRNN calculation. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *ten inputs: | *ten inputs: | ||||
| *@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||||
| *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n | |||||
| *@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||||
| *@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | ||||
| @@ -221,6 +221,8 @@ REG_OP(DynamicRNNGrad) | |||||
| *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@par Third-party framework compatibility: | |||||
| * Compatible with the TF operator LSTM. | |||||
| */ | */ | ||||
| REG_OP(DynamicRNN) | REG_OP(DynamicRNN) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -254,6 +256,63 @@ REG_OP(DynamicRNN) | |||||
| .ATTR(is_training, Bool, true) | .ATTR(is_training, Bool, true) | ||||
| .OP_END_FACTORY_REG(DynamicRNN) | .OP_END_FACTORY_REG(DynamicRNN) | ||||
| /** | |||||
| *@brief: DynamicLSTMV2 calculation. | |||||
| *@par Inputs: | |||||
| *eleven inputs: | |||||
| *@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li w_xc_x_static:An optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li h0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li c0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND. | |||||
| *@par Attributes: | |||||
| *@li num_output:An integer identifying the number of outputs in the op. Defaults to 0. | |||||
| *@li expose_hidden:A bool identifying whether the hidden state is exposed in the op. Defaults to false. | |||||
| *@li need_output_last:A bool identifying whether only the last time step is output in the op. Defaults to false. | |||||
| *@li forget_bias:A float identifying the forget bias in the op. Defaults to 0. | |||||
| *@par Outputs: | |||||
| *five outputs: | |||||
| *@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@par Third-party framework compatibility: | |||||
| * Compatible with the Caffe operator LSTM. | |||||
| *@par Restrictions: | |||||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | |||||
| REG_OP(DynamicLSTMV2) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(cont, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(w_xc_x_static, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(h0, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(c0, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(last_output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(last_output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(num_output, Int, 0) | |||||
| .ATTR(expose_hidden, Bool, false) | |||||
| .ATTR(need_output_last, Bool, false) | |||||
| .ATTR(forget_bias, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(DynamicLSTMV2) | |||||
| /** | /** | ||||
| *@brief: LSTMInputGrad calculation. | *@brief: LSTMInputGrad calculation. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| @@ -475,9 +534,9 @@ REG_OP(BasicRNNCell) | |||||
| .OP_END_FACTORY_REG(BasicRNNCell) | .OP_END_FACTORY_REG(BasicRNNCell) | ||||
| /** | /** | ||||
| *@brief: DynamicGRU calculation. | |||||
| *@brief DynamicGRU calculation. | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *seven inputs: \n | |||||
| *seven inputs: | |||||
| *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | ||||
| *@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. | *@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. | ||||
| *@li b:Must be one of the following types: float16, float32. The format must be ND. | *@li b:Must be one of the following types: float16, float32. The format must be ND. | ||||
| @@ -497,7 +556,7 @@ REG_OP(BasicRNNCell) | |||||
| *@li is_training:An bool identifying is training in the op. Default to true. | *@li is_training:An bool identifying is training in the op. Default to true. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *five outputs: \n | |||||
| *five outputs: | |||||
| *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| @@ -531,9 +590,9 @@ REG_OP(DynamicGRU) | |||||
| .OP_END_FACTORY_REG(DynamicGRU) | .OP_END_FACTORY_REG(DynamicGRU) | ||||
| /** | /** | ||||
| *@brief: DynamicGRUV2 calculation. | |||||
| *@brief DynamicGRUV2 calculation. | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *seven inputs: \n | |||||
| *seven inputs: | |||||
| *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | ||||
| *@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. | *@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. | ||||
| *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | ||||
| @@ -555,7 +614,7 @@ REG_OP(DynamicGRU) | |||||
| *@li is_training:An bool identifying is training in the op. Default to true. | *@li is_training:An bool identifying is training in the op. Default to true. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *six outputs: \n | |||||
| *six outputs: | |||||
| *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| @@ -592,6 +651,68 @@ REG_OP(DynamicGRUV2) | |||||
| .ATTR(is_training, Bool, true) | .ATTR(is_training, Bool, true) | ||||
| .OP_END_FACTORY_REG(DynamicGRUV2) | .OP_END_FACTORY_REG(DynamicGRUV2) | ||||
| /** | |||||
| *@brief DynamicGRUV2Hidden calculation. | |||||
| *@par Inputs: | |||||
| *five inputs: | |||||
| *@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. | |||||
| *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
| *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li seq_length:Must be one of the following types: int32. The format must be ND. | |||||
| *@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@par Attributes: | |||||
| *@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". | |||||
| Only UNIDIRECTIONAL is currently supported. | |||||
| *@li cell_depth:An integer identifying the cell depth in the op. Defaults to 1. | |||||
| *@li keep_prob:A float identifying the keep prob in the op. Defaults to 1. | |||||
| *@li cell_clip:A float identifying the cell clip in the op. Defaults to -1. | |||||
| *@li num_proj:An integer identifying the num projection in the op. Defaults to 0. | |||||
| *@li time_major:A bool identifying the time major in the op. Defaults to true. | |||||
| *@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". | |||||
| Only tanh is currently supported. | |||||
| *@li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option. | |||||
| *@li reset_after:A bool identifying whether to apply the reset gate after matrix multiplication. Defaults to true. | |||||
| *@li is_training:A bool identifying whether the op is in training mode. Defaults to true. | |||||
| *@par Outputs: | |||||
| *six outputs: | |||||
| *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | |||||
| REG_OP(DynamicGRUV2Hidden) | |||||
| .INPUT(x_weight_input, TensorType({DT_FLOAT32})) | |||||
| .INPUT(weight_hidden, TensorType({DT_FLOAT16})) | |||||
| .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(direction, String, "UNIDIRECTIONAL") | |||||
| .ATTR(cell_depth, Int, 1) | |||||
| .ATTR(keep_prob, Float, 1.0) | |||||
| .ATTR(cell_clip, Float, -1.0) | |||||
| .ATTR(num_proj, Int, 0) | |||||
| .ATTR(time_major, Bool, true) | |||||
| .ATTR(activation, String, "tanh") | |||||
| .ATTR(gate_order, String, "zrh") | |||||
| .ATTR(reset_after, Bool, true) | |||||
| .ATTR(is_training, Bool, true) | |||||
| .OP_END_FACTORY_REG(DynamicGRUV2Hidden) | |||||
| /** | /** | ||||
| *@brief: DynamicGRUV2Grad calculation. | *@brief: DynamicGRUV2Grad calculation. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| @@ -618,7 +739,6 @@ REG_OP(DynamicGRUV2) | |||||
| *@li cell_clip:An float identifying the cell clip in the op. Default to -1. | *@li cell_clip:An float identifying the cell clip in the op. Default to -1. | ||||
| *@li num_proj:An integer identifying the num projection in the op. Default to 0. | *@li num_proj:An integer identifying the num projection in the op. Default to 0. | ||||
| *@li time_major:An bool identifying the time major in the op. Default to true. | *@li time_major:An bool identifying the time major in the op. Default to true. | ||||
| *@li bias_type:An string identifying the type of bias_type function in the op. Default to "double_bias". | |||||
| *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | ||||
| *@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. | *@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. | ||||
| @@ -630,6 +750,9 @@ REG_OP(DynamicGRUV2) | |||||
| *@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(DynamicGRUV2Grad) | REG_OP(DynamicGRUV2Grad) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -658,7 +781,6 @@ REG_OP(DynamicGRUV2Grad) | |||||
| .ATTR(cell_clip, Float, -1.0) | .ATTR(cell_clip, Float, -1.0) | ||||
| .ATTR(num_proj, Int, 0) | .ATTR(num_proj, Int, 0) | ||||
| .ATTR(time_major, Bool, true) | .ATTR(time_major, Bool, true) | ||||
| .ATTR(bias_type, String, "double_bias") | |||||
| .ATTR(gate_order, String, "zrh") | .ATTR(gate_order, String, "zrh") | ||||
| .ATTR(reset_after, Bool, true) | .ATTR(reset_after, Bool, true) | ||||
| .OP_END_FACTORY_REG(DynamicGRUV2Grad) | .OP_END_FACTORY_REG(DynamicGRUV2Grad) | ||||
| @@ -667,7 +789,7 @@ REG_OP(DynamicGRUV2Grad) | |||||
| *@brief: GRUV2HiddenGrad calculation. | *@brief: GRUV2HiddenGrad calculation. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *nine inputs: \n | *nine inputs: \n | ||||
| *@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| @@ -678,6 +800,7 @@ REG_OP(DynamicGRUV2Grad) | |||||
| *@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li t_state:An int identifying the current t state, within [0, 4]. Defaults to 0. | |||||
| *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| @@ -685,10 +808,12 @@ REG_OP(DynamicGRUV2Grad) | |||||
| *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(GRUV2HiddenGrad) | |||||
| .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| REG_OP(GRUV2HiddenGradCell) | |||||
| .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -699,8 +824,142 @@ REG_OP(GRUV2HiddenGrad) | |||||
| .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .ATTR(t_state, Int, 0) | |||||
| .ATTR(gate_order, String, "zrh") | .ATTR(gate_order, String, "zrh") | ||||
| .OP_END_FACTORY_REG(GRUV2HiddenGrad) | |||||
| .OP_END_FACTORY_REG(GRUV2HiddenGradCell) | |||||
| /** | |||||
| * @brief Calculates the gradients of the function "embedding". \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li grad: A mutable Tensor of word grad. Must be one of the following types: | |||||
| * float32. | |||||
| * @li indices: A mutable word index Tensor of the int32 type.\n | |||||
| * @par Attributes: | |||||
| * @li num_weights: A required int specifying the number of words in the dictionary. \n | |||||
| * @li padding_idx: An optional int specifying the word index whose gradient row is filled with zeros. Defaults to "-1". \n | |||||
| * @li scale_grad_by_freq: An optional bool. Defaults to "False". | |||||
| * If "True", "grad_weight" will be scale by word_frequency. | |||||
| * If "False", "grad_weight" will not be scale by word_frequency. \n | |||||
| * @par Outputs: | |||||
| * @li y: A mutable output Tensor of the accumulated word gradients. Has the same type as "grad". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator EmbeddingDenseGrad. | |||||
| */ | |||||
| REG_OP(EmbeddingDenseGrad) | |||||
| .INPUT(grad, TensorType({ DT_FLOAT32 })) /* "First operand." */ | |||||
| .INPUT(indices, TensorType({ DT_INT32 })) /* "Second operand." */ | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT32 })) /* "Result, has same element type as two inputs" */ | |||||
| .REQUIRED_ATTR(num_weights, Int) | |||||
| .ATTR(padding_idx, Int, -1) | |||||
| .ATTR(scale_grad_by_freq, Bool, false) | |||||
| .OP_END_FACTORY_REG(EmbeddingDenseGrad) | |||||
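| // Reference sketch (assumption): each incoming gradient row is accumulated | |||||
| // into the grad-weight row selected by "indices"; rows equal to padding_idx | |||||
| // are skipped, and scale_grad_by_freq divides each contribution by how often | |||||
| // its index occurs. (Requires <vector>, assumed included at the top.) | |||||
| inline std::vector<float> EmbeddingDenseGradRef( | |||||
|     const std::vector<float> &grad,   // [num_indices, dim], flattened | |||||
|     const std::vector<int> &indices,  // [num_indices] | |||||
|     int num_weights, int dim, int padding_idx = -1, | |||||
|     bool scale_grad_by_freq = false) { | |||||
|   std::vector<float> grad_weight(static_cast<size_t>(num_weights) * dim, 0.0f); | |||||
|   std::vector<int> freq(num_weights, 0); | |||||
|   for (int idx : indices) { | |||||
|     if (idx != padding_idx) ++freq[idx]; | |||||
|   } | |||||
|   for (size_t i = 0; i < indices.size(); ++i) { | |||||
|     const int idx = indices[i]; | |||||
|     if (idx == padding_idx) continue;  // padded words get no gradient | |||||
|     const float scale = | |||||
|         scale_grad_by_freq ? 1.0f / static_cast<float>(freq[idx]) : 1.0f; | |||||
|     for (int d = 0; d < dim; ++d) { | |||||
|       grad_weight[static_cast<size_t>(idx) * dim + d] += | |||||
|           grad[i * dim + d] * scale; | |||||
|     } | |||||
|   } | |||||
|   return grad_weight; | |||||
| } | |||||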
| /** | |||||
| *@brief CommonLSTM calculation. | |||||
| *@par Inputs: | |||||
| *eight inputs: \n | |||||
| *@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||||
| *@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@par Attributes: | |||||
| *@li activation_alpha:Optional scaling values used by some activation functions. Empty is currently supported. | |||||
| *@li activation_beta:Optional scaling values used by some activation functions. Empty is currently supported. | |||||
| *@li activations:The list of activation functions. Empty is currently supported. | |||||
| *@li clip:A float identifying the cell clip in the op. Defaults to -1. | |||||
| *@li direction:Specify if the RNN is forward, reverse, or bidirectional. Must be one of forward(default), reverse, or bidirectional. | |||||
| *@li hidden_size:Number of neurons in the hidden layer. Reserved. | |||||
| *@li input_forget:Couple the input and forget gates if 1. Reserved. | |||||
| *@par Outputs: | |||||
| *three outputs: \n | |||||
| *@li y:First dimension is time step, second dimension is direction, others is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| */ | |||||
| REG_OP(CommonLSTM) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(initial_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(p, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(activation_alpha, ListFloat, {}) | |||||
| .ATTR(activation_beta, ListFloat, {}) | |||||
| .ATTR(activations, ListString, {}) | |||||
| .ATTR(clip, Float, -1.0) | |||||
| .ATTR(direction, String, "forward") | |||||
| .REQUIRED_ATTR(hidden_size, Int) | |||||
| .ATTR(input_forget, Int, 0) | |||||
| .OP_END_FACTORY_REG(CommonLSTM) | |||||
| /** | |||||
| * @brief Common GRU calculation. | |||||
| * @par Inputs: | |||||
| * Six inputs, including: | |||||
| * @li x: The input sequences, packed (and potentially padded) into one 3D Tensor (float16). The format must be FRACTAL_NZ | |||||
| * @li w: The weight tensor for the gates, a 3D Tensor (float16). The format must be FRACTAL_Z | |||||
| * @li r: The recurrence weight tensor, a 3D Tensor (float16). The format must be FRACTAL_Z | |||||
| * @li b: The bias tensor for the gates. The format must be ND | |||||
| * @li sequence_lens: Optional tensor specifying the lengths of the sequences (int32). The format must be ND | |||||
| * @li initial_h: Optional initial value of the hidden state (float16, float32). The format must be FRACTAL_NZ | |||||
| * @par Attributes: | |||||
| * @li activation_alpha: Optional scaling values used by some activation functions. \n | |||||
| * @li activation_beta: Optional scaling values used by some activation functions. \n | |||||
| * @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n | |||||
| * @li clip: Cell clip threshold. \n | |||||
| * @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n | |||||
| * @li hidden_size: Number of neurons in the hidden layer. \n | |||||
| * @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor that concatenates all the intermediate output values of the hidden state (float16, float32). The format must be FRACTAL_NZ | |||||
| * @li y_h: The last output value of the hidden state (float16, float32). The format must be FRACTAL_NZ | |||||
| */ | |||||
| REG_OP(CommonGRU) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(activation_alpha, ListFloat, {}) | |||||
| .ATTR(activation_beta, ListFloat, {}) | |||||
| .ATTR(activations, ListString, {}) | |||||
| .ATTR(clip, Float, -1.0) | |||||
| .ATTR(direction, String, "forward") | |||||
| .REQUIRED_ATTR(hidden_size, Int) | |||||
| .ATTR(linear_before_reset, Int, 0) | |||||
| .OP_END_FACTORY_REG(CommonGRU) | |||||
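Because linear_before_reset is the one attribute whose effect is easy to misread, here is a scalar reference cell under the assumption that CommonGRU follows the ONNX GRU definition with its default activations (sigmoid for the gates, tanh for the candidate); a host-side float sketch, not the device kernel.

#include <cmath>

static float Sigmoid(float v) { return 1.0f / (1.0f + std::exp(-v)); }

// One GRU step for a single feature. wz/wr/wh are input weights, rz/rr/rh
// recurrence weights, bh and rbh the two hidden-gate bias terms.
static float GruStep(float x, float h_prev,
                     float wz, float wr, float wh,
                     float rz, float rr, float rh,
                     float bh, float rbh, bool linear_before_reset) {
  float z = Sigmoid(wz * x + rz * h_prev);  // update gate
  float r = Sigmoid(wr * x + rr * h_prev);  // reset gate
  float h_tilde = linear_before_reset
      ? std::tanh(wh * x + r * (rh * h_prev + rbh) + bh)  // linear transform first, then reset
      : std::tanh(wh * x + rh * (r * h_prev) + bh);       // reset applied before the linear transform
  return (1.0f - z) * h_tilde + z * h_prev;               // blend with previous hidden state
}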
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -796,6 +796,34 @@ REG_OP(SliceD) | |||||
| .REQUIRED_ATTR(size, ListInt) | .REQUIRED_ATTR(size, ListInt) | ||||
| .OP_END_FACTORY_REG(SliceD) | .OP_END_FACTORY_REG(SliceD) | ||||
| /** | |||||
| *@brief Extracts a slice from a tensor. | |||||
| * This operation extracts a slice of size "size" from a tensor "x" | |||||
| * starting at the location specified by "offsets". \n | |||||
| *@par Inputs: | |||||
| * Two inputs, including: | |||||
| *@li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, | |||||
| * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n | |||||
| *@li offsets: The starting location for the slice. | |||||
| *@par Attributes: | |||||
| *@li size: A required list of int; the shape of the extracted slice. \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor. Has the same type as "x". The slice extracted from the tensor. | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. | |||||
| */ | |||||
| REG_OP(SliceDV2) | |||||
| .INPUT(x, TensorType::BasicType()) | |||||
| .INPUT(offsets, TensorType::IndexNumberType()) | |||||
| .OUTPUT(y, TensorType::BasicType()) | |||||
| .REQUIRED_ATTR(size, ListInt) | |||||
| .OP_END_FACTORY_REG(SliceDV2) | |||||
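For the 1-D case the semantics reduce to a contiguous copy; a small host-side sketch (not the device kernel), with bounds checks omitted:

#include <vector>

// y[i] = x[offsets[0] + i] for i in [0, size[0]).
std::vector<float> SliceRef(const std::vector<float> &x, int offset, int size) {
  std::vector<float> y(static_cast<size_t>(size));
  for (int i = 0; i < size; ++i) {
    y[i] = x[offset + i];  // contiguous copy starting at "offsets"
  }
  return y;
}
// e.g. x = {0, 1, 2, 3, 4}, offset = 1, size = 3  ->  y = {1, 2, 3}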
| /** | /** | ||||
| * @brief Finds values and indices of the "k" largest elements for the last | * @brief Finds values and indices of the "k" largest elements for the last | ||||
| * dimension . \n | * dimension . \n | ||||
| @@ -1921,6 +1949,160 @@ REG_OP(CumulativeLogsumexpD) | |||||
| .ATTR(exclusive, Bool, false) | .ATTR(exclusive, Bool, false) | ||||
| .ATTR(reverse, Bool, false) | .ATTR(reverse, Bool, false) | ||||
| .OP_END_FACTORY_REG(CumulativeLogsumexpD) | .OP_END_FACTORY_REG(CumulativeLogsumexpD) | ||||
| /** | |||||
| * @brief Add updates to var according to axis and indices. | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li var: A Tensor. Must be one of the following types: | |||||
| * float16, float32, int16, int32, int8, uint8. | |||||
| * @li indices: A Tensor of the indices, type should be int32. | |||||
| * @li updates: A Tensor of the same type as "var". \n | |||||
| * @par Attributes: | |||||
| * @li axis: A required int specifying the axis along which to perform the index add. \n | |||||
| * @par Outputs: | |||||
| * @li var: A Tensor. Same as input "var". | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the PyTorch operator index_add_. | |||||
| */ | |||||
| REG_OP(InplaceIndexAdd) | |||||
| .INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||||
| DT_UINT8, DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(indices, TensorType({DT_INT32})) | |||||
| .INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||||
| DT_UINT8, DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||||
| DT_UINT8, DT_FLOAT, DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(axis, Int) | |||||
| .OP_END_FACTORY_REG(InplaceIndexAdd) | |||||
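The PyTorch index_add_ compatibility note pins down the arithmetic; a host-side reference for the axis-0 case on flattened row-major storage:

#include <vector>

// var has shape [n, row]; updates has shape [m, row]; indices has length m.
// For each i: var[indices[i], :] += updates[i, :].
void IndexAddRef(std::vector<float> &var, const std::vector<int> &indices,
                 const std::vector<float> &updates, int row) {
  for (int i = 0; i < static_cast<int>(indices.size()); ++i) {
    for (int j = 0; j < row; ++j) {
      var[indices[i] * row + j] += updates[i * row + j];  // accumulate update row i
    }
  }
}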
| /** | |||||
| * @brief Fills elements of "x" with "value" where "mask" is true. | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li x: A Tensor of dtype float16, float32, int32, or int8. | |||||
| * @li mask: A Tensor of dtype bool. | |||||
| * @li value: A Tensor or scalar of dtype float16, float32, int32, or int8. \n | |||||
| * @par Outputs: | |||||
| * @li y: A tensor. Must be one of the following dtypes: | |||||
| * float16, float32, int32, int8. | |||||
| */ | |||||
| REG_OP(MaskedFill) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) | |||||
| .INPUT(mask, TensorType({DT_BOOL})) | |||||
| .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(MaskedFill) | |||||
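Element-wise, the op is a select between "x" and the (broadcast) "value"; a sketch with broadcasting omitted:

#include <vector>

std::vector<float> MaskedFillRef(const std::vector<float> &x,
                                 const std::vector<bool> &mask, float value) {
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    y[i] = mask[i] ? value : x[i];  // replace where mask is true
  }
  return y;
}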
| /** | |||||
| * @brief Selects values from "x" according to "mask". | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li x: A Tensor of dtype float16 or float32. | |||||
| * @li mask: A Tensor of dtype bool. \n | |||||
| * @par Outputs: | |||||
| * @li y: A tensor with the same type as x. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the NumPy operator select. | |||||
| * Replaces the PyTorch operator masked_select in some scenarios. \n | |||||
| */ | |||||
| REG_OP(MaskedSelectV2) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(mask, TensorType({DT_BOOL})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(MaskedSelectV2) | |||||
| /** | |||||
| * @brief Slices a tensor at its last dim, e.g. x[..., start:end:stride]. \n | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: A Tensor. Must be one of the following types: float16, float32, double, int8, int16, int32, int64. | |||||
| * @par Attributes: | |||||
| * @li start: An attribute of type Int, the start index of the last dim. \n | |||||
| * @li end: An attribute of type Int, the end index of the last dim. \n | |||||
| * @li stride: An attribute of type Int, the stride of the slice. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * No third-party framework compatibility. | |||||
| */ | |||||
| REG_OP(SliceLastDim) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||||
| .REQUIRED_ATTR(start, Int) | |||||
| .REQUIRED_ATTR(end, Int) | |||||
| .ATTR(stride, Int, 1) | |||||
| .OP_END_FACTORY_REG(SliceLastDim) | |||||
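Treating the leading dimensions as one flattened outer loop makes the last-dim slicing explicit; a host-side sketch assuming a positive stride:

#include <vector>

std::vector<float> SliceLastDimRef(const std::vector<float> &x, int last_dim,
                                   int start, int end, int stride) {
  std::vector<float> y;
  const int outer = static_cast<int>(x.size()) / last_dim;  // product of leading dims
  for (int o = 0; o < outer; ++o) {
    for (int i = start; i < end; i += stride) {  // x[..., start:end:stride]
      y.push_back(x[o * last_dim + i]);
    }
  }
  return y;
}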
| /** | |||||
| * @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n | |||||
| * extracts a slice of size (end-begin)/stride from the given input tensor. \n | |||||
| * Starting at the location specified by begin the slice continues by \n | |||||
| * adding stride to the index until all dimensions are not less than end. \n | |||||
| * | |||||
| * @par Inputs: | |||||
| * Five inputs, including: | |||||
| * @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n | |||||
| * complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n | |||||
| * complex128, float16, uint32, uint64. \n | |||||
| * @li begin: A Tensor of type int32 or int64, for the index of the first value to select. | |||||
| * | |||||
| * @li end: A Tensor of type int32 or int64, for the index of the last value to select. | |||||
| * | |||||
| * @li axes: A Tensor of type int32 or int64, indicating the axes to select. | |||||
| * | |||||
| * @li strides: A Tensor of type int32 or int64, for the increment. | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li begin_mask: An optional int32. \n | |||||
| * A bitmask where a bit "i" being "1" means to ignore the begin \n | |||||
| * value and instead use the largest interval possible. | |||||
| * @li end_mask: An optional int32. \n | |||||
| * Analogous to "begin_mask". | |||||
| * @li ellipsis_mask: An optional int32. \n | |||||
| * A bitmask where bit "i" being "1" means the "i"th position \n | |||||
| * is actually an ellipsis. | |||||
| * @li new_axis_mask: An optional int32. \n | |||||
| * A bitmask where bit "i" being "1" means the "i"th \n | |||||
| * specification creates a new shape 1 dimension. | |||||
| * @li shrink_axis_mask: An optional int32. \n | |||||
| * A bitmask where bit "i" implies that the "i"th \n | |||||
| * specification should shrink the dimensionality. | |||||
| * | |||||
| * @par Outputs: | |||||
| * y: A Tensor. Has the same type as "x". | |||||
| * | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator StridedSliceV2. | |||||
| */ | |||||
| REG_OP(StridedSliceV2) | |||||
| .INPUT(x, TensorType::BasicType()) | |||||
| .INPUT(begin, TensorType::IndexNumberType()) | |||||
| .INPUT(end, TensorType::IndexNumberType()) | |||||
| .OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) | |||||
| .OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) | |||||
| .ATTR(begin_mask, Int, 0) | |||||
| .ATTR(end_mask, Int, 0) | |||||
| .ATTR(ellipsis_mask, Int, 0) | |||||
| .ATTR(new_axis_mask, Int, 0) | |||||
| .ATTR(shrink_axis_mask, Int, 0) | |||||
| .OUTPUT(y, TensorType::BasicType()) | |||||
| .OP_END_FACTORY_REG(StridedSliceV2) | |||||
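The begin_mask/end_mask semantics are easiest to see in one dimension; a host-side sketch for positive strides (ellipsis, new-axis, and shrink handling omitted):

#include <vector>

std::vector<float> StridedSlice1D(const std::vector<float> &x,
                                  int begin, int end, int stride,
                                  bool ignore_begin, bool ignore_end) {
  if (ignore_begin) begin = 0;                       // begin_mask bit set for this dim
  if (ignore_end) end = static_cast<int>(x.size());  // end_mask bit set for this dim
  std::vector<float> y;
  for (int i = begin; i < end; i += stride) {
    y.push_back(x[i]);
  }
  return y;
}
// e.g. x of length 5, begin = 1, end = 5, stride = 2 selects indices 1 and 3.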
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -141,7 +141,7 @@ support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW" | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. | *@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. | ||||
| *@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. | *@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. | ||||
| *@li group: A required int32, default value is 1. \n | |||||
| *@li groups: An optional int32, default value is 1. \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *dst: A Tensor dtype of all types. | *dst: A Tensor dtype of all types. | ||||
| @@ -151,7 +151,7 @@ REG_OP(TransData) | |||||
| .OUTPUT(dst, TensorType::BasicType()) | .OUTPUT(dst, TensorType::BasicType()) | ||||
| .REQUIRED_ATTR(src_format, String) | .REQUIRED_ATTR(src_format, String) | ||||
| .REQUIRED_ATTR(dst_format, String) | .REQUIRED_ATTR(dst_format, String) | ||||
| .ATTR(group, Int, 1) | |||||
| .ATTR(groups, Int, 1) | |||||
| .OP_END_FACTORY_REG(TransData) | .OP_END_FACTORY_REG(TransData) | ||||
| /** | /** | ||||
| @@ -357,7 +357,7 @@ REG_OP(DepthToSpace) | |||||
| *@brief Permutes data into spatial data blocks and then prunes them . \n | *@brief Permutes data into spatial data blocks and then prunes them . \n | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *@li x: A 4D Tensor with format NHWC. | |||||
| *@li x: A 4D Tensor. The format must be set; supported formats: "NCHW", "NHWC". | |||||
| *@li crops: A 1D list or tuple of int32 or int64 . \n | *@li crops: A 1D list or tuple of int32 or int64 . \n | ||||
| *Must be one of the following types: float16, float32 | *Must be one of the following types: float16, float32 | ||||
| @@ -434,9 +434,10 @@ REG_OP(BatchToSpaceD) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Two inputs, including: | * Two inputs, including: | ||||
| *@li x: An NHWC Tensor. Must be one of the following types: | |||||
| *@li x: A 4D Tensor. Must be one of the following types: | |||||
| * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, | * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, | ||||
| * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | ||||
| * The format must be set; supported formats: "NCHW", "NHWC". | |||||
| *@li paddings: A 2D tensor of type int, specifying the padding of the input. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -518,7 +519,8 @@ REG_OP(Unpack) | |||||
| * @par Inputs: | * @par Inputs: | ||||
| * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the | * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the | ||||
| * following types:float32, double, int32, uint8, int16, int8, int64, uint16, | * following types:float32, double, int32, uint8, int16, int8, int64, uint16, | ||||
| * float16, uint32, uint64 | |||||
| * float16, uint32, uint64. The input format must be one of the following: | |||||
| * NHWC, NCHW. | |||||
| * @par Attributes: | * @par Attributes: | ||||
| * @li ksizes: A required list or tuple. The size of the sliding window for each | * @li ksizes: A required list or tuple. The size of the sliding window for each | ||||
| @@ -533,7 +535,6 @@ REG_OP(Unpack) | |||||
| * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. | * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. | ||||
| * @li padding: A required string. The type of padding algorithm to use, | * @li padding: A required string. The type of padding algorithm to use, | ||||
| support "SAME" or "VALID". \n | support "SAME" or "VALID". \n | ||||
| * @li data_format: A required string. The format of input, only supported NHWC. \n | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * | * y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * | ||||
| @@ -554,7 +555,6 @@ REG_OP(ExtractImagePatches) | |||||
| .REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
| .REQUIRED_ATTR(rates, ListInt) | .REQUIRED_ATTR(rates, ListInt) | ||||
| .REQUIRED_ATTR(padding, String) | .REQUIRED_ATTR(padding, String) | ||||
| .ATTR(data_format, String, "NHWC") | |||||
| .OP_END_FACTORY_REG(ExtractImagePatches) | .OP_END_FACTORY_REG(ExtractImagePatches) | ||||
| /** | /** | ||||
| @@ -563,6 +563,7 @@ REG_OP(ExtractImagePatches) | |||||
| * @par Inputs: | * @par Inputs: | ||||
| * x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n | * x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n | ||||
| * The input format must be one of the following: NDHWC, NCDHW. \n | |||||
| * @par Attributes: | * @par Attributes: | ||||
| * @li ksizes: A required list or tuple. The size of the sliding window for each | * @li ksizes: A required list or tuple. The size of the sliding window for each | ||||
| @@ -571,7 +572,6 @@ REG_OP(ExtractImagePatches) | |||||
| * patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1]. | * patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1]. | ||||
| * @li padding: A required string. The type of padding algorithm to use , | * @li padding: A required string. The type of padding algorithm to use , | ||||
| * support "SAME" or "VALID" . \n | * support "SAME" or "VALID" . \n | ||||
| * @li data_format: An optional string. The format of input, only supported NDHWC. \n | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * | * Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * | ||||
| @@ -590,7 +590,6 @@ REG_OP(ExtractVolumePatches) | |||||
| .REQUIRED_ATTR(ksizes, ListInt) | .REQUIRED_ATTR(ksizes, ListInt) | ||||
| .REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
| .REQUIRED_ATTR(padding, String) | .REQUIRED_ATTR(padding, String) | ||||
| .ATTR(data_format, String, "NDHWC") | |||||
| .OP_END_FACTORY_REG(ExtractVolumePatches) | .OP_END_FACTORY_REG(ExtractVolumePatches) | ||||
| /** | /** | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,12 +1,18 @@ | |||||
| /** | /** | ||||
| * @file adx_datadump_server.h | |||||
| * | |||||
| * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. | |||||
| * | |||||
| * This program is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| */ | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef ADX_DATADUMP_SERVER_H | #ifndef ADX_DATADUMP_SERVER_H | ||||
| #define ADX_DATADUMP_SERVER_H | #define ADX_DATADUMP_SERVER_H | ||||
| @@ -14,151 +14,99 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MSPROF_ENGINE_PROF_ACL_API_H_ | |||||
| #define MSPROF_ENGINE_PROF_ACL_API_H_ | |||||
| #define MSVP_MAX_DEV_NUM 64 | |||||
| #define MSVP_PROF_API __attribute__((visibility("default"))) | |||||
| #ifndef MSPROFILER_API_PROF_ACL_API_H_ | |||||
| #define MSPROFILER_API_PROF_ACL_API_H_ | |||||
| // DataTypeConfig | // DataTypeConfig | ||||
| #define PROF_ACL_API 0x0001 | |||||
| #define PROF_TASK_TIME 0x0002 | |||||
| #define PROF_AICORE_METRICS 0x0004 | |||||
| #define PROF_AICPU_TRACE 0x0008 | |||||
| #define PROF_MODEL_EXECUTE 0x0010 | |||||
| #define PROF_RUNTIME_API 0x0020 | |||||
| #define PROF_RUNTIME_TRACE 0x0040 | |||||
| #define PROF_SCHEDULE_TIMELINE 0x0080 | |||||
| #define PROF_SCHEDULE_TRACE 0x0100 | |||||
| #define PROF_AIVECTORCORE_METRICS 0x0200 | |||||
| #define PROF_SUBTASK_TIME 0x0400 | |||||
| #define PROF_TRAINING_TRACE 0x0800 | |||||
| #define PROF_HCCL_TRACE 0x1000 | |||||
| #define PROF_DATA_PROCESS 0x2000 | |||||
| #define PROF_TASK_TRACE 0x3842 | |||||
| #define PROF_ACL_API 0x00000001 | |||||
| #define PROF_TASK_TIME 0x00000002 | |||||
| #define PROF_AICORE_METRICS 0x00000004 | |||||
| #define PROF_AICPU_TRACE 0x00000008 | |||||
| #define PROF_MODEL_EXECUTE 0x00000010 | |||||
| #define PROF_RUNTIME_API 0x00000020 | |||||
| #define PROF_RUNTIME_TRACE 0x00000040 | |||||
| #define PROF_SCHEDULE_TIMELINE 0x00000080 | |||||
| #define PROF_SCHEDULE_TRACE 0x00000100 | |||||
| #define PROF_AIVECTORCORE_METRICS 0x00000200 | |||||
| #define PROF_SUBTASK_TIME 0x00000400 | |||||
| #define PROF_TRAINING_TRACE 0x00000800 | |||||
| #define PROF_HCCL_TRACE 0x00001000 | |||||
| #define PROF_TASK_TRACE 0x00001852 | |||||
| // system profiling switch | |||||
| #define PROF_CPU 0x00010000 | |||||
| #define PROF_HARDWARE_MEMORY 0x00020000 | |||||
| #define PROF_IO 0x00040000 | |||||
| #define PROF_INTER_CONNECTION 0x00080000 | |||||
| #define PROF_DVPP 0x00100000 | |||||
| #define PROF_SYS_AICORE_SAMPLE 0x00200000 | |||||
| #define PROF_AIVECTORCORE_SAMPLE 0x00400000 | |||||
| #define PROF_MODEL_LOAD 0x8000000000000000 | #define PROF_MODEL_LOAD 0x8000000000000000 | ||||
| // DataTypeConfig MASK | // DataTypeConfig MASK | ||||
| #define PROF_ACL_API_MASK 0x0001 | |||||
| #define PROF_TASK_TIME_MASK 0x0002 | |||||
| #define PROF_AICORE_METRICS_MASK 0x0004 | |||||
| #define PROF_AICPU_TRACE_MASK 0x0008 | |||||
| #define PROF_MODEL_EXECUTE_MASK 0x0010 | |||||
| #define PROF_RUNTIME_API_MASK 0x0020 | |||||
| #define PROF_RUNTIME_TRACE_MASK 0x0040 | |||||
| #define PROF_SCHEDULE_TIMELINE_MASK 0x0080 | |||||
| #define PROF_SCHEDULE_TRACE_MASK 0x0100 | |||||
| #define PROF_AIVECTORCORE_METRICS_MASK 0x0200 | |||||
| #define PROF_SUBTASK_TIME_MASK 0x0400 | |||||
| #define PROF_TRAINING_TRACE_MASK 0x0800 | |||||
| #define PROF_HCCL_TRACE_MASK 0x1000 | |||||
| #define PROF_DATA_PROCESS_MASK 0x2000 | |||||
| #define PROF_ACL_API_MASK 0x00000001 | |||||
| #define PROF_TASK_TIME_MASK 0x00000002 | |||||
| #define PROF_AICORE_METRICS_MASK 0x00000004 | |||||
| #define PROF_AICPU_TRACE_MASK 0x00000008 | |||||
| #define PROF_MODEL_EXECUTE_MASK 0x00000010 | |||||
| #define PROF_RUNTIME_API_MASK 0x00000020 | |||||
| #define PROF_RUNTIME_TRACE_MASK 0x00000040 | |||||
| #define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 | |||||
| #define PROF_SCHEDULE_TRACE_MASK 0x00000100 | |||||
| #define PROF_AIVECTORCORE_METRICS_MASK 0x00000200 | |||||
| #define PROF_SUBTASK_TIME_MASK 0x00000400 | |||||
| #define PROF_TRAINING_TRACE_MASK 0x00000800 | |||||
| #define PROF_HCCL_TRACE_MASK 0x00001000 | |||||
| // system profiling mask | |||||
| #define PROF_CPU_MASK 0x00010000 | |||||
| #define PROF_HARDWARE_MEMORY_MASK 0x00020000 | |||||
| #define PROF_IO_MASK 0x00040000 | |||||
| #define PROF_INTER_CONNECTION_MASK 0x00080000 | |||||
| #define PROF_DVPP_MASK 0x00100000 | |||||
| #define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 | |||||
| #define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 | |||||
| #define PROF_MODEL_LOAD_MASK 0x8000000000000000 | #define PROF_MODEL_LOAD_MASK 0x8000000000000000 | ||||
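These flags OR together into the 64-bit dataTypeConfig; note that PROF_TASK_TRACE (0x00001852) is exactly PROF_TASK_TIME | PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | PROF_HCCL_TRACE. A minimal composition/test sketch, assuming it is compiled in a translation unit where the macros above are visible:

#include <cstdint>

int main() {
  // Request task timing plus AI Core metrics and the HCCL trace.
  const uint64_t data_type_config =
      PROF_TASK_TIME | PROF_AICORE_METRICS | PROF_HCCL_TRACE;

  // Later, test an individual feature against its *_MASK counterpart.
  const bool task_time_on = (data_type_config & PROF_TASK_TIME_MASK) != 0U;
  return task_time_on ? 0 : 1;
}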
| #include <cstdint> | |||||
| #include <string> | |||||
| /** | |||||
| * @name ProfErrorCode | |||||
| * @brief error code enum of prof_acl_apis | |||||
| */ | |||||
| enum ProfErrorCode { | |||||
| PROF_ERROR_NONE = 0, // ok | |||||
| PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr | |||||
| PROF_ERROR_REPEAT_INIT, // profiling has already been inited | |||||
| PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string | |||||
| PROF_ERROR_DIR_NO_ACCESS, // dir is not accessible | |||||
| PROF_ERROR_FAILURE, // failed to init or start profiling | |||||
| PROF_ERROR_NOT_INITED, // profiling has not been inited | |||||
| PROF_ERROR_DEVICE_INVALID, // device id invalid | |||||
| PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics | |||||
| PROF_ERROR_REPEAT_START, // profiling has already been started | |||||
| PROF_ERROR_NOT_STARTED, // profiling has not been started | |||||
| }; | |||||
| /** | |||||
| * @brief transfer profiling config in acl.json to sample config | |||||
| * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} | |||||
| * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} | |||||
| * @return ProfErrorCode | |||||
| */ | |||||
| MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); | |||||
| #ifndef OS_TYPE | |||||
| #define OS_TYPE 0 | |||||
| #endif // OS_TYPE | |||||
| /** | |||||
| * @name ProfInit | |||||
| * @brief init profiling | |||||
| * @param profInitCfg [IN] config of init profiling of json format | |||||
| * @return ProfErrorCode | |||||
| */ | |||||
| MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); | |||||
| /** | |||||
| * @name ProfAicoreMetrics | |||||
| * @brief aicore metrics enum | |||||
| */ | |||||
| enum ProfAicoreMetrics { | |||||
| PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, | |||||
| PROF_AICORE_PIPELINE = 1, | |||||
| PROF_AICORE_SYNCHRONIZATION = 2, | |||||
| PROF_AICORE_MEMORY = 3, | |||||
| PROF_AICORE_INTERNAL_MEMORY = 4, | |||||
| PROF_AICORE_STALL = 5, | |||||
| PROF_AICORE_EVENT = 255 | |||||
| }; | |||||
| #if (OS_TYPE != LINUX) | |||||
| #define MSVP_PROF_API __declspec(dllexport) | |||||
| #else | |||||
| #define MSVP_PROF_API __attribute__((visibility("default"))) | |||||
| #endif | |||||
| /** | |||||
| * @name ProfConfig | |||||
| * @brief struct of ProfStart | |||||
| */ | |||||
| struct ProfConfig { | |||||
| uint32_t devNums; // length of device id list | |||||
| uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list | |||||
| ProfAicoreMetrics aicoreMetrics; // aicore metric | |||||
| uint64_t dataTypeConfig; // data type to start profiling | |||||
| }; | |||||
| #include <cstdint> | |||||
| namespace Msprofiler { | |||||
| namespace Api { | |||||
| /** | /** | ||||
| * @name ProfStartProfiling | |||||
| * @brief start profiling | |||||
| * @param profStartCfg [IN] config to start profiling | |||||
| * @return ProfErrorCode | |||||
| * @name ProfGetOpExecutionTime | |||||
| * @brief get the op execution time for a specific part of the data | |||||
| * @param data [IN] data read from the pipe | |||||
| * @param len [IN] data length | |||||
| * @param index [IN] index of the part (op) | |||||
| * @return op execution time (us) | |||||
| */ | */ | ||||
| MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); | |||||
| MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); | |||||
| } | |||||
| } | |||||
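A hypothetical caller for the exported function; only the declared signature is taken from the header, while the pipe buffer and its framing are assumed:

#include <cstdint>
#include <vector>

uint64_t SumOpTimes(const std::vector<char> &pipe_data, uint32_t op_count) {
  uint64_t total_us = 0;
  for (uint32_t i = 0; i < op_count; ++i) {
    total_us += Msprofiler::Api::ProfGetOpExecutionTime(
        pipe_data.data(), static_cast<uint32_t>(pipe_data.size()), i);
  }
  return total_us;  // accumulated op execution time in microseconds
}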
| /** | |||||
| * @name ProfStopConfig | |||||
| * @brief struct of ProfStop | |||||
| */ | |||||
| struct ProfStopConfig { | |||||
| uint64_t padding; | |||||
| }; | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| /** | |||||
| * @name ProfStopProfiling | |||||
| * @brief stop profiling | |||||
| * @param profStopCfg [IN] config to stop profiling | |||||
| * @return ProfErrorCode | |||||
| */ | |||||
| MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); | |||||
| /** | |||||
| * @name ProfFinalize | |||||
| * @brief finalize profiling task | |||||
| * @return ProfErrorCode | |||||
| */ | |||||
| MSVP_PROF_API int32_t ProfFinalize(); | |||||
| MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); | |||||
| /** | |||||
| * @name ProfGetDataTypeConfig | |||||
| * @brief get dataTypeConfig started with of one device | |||||
| * @param deviceId [IN] deviceId to get dataTypeConfig | |||||
| * @param dataTypeConfig [OUT] result get | |||||
| * @return ProfErrorCode | |||||
| */ | |||||
| MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // MSPROF_ENGINE_PROF_ACL_API_H_ | |||||
| #endif // MSPROFILER_API_PROF_ACL_API_H_ | |||||
| @@ -16,7 +16,16 @@ | |||||
| #ifndef MSPROF_ENGINE_PROF_MGR_CORE_H_ | #ifndef MSPROF_ENGINE_PROF_MGR_CORE_H_ | ||||
| #define MSPROF_ENGINE_PROF_MGR_CORE_H_ | #define MSPROF_ENGINE_PROF_MGR_CORE_H_ | ||||
| #ifndef OS_TYPE | |||||
| #define OS_TYPE 0 | |||||
| #endif // OS_TYPE | |||||
| #if (OS_TYPE != LINUX) | |||||
| #define MSVP_PROF_API __declspec(dllexport) | |||||
| #else | |||||
| #define MSVP_PROF_API __attribute__((visibility("default"))) | #define MSVP_PROF_API __attribute__((visibility("default"))) | ||||
| #endif | |||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||