From: @changzherui
Reviewed-by: @liujunzhu, @ljl0711
Signed-off-by: @ljl0711
Tag: v1.2.0
@@ -0,0 +1,73 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INC_EXTERNAL_ACL_ACL_H_
#define INC_EXTERNAL_ACL_ACL_H_
#include "acl_rt.h"
#include "acl_op.h"
#include "acl_mdl.h"
#ifdef __cplusplus
extern "C" {
#endif
// Current version is 1.0.0
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0
/**
 * @ingroup AscendCL
 * @brief acl initialize
 *
 * @par Restriction
 * The aclInit interface can be called only once in a process
 * @param configPath [IN] the config path, it can be NULL
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath);
/**
 * @ingroup AscendCL
 * @brief acl finalize
 *
 * @par Restriction
 * aclFinalize needs to be called before the process exits.
 * After calling aclFinalize, ACL services can no longer be used in the process.
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclFinalize();
/**
 * @ingroup AscendCL
 * @brief query ACL interface version
 *
 * @param majorVersion [OUT] ACL interface major version
 * @param minorVersion [OUT] ACL interface minor version
 * @param patchVersion [OUT] ACL interface patch version
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion);
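/* Example (editor's sketch, not part of the original header): the minimal
 * process lifecycle these three interfaces define. aclInit(NULL) uses the
 * default settings; a config file path could be passed instead.
 *
 *   int32_t major = 0, minor = 0, patch = 0;
 *   if (aclInit(NULL) != ACL_SUCCESS) { return -1; }  // once per process
 *   (void)aclrtGetVersion(&major, &minor, &patch);    // e.g. 1.0.0
 *   // ... use ACL ...
 *   (void)aclFinalize();                              // before process exit
 */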
#ifdef __cplusplus
}
#endif
#endif  // INC_EXTERNAL_ACL_ACL_H_
@@ -158,7 +158,11 @@ typedef enum {
  ACL_FORMAT_NC1HWC0 = 3,
  ACL_FORMAT_FRACTAL_Z = 4,
  ACL_FORMAT_NC1HWC0_C04 = 12,
  ACL_FORMAT_NDHWC = 27,
  ACL_FORMAT_FRACTAL_NZ = 29,
  ACL_FORMAT_NCDHW = 30,
  ACL_FORMAT_NDC1HWC0 = 32,
  ACL_FRACTAL_Z_3D = 33
} aclFormat;
typedef enum {
@@ -223,6 +227,29 @@ ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size);
 */
ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer);
/**
 * @ingroup AscendCL
 * @brief update new data of aclDataBuffer
 *
 * @param dataBuffer [OUT] pointer to aclDataBuffer
 * @li The old data needs to be released by the user, otherwise a memory leak may occur:
 * call the aclGetDataBufferAddr interface to get the old data address,
 * then call the aclrtFree interface to release the memory
 *
 * @param data [IN] pointer to new data
 * @li Needs to be managed by the user:
 * call the aclrtMalloc interface to apply for memory,
 * call the aclrtFree interface to release memory
 *
 * @param size [IN] size of data in bytes
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr
 */
ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size);
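/* Example (editor's sketch, not part of the original header): swapping the
 * memory behind an existing aclDataBuffer and freeing the old block, as the
 * notes above require. `dataBuffer` and `newSize` are assumed to exist.
 *
 *   void *oldData = aclGetDataBufferAddr(dataBuffer);
 *   void *newData = NULL;
 *   if (aclrtMalloc(&newData, newSize, ACL_MEM_MALLOC_HUGE_FIRST) == ACL_SUCCESS) {
 *       if (aclUpdateDataBuffer(dataBuffer, newData, newSize) == ACL_SUCCESS) {
 *           (void)aclrtFree(oldData);  // release the replaced block
 *       }
 *   }
 */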
/**
 * @ingroup AscendCL
 * @brief get data address from aclDataBuffer
@@ -547,6 +574,19 @@ ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName);
/**
 * @ingroup AscendCL
 * @brief Set const data specified by the tensor description
 *
 * @param desc [OUT] pointer to the instance of aclTensorDesc
 * @param dataBuffer [IN] pointer to the const data buffer
 * @param length [IN] the length of the const data buffer
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length);
/**
 * @ingroup AscendCL
 * @brief an interface for users to output APP logs
@@ -0,0 +1,296 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INC_EXTERNAL_ACL_PROF_H_
#define INC_EXTERNAL_ACL_PROF_H_
#include "acl_base.h"
#ifdef __cplusplus
extern "C" {
#endif
#define ACL_PROF_ACL_API 0x0001
#define ACL_PROF_TASK_TIME 0x0002
#define ACL_PROF_AICORE_METRICS 0x0004
#define ACL_PROF_AICPU 0x0008
#define ACL_PROF_MAX_OP_NAME_LEN 257
#define ACL_PROF_MAX_OP_TYPE_LEN 65
typedef enum {
  ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
  ACL_AICORE_PIPE_UTILIZATION = 1,
  ACL_AICORE_MEMORY_BANDWIDTH = 2,
  ACL_AICORE_L0B_AND_WIDTH = 3,
  ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
  ACL_AICORE_NONE = 0xFF
} aclprofAicoreMetrics;
typedef struct aclprofConfig aclprofConfig;
typedef struct aclprofStopConfig aclprofStopConfig;
typedef struct aclprofAicoreEvents aclprofAicoreEvents;
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig;
/**
 * @ingroup AscendCL
 * @brief profiling initialize
 *
 * @param profilerResultPath [IN] path of profiling result
 * @param length [IN] length of profilerResultPath
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofFinalize
 */
ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length);
/**
 * @ingroup AscendCL
 * @brief profiling finalize
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofInit
 */
ACL_FUNC_VISIBILITY aclError aclprofFinalize();
/**
 * @ingroup AscendCL
 * @brief Start profiling modules by profilerConfig
 *
 * @param profilerConfig [IN] config of profiling
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofStop
 */
ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig);
/**
 * @ingroup AscendCL
 * @brief Create data of type aclprofConfig
 *
 * @param deviceIdList [IN] list of device ids
 * @param deviceNums [IN] number of devices
 * @param aicoreMetrics [IN] type of aicore metrics
 * @param aicoreEvents [IN] pointer to aicore events, only NULL is supported now
 * @param dataTypeConfig [IN] config of the modules that need profiling
 *
 * @retval the aclprofConfig pointer
 *
 * @see aclprofDestroyConfig
 */
ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
                                                       aclprofAicoreMetrics aicoreMetrics,
                                                       aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);
/**
 * @ingroup AscendCL
 * @brief Destroy data of type aclprofConfig
 *
 * @param profilerConfig [IN] config of profiling
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofCreateConfig
 */
ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig);
/**
 * @ingroup AscendCL
 * @brief stop profiling modules by stopProfilingConfig
 *
 * @param profilerConfig [IN] pointer to stop config of profiling
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofStart
 */
ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig);
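/* Example (editor's sketch, not part of the original header): the typical
 * profiling sequence built from the interfaces above. The result path
 * "./prof_out" and the module mask are illustrative.
 *
 *   uint32_t devices[1] = {0};
 *   (void)aclprofInit("./prof_out", strlen("./prof_out"));  // strlen: <string.h>
 *   aclprofConfig *cfg = aclprofCreateConfig(devices, 1, ACL_AICORE_PIPE_UTILIZATION,
 *                                            NULL, ACL_PROF_ACL_API | ACL_PROF_TASK_TIME);
 *   (void)aclprofStart(cfg);
 *   // ... run the workload to be profiled ...
 *   (void)aclprofStop(cfg);
 *   (void)aclprofDestroyConfig(cfg);
 *   (void)aclprofFinalize();
 */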
/**
 * @ingroup AscendCL
 * @brief subscribe profiling data of model
 *
 * @param modelId [IN] the model id subscribed
 * @param profSubscribeConfig [IN] pointer to config of model subscribe
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofModelUnSubscribe
 */
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig);
/**
 * @ingroup AscendCL
 * @brief unsubscribe profiling data of model
 *
 * @param modelId [IN] the model id unsubscribed
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofModelSubscribe
 */
ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId);
/**
 * @ingroup AscendCL
 * @brief create subscribe config
 *
 * @param timeInfoSwitch [IN] switch controlling whether to get time info from the model
 * @param aicoreMetrics [IN] aicore metrics
 * @param fd [IN] pointer to write pipe
 *
 * @retval the aclprofSubscribeConfig pointer
 *
 * @see aclprofDestroySubscribeConfig
 */
ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch,
                                                                         aclprofAicoreMetrics aicoreMetrics, void *fd);
/**
 * @ingroup AscendCL
 * @brief destroy subscribe config
 *
 * @param profSubscribeConfig [IN] subscribe config
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclprofCreateSubscribeConfig
 */
ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig);
/**
 * @ingroup AscendCL
 * @brief get size of op description
 *
 * @param opDescSize [OUT] size of op desc
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize);
/**
 * @ingroup AscendCL
 * @brief get op number from subscription data
 *
 * @param opInfo [IN] pointer to subscription data
 * @param opInfoLen [IN] memory size of subscription data
 * @param opNumber [OUT] op number of subscription data
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber);
/**
 * @ingroup AscendCL
 * @brief get op type from subscription data
 *
 * @param opInfo [IN] pointer to subscription data
 * @param opInfoLen [IN] memory size of subscription data
 * @param index [IN] index of op array in opInfo
 * @param opType [OUT] obtained op type string
 * @param opTypeLen [IN] length of the buffer for the op type string
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType,
                                              size_t opTypeLen);
/**
 * @ingroup AscendCL
 * @brief get op name from subscription data
 *
 * @param opInfo [IN] pointer to subscription data
 * @param opInfoLen [IN] memory size of subscription data
 * @param index [IN] index of op array in opInfo
 * @param opName [OUT] obtained op name string
 * @param opNameLen [IN] length of the buffer for the op name string
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName,
                                              size_t opNameLen);
/**
 * @ingroup AscendCL
 * @brief get start time of specified op from subscription data
 *
 * @param opInfo [IN] pointer to subscription data
 * @param opInfoLen [IN] memory size of subscription data
 * @param index [IN] index of op array in opInfo
 *
 * @retval start timestamp (us) of the specified op
 * @retval 0 for failure
 */
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index);
/**
 * @ingroup AscendCL
 * @brief get end time of specified op from subscription data
 *
 * @param opInfo [IN] pointer to subscription data
 * @param opInfoLen [IN] memory size of subscription data
 * @param index [IN] index of op array in opInfo
 *
 * @retval end timestamp (us) of the specified op
 * @retval 0 for failure
 */
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index);
/**
 * @ingroup AscendCL
 * @brief get execution time of specified op from subscription data
 *
 * @param opInfo [IN] pointer to subscription data
 * @param opInfoLen [IN] memory size of subscription data
 * @param index [IN] index of op array in opInfo
 *
 * @retval execution time (us) of the specified op
 * @retval 0 for failure
 */
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index);
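/* Example (editor's sketch, not part of the original header): walking the op
 * records in one block of subscription data read from the pipe passed to
 * aclprofCreateSubscribeConfig. `opInfo`/`opInfoLen` are assumed to hold one
 * batch; printf assumes <stdio.h>.
 *
 *   uint32_t opNum = 0;
 *   if (aclprofGetOpNum(opInfo, opInfoLen, &opNum) == ACL_SUCCESS) {
 *       for (uint32_t i = 0; i < opNum; ++i) {
 *           char opType[ACL_PROF_MAX_OP_TYPE_LEN] = {0};
 *           char opName[ACL_PROF_MAX_OP_NAME_LEN] = {0};
 *           (void)aclprofGetOpType(opInfo, opInfoLen, i, opType, sizeof(opType));
 *           (void)aclprofGetOpName(opInfo, opInfoLen, i, opName, sizeof(opName));
 *           uint64_t us = aclprofGetOpDuration(opInfo, opInfoLen, i);
 *           printf("%s(%s): %llu us\n", opName, opType, (unsigned long long)us);
 *       }
 *   }
 */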
/**
 * @ingroup AscendCL
 * @brief get model id from subscription data
 *
 * @param opInfo [IN] pointer to subscription data
 * @param opInfoLen [IN] memory size of subscription data
 * @param index [IN] index of op array in opInfo
 *
 * @retval model id of subscription data
 * @retval 0 for failure
 */
ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index);
#ifdef __cplusplus
}
#endif
#endif  // INC_EXTERNAL_ACL_PROF_H_
@@ -0,0 +1,932 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INC_EXTERNAL_ACL_ACL_RT_H_
#define INC_EXTERNAL_ACL_ACL_RT_H_
#include <stdint.h>
#include <stddef.h>
#include "acl_base.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef enum aclrtRunMode {
  ACL_DEVICE,
  ACL_HOST,
} aclrtRunMode;
typedef enum aclrtTsId {
  ACL_TS_ID_AICORE = 0,
  ACL_TS_ID_AIVECTOR = 1,
  ACL_TS_ID_RESERVED = 2,
} aclrtTsId;
typedef enum aclrtEventStatus {
  ACL_EVENT_STATUS_COMPLETE = 0,
  ACL_EVENT_STATUS_NOT_READY = 1,
  ACL_EVENT_STATUS_RESERVED = 2,
} aclrtEventStatus;
typedef enum aclrtCallbackBlockType {
  ACL_CALLBACK_NO_BLOCK,
  ACL_CALLBACK_BLOCK,
} aclrtCallbackBlockType;
typedef enum aclrtMemcpyKind {
  ACL_MEMCPY_HOST_TO_HOST,
  ACL_MEMCPY_HOST_TO_DEVICE,
  ACL_MEMCPY_DEVICE_TO_HOST,
  ACL_MEMCPY_DEVICE_TO_DEVICE,
} aclrtMemcpyKind;
typedef enum aclrtMemMallocPolicy {
  ACL_MEM_MALLOC_HUGE_FIRST,
  ACL_MEM_MALLOC_HUGE_ONLY,
  ACL_MEM_MALLOC_NORMAL_ONLY,
  ACL_MEM_MALLOC_HUGE_FIRST_P2P,
  ACL_MEM_MALLOC_HUGE_ONLY_P2P,
  ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
} aclrtMemMallocPolicy;
typedef enum aclrtMemAttr {
  ACL_DDR_MEM,
  ACL_HBM_MEM,
  ACL_DDR_MEM_HUGE,
  ACL_DDR_MEM_NORMAL,
  ACL_HBM_MEM_HUGE,
  ACL_HBM_MEM_NORMAL,
  ACL_DDR_MEM_P2P_HUGE,
  ACL_DDR_MEM_P2P_NORMAL,
  ACL_HBM_MEM_P2P_HUGE,
  ACL_HBM_MEM_P2P_NORMAL,
} aclrtMemAttr;
typedef enum aclrtGroupAttr {
  ACL_GROUP_AICORE_INT,
  ACL_GROUP_AIV_INT,
  ACL_GROUP_AIC_INT,
  ACL_GROUP_SDMANUM_INT,
  ACL_GROUP_ASQNUM_INT
} aclrtGroupAttr;
typedef struct tagRtGroupInfo aclrtGroupInfo;
typedef struct rtExceptionInfo aclrtExceptionInfo;
typedef void (*aclrtCallback)(void *userData);
typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo);
/**
 * @ingroup AscendCL
 * @brief Set a callback function to handle exception information
 *
 * @param callback [IN] callback function to handle exception information
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback);
/**
 * @ingroup AscendCL
 * @brief Get task id from exception information
 *
 * @param info [IN] pointer to exception information
 *
 * @retval The task id from exception information
 * @retval 0xFFFFFFFF if info is null
 */
ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info);
/**
 * @ingroup AscendCL
 * @brief Get stream id from exception information
 *
 * @param info [IN] pointer to exception information
 *
 * @retval The stream id from exception information
 * @retval 0xFFFFFFFF if info is null
 */
ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info);
/**
 * @ingroup AscendCL
 * @brief Get thread id from exception information
 *
 * @param info [IN] pointer to exception information
 *
 * @retval The thread id of the failed task
 * @retval 0xFFFFFFFF if info is null
 */
ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info);
/**
 * @ingroup AscendCL
 * @brief Get device id from exception information
 *
 * @param info [IN] pointer to exception information
 *
 * @retval The device id of the failed task
 * @retval 0xFFFFFFFF if info is null
 */
ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info);
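/* Example (editor's sketch, not part of the original header): logging the ids
 * carried by an exception record. Each getter returns 0xFFFFFFFF for a NULL
 * input; printf assumes <stdio.h>.
 *
 *   static void OnException(aclrtExceptionInfo *info) {
 *       printf("task %u failed: stream %u, thread %u, device %u\n",
 *              aclrtGetTaskIdFromExceptionInfo(info),
 *              aclrtGetStreamIdFromExceptionInfo(info),
 *              aclrtGetThreadIdFromExceptionInfo(info),
 *              aclrtGetDeviceIdFromExceptionInfo(info));
 *   }
 *   // registration: (void)aclrtSetExceptionInfoCallback(OnException);
 */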
/**
 * @ingroup AscendCL
 * @brief Specify the thread that processes callback functions on the Stream
 *
 * @param threadId [IN] thread ID
 * @param stream [IN] stream handle
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream);
/**
 * @ingroup AscendCL
 * @brief Add a callback function to be executed on the host
 * to the task queue of the Stream
 *
 * @param fn [IN] Specify the callback function to be added
 *                The function prototype of the callback function is:
 *                typedef void (*aclrtCallback)(void *userData);
 * @param userData [IN] User data to be passed to the callback function
 * @param blockType [IN] callback block type
 * @param stream [IN] stream handle
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType,
                                                 aclrtStream stream);
/**
 * @ingroup AscendCL
 * @brief After waiting for a specified time, trigger callback processing
 *
 * @par Function
 * Callbacks are processed by the thread specified through
 * the aclrtSubscribeReport interface
 *
 * @param timeout [IN] timeout value
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtSubscribeReport
 */
ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout);
/**
 * @ingroup AscendCL
 * @brief Cancel thread registration;
 * the callback functions on the specified Stream
 * are no longer processed by the specified thread
 *
 * @param threadId [IN] thread ID
 * @param stream [IN] stream handle
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream);
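/* Example (editor's sketch, not part of the original header): a dedicated
 * host thread draining callbacks for one stream. How `threadId` is obtained
 * is platform specific, and the 100 ms timeout and `running` flag are
 * illustrative assumptions.
 *
 *   static void MyCallback(void *userData) { ... }  // runs on the report thread
 *
 *   // on the report thread, after aclrtSubscribeReport(threadId, stream):
 *   while (running) {
 *       (void)aclrtProcessReport(100);  // wait up to 100 ms, then loop
 *   }
 *   // elsewhere: aclrtLaunchCallback(MyCallback, NULL, ACL_CALLBACK_NO_BLOCK, stream);
 *   // on shutdown: aclrtUnSubscribeReport(threadId, stream);
 */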
/**
 * @ingroup AscendCL
 * @brief create context and associate it with the calling thread
 *
 * @par Function
 * The following use cases are supported:
 * @li If you don't call the aclrtCreateContext interface
 * to explicitly create the context,
 * the system will use the default context, which is implicitly created
 * when the aclrtSetDevice interface is called.
 * @li If multiple contexts are created in a process
 * (there is no limit on the number of contexts),
 * the current thread can only use one of them at a time.
 * It is recommended to explicitly specify the context of the current thread
 * through the aclrtSetCurrentContext interface to increase
 * the maintainability of the program.
 *
 * @param context [OUT] pointer to the created context
 * @param deviceId [IN] device to create the context on
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtSetDevice | aclrtSetCurrentContext
 */
ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId);
/**
 * @ingroup AscendCL
 * @brief destroy context instance
 *
 * @par Function
 * Can only destroy contexts created through the aclrtCreateContext interface
 *
 * @param context [IN] the context to destroy
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtCreateContext
 */
ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context);
/**
 * @ingroup AscendCL
 * @brief set the context of the thread
 *
 * @par Function
 * The following scenarios are supported:
 * @li If the aclrtCreateContext interface is called in a thread to explicitly
 * create a Context (for example: ctx1), the thread's Context can be used
 * without calling the aclrtSetCurrentContext interface;
 * the system uses ctx1 as the context of that thread by default.
 * @li If no context is explicitly created through the aclrtCreateContext interface,
 * the system uses the default context as the context of the thread.
 * In this case, the aclrtDestroyContext interface cannot be used to release
 * the default context.
 * @li If the aclrtSetCurrentContext interface is called multiple times to
 * set the thread's Context, the last one prevails.
 *
 * @par Restriction
 * @li If the device corresponding to the context set for the thread
 * has been reset, the context cannot be set as the context of the thread,
 * otherwise a business exception will result.
 * @li It is recommended to use the context created in the same thread.
 * If the aclrtCreateContext interface is called in thread A to create a context,
 * and the context is used in thread B,
 * the user must guarantee the execution order of tasks in the same stream
 * under the same context in the two threads.
 *
 * @param context [IN] the current context of the thread
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtCreateContext | aclrtDestroyContext
 */
ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context);
/**
 * @ingroup AscendCL
 * @brief get the context of the thread
 *
 * @par Function
 * If the user calls the aclrtSetCurrentContext interface
 * multiple times to set the context of the current thread,
 * then the last set context is obtained
 *
 * @param context [OUT] the current context of the thread
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtSetCurrentContext
 */
ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context);
/**
 * @ingroup AscendCL
 * @brief Specify the device to use for the operation;
 * implicitly creates the default context and the default stream
 *
 * @par Function
 * The following use cases are supported:
 * @li The device can be specified in a process or thread.
 * If you call the aclrtSetDevice interface multiple
 * times to specify the same device,
 * you only need to call the aclrtResetDevice interface once to reset the device.
 * @li The same device can be specified for operation
 * in different processes or threads.
 * @li If a device is specified in a process,
 * multiple threads in the process can share this device and explicitly
 * create Contexts (aclrtCreateContext interface).
 * @li In multi-device scenarios, you can switch to other devices
 * through the aclrtSetDevice interface in the process.
 *
 * @param deviceId [IN] the device id
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtResetDevice | aclrtCreateContext
 */
ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId);
/**
 * @ingroup AscendCL
 * @brief Reset the current operating Device and free resources on the device,
 * including the default context, the default stream,
 * and all streams created under the default context.
 * This is a synchronous interface:
 * if tasks under the default context or stream have not been completed,
 * the system will wait for them to complete before releasing the resources.
 *
 * @par Restriction
 * @li Contexts, Streams, and Events explicitly created
 * on the device to be reset should be released before resetting.
 * It is recommended to follow the interface calling sequence below,
 * otherwise business abnormalities may be caused.
 * @li Interface calling sequence:
 * call the aclrtDestroyEvent interface to release Events and
 * call the aclrtDestroyStream interface to release explicitly created Streams ->
 * call aclrtDestroyContext to release explicitly created Contexts ->
 * call the aclrtResetDevice interface
 *
 * @param deviceId [IN] the device id
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId);
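/* Example (editor's sketch, not part of the original header): the recommended
 * teardown order relative to aclrtResetDevice, for resources explicitly
 * created on device 0.
 *
 *   (void)aclrtSetDevice(0);           // implicit default context/stream
 *   aclrtContext ctx = NULL;
 *   aclrtStream stream = NULL;
 *   (void)aclrtCreateContext(&ctx, 0);
 *   (void)aclrtCreateStream(&stream);
 *   // ... submit and synchronize work ...
 *   (void)aclrtDestroyStream(stream);  // 1. streams (and events)
 *   (void)aclrtDestroyContext(ctx);    // 2. explicitly created contexts
 *   (void)aclrtResetDevice(0);         // 3. finally the device
 */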
/**
 * @ingroup AscendCL
 * @brief get target device of the current thread
 *
 * @param deviceId [OUT] the device id
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId);
/**
 * @ingroup AscendCL
 * @brief get target side
 *
 * @param runMode [OUT] the run mode
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode);
/**
 * @ingroup AscendCL
 * @brief Wait for compute device to finish
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void);
/**
 * @ingroup AscendCL
 * @brief Set Scheduling TS
 *
 * @param tsId [IN] the ts id
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId);
/**
 * @ingroup AscendCL
 * @brief get total device number.
 *
 * @param count [OUT] the device number
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count);
/**
 * @ingroup AscendCL
 * @brief create event instance
 *
 * @param event [OUT] created event
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event);
/**
 * @ingroup AscendCL
 * @brief destroy event instance
 *
 * @par Function
 * Only events created through the aclrtCreateEvent interface can be
 * destroyed; this is a synchronous interface. When destroying an event,
 * the user must ensure that the tasks involved in the aclrtSynchronizeEvent
 * interface or the aclrtStreamWaitEvent interface are completed before
 * the event is destroyed.
 *
 * @param event [IN] event to destroy
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent
 */
ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event);
/**
 * @ingroup AscendCL
 * @brief Record an Event in the Stream
 *
 * @param event [IN] event to record
 * @param stream [IN] stream handle
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream);
/**
 * @ingroup AscendCL
 * @brief Reset an event
 *
 * @par Function
 * Users need to make sure that the tasks in the Stream
 * are complete before resetting the Event
 *
 * @param event [IN] event to reset
 * @param stream [IN] stream handle
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream);
/**
 * @ingroup AscendCL
 * @brief Query an event's status
 *
 * @param event [IN] event to query
 * @param status [OUT] event status
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status);
/**
 * @ingroup AscendCL
 * @brief Block host execution, waiting for the event to complete
 *
 * @param event [IN] event to wait for
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event);
/**
 * @ingroup AscendCL
 * @brief computes the elapsed time between events.
 *
 * @param ms [OUT] time between start and end in ms
 * @param start [IN] starting event
 * @param end [IN] ending event
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream
 */
ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end);
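/* Example (editor's sketch, not part of the original header): timing a span
 * of work on a stream with a pair of events. `stream` is assumed to exist.
 *
 *   aclrtEvent start = NULL, end = NULL;
 *   float ms = 0.0f;
 *   (void)aclrtCreateEvent(&start);
 *   (void)aclrtCreateEvent(&end);
 *   (void)aclrtRecordEvent(start, stream);
 *   // ... enqueue the tasks to measure on `stream` ...
 *   (void)aclrtRecordEvent(end, stream);
 *   (void)aclrtSynchronizeEvent(end);             // wait for the span to finish
 *   (void)aclrtEventElapsedTime(&ms, start, end); // elapsed time in ms
 *   (void)aclrtDestroyEvent(start);
 *   (void)aclrtDestroyEvent(end);
 */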
/**
 * @ingroup AscendCL
 * @brief alloc memory on device
 *
 * @par Function
 * allocates `size` bytes of linear memory on the device
 * and returns a pointer to the allocated memory through *devPtr
 *
 * @par Restriction
 * @li The memory requested by the aclrtMalloc interface needs to be released
 * through the aclrtFree interface.
 * @li Before calling the media data processing interfaces,
 * if you need memory on the device to store input or output data,
 * you need to call acldvppMalloc to apply for memory.
 *
 * @param devPtr [OUT] pointer to pointer to allocated memory on device
 * @param size [IN] alloc memory size
 * @param policy [IN] memory alloc policy
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtFree | acldvppMalloc | aclrtMallocCached
 */
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
/**
 * @ingroup AscendCL
 * @brief allocate memory on device with cache
 *
 * @par Function
 * allocates `size` bytes of linear memory on the device
 * and returns a pointer to the allocated memory through *devPtr
 *
 * @par Restriction
 * @li The memory requested by the aclrtMallocCached interface needs to be released
 * through the aclrtFree interface.
 *
 * @param devPtr [OUT] pointer to pointer to allocated memory on device
 * @param size [IN] alloc memory size
 * @param policy [IN] memory alloc policy
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtFree | aclrtMalloc
 */
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
/**
 * @ingroup AscendCL
 * @brief flush cached data to DDR
 *
 * @param devPtr [IN] pointer to the data to flush to DDR
 * @param size [IN] flush size
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size);
/**
 * @ingroup AscendCL
 * @brief invalidate cached data
 *
 * @param devPtr [IN] pointer to the cached data to invalidate
 * @param size [IN] invalidate size
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size);
/**
 * @ingroup AscendCL
 * @brief free device memory
 *
 * @par Function
 * can only free memory allocated through the aclrtMalloc interface
 *
 * @param devPtr [IN] Pointer to memory to be freed
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtMalloc
 */
ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr);
/**
 * @ingroup AscendCL
 * @brief alloc memory on host
 *
 * @par Restriction
 * @li The requested memory cannot be used on the Device
 * and needs to be explicitly copied to the Device.
 * @li The memory requested by the aclrtMallocHost interface
 * needs to be released through the aclrtFreeHost interface.
 *
 * @param hostPtr [OUT] pointer to pointer to allocated memory on the host
 * @param size [IN] alloc memory size
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtFreeHost
 */
ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size);
/**
 * @ingroup AscendCL
 * @brief free host memory
 *
 * @par Function
 * can only free memory allocated through the aclrtMallocHost interface
 *
 * @param hostPtr [IN] free memory pointer
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtMallocHost
 */
ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr);
/**
 * @ingroup AscendCL
 * @brief synchronous memory copy between host and device
 *
 * @param dst [IN] destination address pointer
 * @param destMax [IN] max length of the destination address memory
 * @param src [IN] source address pointer
 * @param count [IN] the number of bytes to copy
 * @param kind [IN] memcpy type
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count,
                                         aclrtMemcpyKind kind);
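/* Example (editor's sketch, not part of the original header): staging host
 * data to the device with the synchronous copy. The 1 KiB size is arbitrary.
 *
 *   size_t size = 1024;
 *   void *hostBuf = NULL, *devBuf = NULL;
 *   (void)aclrtMallocHost(&hostBuf, size);
 *   (void)aclrtMalloc(&devBuf, size, ACL_MEM_MALLOC_HUGE_FIRST);
 *   // ... fill hostBuf ...
 *   (void)aclrtMemcpy(devBuf, size, hostBuf, size, ACL_MEMCPY_HOST_TO_DEVICE);
 *   // ... launch work; copy results back with ACL_MEMCPY_DEVICE_TO_HOST ...
 *   (void)aclrtFree(devBuf);
 *   (void)aclrtFreeHost(hostBuf);
 */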
/**
 * @ingroup AscendCL
 * @brief Initialize memory and set contents of memory to specified value
 *
 * @par Function
 * The memory to be initialized is on the Host or Device side,
 * and the system determines whether
 * it is host or device according to the address
 *
 * @param devPtr [IN] Starting address of memory
 * @param maxCount [IN] Max length of destination address memory
 * @param value [IN] Set value
 * @param count [IN] The length of memory
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count);
/**
 * @ingroup AscendCL
 * @brief Asynchronous memory copy between Host and Device
 *
 * @par Function
 * After calling this interface,
 * be sure to call the aclrtSynchronizeStream interface to ensure that
 * the memory copy task has completed
 *
 * @par Restriction
 * @li For on-chip Device-to-Device memory copy,
 * both the source and destination addresses must be 64-byte aligned
 *
 * @param dst [IN] destination address pointer
 * @param destMax [IN] max length of destination address memory
 * @param src [IN] source address pointer
 * @param count [IN] the number of bytes to copy
 * @param kind [IN] memcpy type
 * @param stream [IN] asynchronous task stream
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtSynchronizeStream
 */
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count,
                                              aclrtMemcpyKind kind, aclrtStream stream);
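/* Example (editor's sketch, not part of the original header): asynchronous
 * tasks are only guaranteed complete after the stream is synchronized.
 * `hostBuf`, `devBuf`, `devBuf2`, `size`, and `stream` are assumed to exist.
 *
 *   (void)aclrtMemcpyAsync(devBuf, size, hostBuf, size,
 *                          ACL_MEMCPY_HOST_TO_DEVICE, stream);
 *   (void)aclrtMemsetAsync(devBuf2, size, 0, size, stream);  // same stream
 *   (void)aclrtSynchronizeStream(stream);  // both tasks done after this
 */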
/**
 * @ingroup AscendCL
 * @brief Asynchronously initialize memory
 * and set contents of memory to specified value
 *
 * @par Function
 * The memory to be initialized is on the Host or Device side,
 * and the system determines whether
 * it is host or device according to the address
 *
 * @param devPtr [IN] destination address pointer
 * @param maxCount [IN] max length of destination address memory
 * @param value [IN] set value
 * @param count [IN] the number of bytes to set
 * @param stream [IN] asynchronous task stream
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtSynchronizeStream
 */
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count,
                                              aclrtStream stream);
/**
 * @ingroup AscendCL
 * @brief create stream instance
 *
 * @param stream [OUT] the created stream
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream);
/**
 * @ingroup AscendCL
 * @brief destroy stream instance
 *
 * @par Function
 * Can only destroy streams created through the aclrtCreateStream interface
 *
 * @par Restriction
 * Before calling the aclrtDestroyStream interface to destroy
 * the specified Stream, you need to call the aclrtSynchronizeStream interface
 * to ensure that the tasks in the Stream have been completed.
 *
 * @param stream [IN] the stream to destroy
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtCreateStream | aclrtSynchronizeStream
 */
ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream);
/**
 * @ingroup AscendCL
 * @brief block the host until all tasks
 * in the specified stream have completed
 *
 * @param stream [IN] the stream to wait for
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream);
/**
 * @ingroup AscendCL
 * @brief Blocks the operation of the specified Stream until
 * the specified Event is completed.
 * Supports multiple streams waiting for the same event.
 *
 * @param stream [IN] the waiting stream; if using the default Stream, set NULL
 * @param event [IN] the event to wait for
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event);
/**
 * @ingroup AscendCL
 * @brief set group
 *
 * @par Function
 * set the task to the corresponding group
 *
 * @param groupId [IN] group id
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail
 */
ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId);
/**
 * @ingroup AscendCL
 * @brief get the number of groups
 *
 * @par Function
 * get the number of groups. If the number of groups is zero,
 * it means that groups are not supported or no group has been created.
 *
 * @param count [OUT] the number of groups
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 */
ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count);
/**
 * @ingroup AscendCL
 * @brief create group information
 *
 * @retval null for failure.
 * @retval OtherValues success.
 *
 * @see aclrtDestroyGroupInfo
 */
ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo();
/**
 * @ingroup AscendCL
 * @brief destroy group information
 *
 * @param groupInfo [IN] pointer to group information
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtCreateGroupInfo
 */
ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo);
/**
 * @ingroup AscendCL
 * @brief get all group information
 *
 * @param groupInfo [OUT] pointer to group information
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtGetGroupCount
 */
ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo);
/**
 * @ingroup AscendCL
 * @brief get detailed information of a group
 *
 * @param groupInfo [IN] pointer to group information
 * @param groupId [IN] group index value
 * @param attr [IN] group attribute
 * @param attrValue [OUT] pointer to attribute value
 * @param valueLen [IN] length of attribute value
 * @param paramRetSize [OUT] pointer to real length of attribute value
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtGetGroupCount | aclrtGetAllGroupInfo
 */
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupId,
                                                     aclrtGroupAttr attr, void *attrValue, size_t valueLen,
                                                     size_t *paramRetSize);
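/* Example (editor's sketch, not part of the original header): enumerating
 * groups and reading one attribute per group. The uint32_t attribute type is
 * an assumption for the *_INT attributes.
 *
 *   uint32_t count = 0;
 *   (void)aclrtGetGroupCount(&count);  // 0: unsupported or no group created
 *   aclrtGroupInfo *info = aclrtCreateGroupInfo();
 *   (void)aclrtGetAllGroupInfo(info);
 *   for (int32_t id = 0; id < (int32_t)count; ++id) {
 *       uint32_t aicoreNum = 0;
 *       size_t retSize = 0;
 *       (void)aclrtGetGroupInfoDetail(info, id, ACL_GROUP_AICORE_INT,
 *                                     &aicoreNum, sizeof(aicoreNum), &retSize);
 *   }
 *   (void)aclrtDestroyGroupInfo(info);
 */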
/**
 * @ingroup AscendCL
 * @brief check whether the current device and a peer device support the p2p feature
 *
 * @param canAccessPeer [OUT] pointer to save the checking result
 * @param deviceId [IN] current device id
 * @param peerDeviceId [IN] peer device id
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess
 */
ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId);
/**
 * @ingroup AscendCL
 * @brief enable p2p access to the peer device
 *
 * @param peerDeviceId [IN] the peer device id
 * @param flags [IN] reserved field, must be zero for now
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess
 */
ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags);
/**
 * @ingroup AscendCL
 * @brief disable p2p access to the peer device
 *
 * @param peerDeviceId [IN] the peer device id
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess
 */
ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId);
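/* Example (editor's sketch, not part of the original header): enabling p2p
 * from device 0 to device 1 only after the capability check passes. Whether
 * canAccessPeer is exactly 1 on success is an assumption.
 *
 *   int32_t canAccess = 0;
 *   (void)aclrtDeviceCanAccessPeer(&canAccess, 0, 1);
 *   if (canAccess == 1) {
 *       (void)aclrtSetDevice(0);
 *       (void)aclrtDeviceEnablePeerAccess(1, 0);  // flags must be 0
 *       // ... ACL_MEMCPY_DEVICE_TO_DEVICE copies between devices 0 and 1 ...
 *       (void)aclrtDeviceDisablePeerAccess(1);
 *   }
 */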
/**
 * @ingroup AscendCL
 * @brief Obtain the free memory and total memory of the specified attribute;
 * the specified memory includes normal memory and huge memory.
 *
 * @param attr [IN] the memory attribute of the specified device
 * @param free [OUT] the free memory of the specified device
 * @param total [OUT] the total memory of the specified device
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total);
#ifdef __cplusplus
}
#endif
#endif  // INC_EXTERNAL_ACL_ACL_RT_H_
@@ -0,0 +1,276 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INC_EXTERNAL_ACL_ACL_TDT_H_
#define INC_EXTERNAL_ACL_ACL_TDT_H_
#include "acl/acl_base.h"
#ifdef __cplusplus
extern "C" {
#endif
enum acltdtTensorType {
  ACL_TENSOR_DATA_UNDEFINED = -1,
  ACL_TENSOR_DATA_TENSOR,
  ACL_TENSOR_DATA_END_OF_SEQUENCE,
  ACL_TENSOR_DATA_ABNORMAL
};
typedef struct acltdtDataItem acltdtDataItem;
typedef struct acltdtDataset acltdtDataset;
typedef struct acltdtChannelHandle acltdtChannelHandle;
/**
 * @ingroup AscendCL
 * @brief Get tensor type from item
 *
 * @param dataItem [IN] pointer to the data item
 *
 * @retval Tensor type.
 * @retval ACL_DT_UNDEFINED if dataItem is null
 */
ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem);
/**
 * @ingroup AscendCL
 * @brief Get data type from item
 *
 * @param dataItem [IN] pointer to the data item
 *
 * @retval Data type.
 * @retval ACL_DT_UNDEFINED if dataItem is null
 */
ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem);
/**
 * @ingroup AscendCL
 * @brief Get data address from item
 *
 * @param dataItem [IN] pointer to data item
 *
 * @retval null for failure
 * @retval OtherValues success
 */
ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem);
/**
 * @ingroup AscendCL
 * @brief Get data size from item
 *
 * @param dataItem [IN] pointer to data item
 *
 * @retval 0 for failure
 * @retval OtherValues success
 */
ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem);
/**
 * @ingroup AscendCL
 * @brief Get the number of dims from item
 *
 * @param dataItem [IN] pointer to data item
 *
 * @retval 0 for failure
 * @retval OtherValues success
 */
ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem);
/**
 * @ingroup AscendCL
 * @brief Get dims from item
 *
 * @param dataItem [IN] the struct of data item
 * @param dims [IN|OUT] pointer to the dims of dataItem
 * @param dimNum [IN] the size of the dims
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum);
/**
 * @ingroup AscendCL
 * @brief Create the struct of data item
 *
 * @param tdtType [IN] Tdt tensor type
 * @param dims [IN] pointer to the dims of the data item
 * @param dimNum [IN] Dim number
 * @param dataType [IN] Data type
 * @param data [IN] Data pointer
 * @param size [IN] Data size
 *
 * @retval null for failure
 * @retval OtherValues success
 *
 * @see acltdtDestroyDataItem
 */
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum,
                                                         aclDataType dataType, void *data, size_t size);
/**
 * @ingroup AscendCL
 * @brief Destroy the struct of data item
 *
 * @param dataItem [IN] pointer to the data item
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see acltdtCreateDataItem
 */
ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem);
/**
 * @ingroup AscendCL
 * @brief Create the tdt dataset
 *
 * @retval null for failure
 * @retval OtherValues success
 *
 * @see acltdtDestroyDataset
 */
ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset();
/**
 * @ingroup AscendCL
 * @brief Destroy the tdt dataset
 *
 * @param dataset [IN] pointer to the dataset
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see acltdtCreateDataset
 */
ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset);
/**
 * @ingroup AscendCL
 * @brief Get the data item
 *
 * @param dataset [IN] pointer to the dataset
 * @param index [IN] index into the dataset
 *
 * @retval null for failure
 * @retval OtherValues success
 *
 * @see acltdtAddDataItem
 */
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index);
/**
 * @ingroup AscendCL
 * @brief Add a data item to the dataset
 *
 * @param dataset [OUT] pointer to the dataset
 * @param dataItem [IN] pointer to the data item
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see acltdtGetDataItem
 */
ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem);
/**
 * @ingroup AscendCL
 * @brief Get the size of the dataset
 *
 * @param dataset [IN] pointer to the dataset
 *
 * @retval 0 for failure
 * @retval OtherValues success
 */
ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset);
/**
 * @ingroup AscendCL
 * @brief Stop the channel
 *
 * @param handle [IN] pointer to the channel handle
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see acltdtCreateChannel | acltdtDestroyChannel
 */
ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle);
/**
 * @ingroup AscendCL
 * @brief Create the channel
 *
 * @param deviceId [IN] the device id
 * @param name [IN] the channel's name
 *
 * @retval null for failure
 * @retval OtherValues success
 *
 * @see acltdtStopChannel | acltdtDestroyChannel
 */
ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name);
/**
 * @ingroup AscendCL
 * @brief Destroy the channel
 *
 * @param handle [IN] pointer to the channel handle
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see acltdtCreateChannel | acltdtStopChannel
 */
ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle);
/**
 * @ingroup AscendCL
 * @brief Send tensor to device
 *
 * @param handle [IN] pointer to the channel handle
 * @param dataset [IN] pointer to the dataset
 * @param timeout [IN] reserved, must be -1 for now
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see acltdtReceiveTensor
 */
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset,
                                              int32_t timeout);
/**
 * @ingroup AscendCL
 * @brief Receive tensor from device
 *
 * @param handle [IN] pointer to the channel handle
 * @param dataset [OUT] pointer to the dataset
 * @param timeout [IN] reserved, must be -1 for now
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 *
 * @see acltdtSendTensor
 */
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset,
                                                 int32_t timeout);
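/* Example (editor's sketch, not part of the original header): pushing one
 * tensor through a channel. The channel name and 2x3 shape are illustrative;
 * `data` points to caller-managed memory of `size` bytes, and ACL_FLOAT is
 * the aclDataType from acl_base.h.
 *
 *   acltdtChannelHandle *ch = acltdtCreateChannel(0, "my_channel");
 *   int64_t dims[2] = {2, 3};
 *   acltdtDataItem *item = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR, dims, 2,
 *                                               ACL_FLOAT, data, size);
 *   acltdtDataset *ds = acltdtCreateDataset();
 *   (void)acltdtAddDataItem(ds, item);
 *   (void)acltdtSendTensor(ch, ds, -1);  // timeout is reserved, must be -1
 *   (void)acltdtDestroyDataset(ds);
 *   (void)acltdtDestroyDataItem(item);
 *   (void)acltdtStopChannel(ch);
 *   (void)acltdtDestroyChannel(ch);
 */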
#ifdef __cplusplus
}
#endif
#endif  // INC_EXTERNAL_ACL_ACL_TDT_H_
@@ -42,12 +42,22 @@ static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group n
 static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014;     // group not create
 static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015;     // callback not register to stream
 static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016;  // invalid memory type
+static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017;       // invalid handle
+static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018;  // invalid malloc type
-static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPROT = 207000;  // feature not support
+static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000;  // feature not support
 static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001;    // memory allocation error
 static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002;          // memory free error
 static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003;     // aicore over flow
 static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004;            // no device
 static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005;  // resource alloc fail
 static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006;        // no permission
+static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007;    // no event resource
+static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;   // no stream resource
+static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;   // no notify resource
+static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;    // no model resource
-static const int32_t ACL_ERROR_RT_INTERNEL_ERROR = 507000;       // runtime internel error
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;       // runtime internal error
 static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;             // ts internel error
 static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;     // task full in stream
 static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;    // task empty in stream
@@ -82,7 +92,7 @@ static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not
 static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032;      // program register num use out
 static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;      // device setup error
-static const int32_t ACL_ERROR_RT_DRV_INTERNEL_ERROR = 507899;   // drv internel error
+static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899;   // drv internal error
 #ifdef __cplusplus
 }
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/** | |||
* @file hccl.h | |||
* @brief HCCL API | |||
*/ | |||
#ifndef HCCL_H_ | |||
#define HCCL_H_ | |||
#include <hccl/hccl_types.h> | |||
#include <acl/acl.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
/** | |||
* @brief Initialize HCCL. | |||
* | |||
 * @param clusterInfo A string identifying the path of the cluster info file, including the file name. | |||
 * @param rank An integer identifying the rank ID. | |||
* @param comm A pointer identifying the initialized communication resource. | |||
* @return HcclResult | |||
* @see HcclCommDestroy() | |||
*/ | |||
extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); | |||
/** | |||
* @brief Get hccl root info. | |||
* | |||
* @param rootInfo A pointer identifying the hccl root info. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); | |||
/** | |||
* @brief Initialize HCCL with root info. | |||
* | |||
 * @param nRanks An integer identifying the number of ranks in the cluster. | |||
 * @param rootInfo A struct identifying the hccl root info. | |||
 * @param rank An integer identifying the rank ID. | |||
* @param comm A pointer identifying the initialized communication resource. | |||
* @return HcclResult | |||
* @see HcclCommDestroy() | |||
*/ | |||
extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); | |||
/** | |||
* @brief AllReduce operator. | |||
* | |||
* @param sendBuf A pointer identifying the input data address of the operator. | |||
* @param recvBuf A pointer identifying the output data address of the operator. | |||
 * @param count An integer(u64) identifying the number of elements in the output data. | |||
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, | |||
* float32. | |||
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||
 * @param comm A pointer identifying the communication resource the operator is based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, | |||
HcclComm comm, aclrtStream stream); | |||
/** | |||
* @brief Broadcast operator. | |||
* | |||
* @param buf A pointer identifying the data address of the operator. | |||
 * @param count An integer(u64) identifying the number of elements in the data. | |||
 * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
 * @param root An integer(u32) identifying the root rank in the operator. | |||
 * @param comm A pointer identifying the communication resource the operator is based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||
aclrtStream stream); | |||
/** | |||
* @brief ReduceScatter operator. | |||
* | |||
* @param sendBuf A pointer identifying the input data address of the operator. | |||
* @param recvBuf A pointer identifying the output data address of the operator. | |||
 * @param recvCount An integer(u64) identifying the number of elements in the output data. | |||
 * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
 * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||
 * @param comm A pointer identifying the communication resource the operator is based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||
/** | |||
* @brief AllGather operator. | |||
* | |||
* @param sendBuf A pointer identifying the input data address of the operator. | |||
* @param recvBuf A pointer identifying the output data address of the operator. | |||
 * @param sendCount An integer(u64) identifying the number of elements in the input data. | |||
 * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
 * @param comm A pointer identifying the communication resource the operator is based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||
aclrtStream stream); | |||
/** | |||
* @brief Destroy HCCL comm | |||
* | |||
 * @param comm A pointer identifying the communication resource to be destroyed. | |||
* @return HcclResult | |||
* @see HcclCommInitClusterInfo() | |||
*/ | |||
extern HcclResult HcclCommDestroy(HcclComm comm); | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // HCCL_H_ |
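For orientation, a minimal single-rank sketch of the lifecycle these declarations imply (assumptions: the ranktable path is a placeholder, error handling is mostly elided, and the aclrt* runtime calls behave as declared in acl.h/acl_rt.h):

```c
#include <hccl/hccl.h>
#include <acl/acl.h>

int RunAllReduce(uint32_t rankId) {
    HcclComm comm;
    aclrtStream stream;
    void *sendBuf = NULL;
    void *recvBuf = NULL;
    const uint64_t count = 1024;  /* number of fp32 elements */

    aclInit(NULL);
    aclrtSetDevice(0);
    aclrtCreateStream(&stream);
    aclrtMalloc(&sendBuf, count * sizeof(float), ACL_MEM_MALLOC_HUGE_FIRST);
    aclrtMalloc(&recvBuf, count * sizeof(float), ACL_MEM_MALLOC_HUGE_FIRST);

    /* "/path/to/ranktable.json" is a placeholder for the cluster info file. */
    if (HcclCommInitClusterInfo("/path/to/ranktable.json", rankId, &comm) != HCCL_SUCCESS) {
        return -1;
    }
    HcclAllReduce(sendBuf, recvBuf, count, HCCL_DATA_TYPE_FP32,
                  HCCL_REDUCE_SUM, comm, stream);
    aclrtSynchronizeStream(stream);  /* the collective is asynchronous on the stream */
    HcclCommDestroy(comm);
    /* Freeing the buffers, destroying the stream, and aclFinalize are elided. */
    return 0;
}
```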
@@ -0,0 +1,101 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/** | |||
* @file hccl_types.h | |||
* @brief HCCL data type definition | |||
* | |||
*/ | |||
#ifndef HCCL_TYPES_H_ | |||
#define HCCL_TYPES_H_ | |||
#include <stdint.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
/** | |||
* @brief HCCL functions return value definition | |||
*/ | |||
typedef enum { | |||
HCCL_SUCCESS = 0, /**< success */ | |||
HCCL_E_PARA = 1, /**< parameter error */ | |||
HCCL_E_PTR = 2, /**< empty pointer */ | |||
HCCL_E_MEMORY = 3, /**< memory error */ | |||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||
HCCL_E_RESERVED /**< reserved */ | |||
} HcclResult; | |||
/** | |||
* @brief handle to HCCL communicator | |||
*/ | |||
typedef void *HcclComm; | |||
/** | |||
 * @brief HCCL reduction operation | |||
*/ | |||
typedef enum { | |||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||
HCCL_REDUCE_MAX = 2, /**< max */ | |||
HCCL_REDUCE_MIN = 3, /**< min */ | |||
HCCL_REDUCE_RESERVED /**< reserved */ | |||
} HcclReduceOp; | |||
/** | |||
* @brief HCCL data type | |||
*/ | |||
typedef enum { | |||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||
} HcclDataType; | |||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||
/** | |||
* @brief HCCL root info | |||
*/ | |||
typedef struct HcclRootInfoDef { | |||
char internal[HCCL_ROOT_INFO_BYTES]; | |||
} HcclRootInfo; | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // HCCL_TYPES_H_ |
@@ -42,12 +42,22 @@ static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group n | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPROT = 207000; // feature not support | |||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||
static const int32_t ACL_ERROR_RT_INTERNEL_ERROR = 507000; // runtime internel error | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||
@@ -82,7 +92,7 @@ static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNEL_ERROR = 507899; // drv internel error | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
#ifdef __cplusplus | |||
} | |||
@@ -18,43 +18,43 @@ | |||
#define AICPU_OP_TYPE_LIST_H_ | |||
enum OpKernelType { | |||
TF_KERNEL, | |||
CPU_KERNEL | |||
TF_KERNEL, | |||
CPU_KERNEL | |||
}; | |||
enum ReturnCode { | |||
OP_TYPE_NOT_SUPPORT, | |||
FORMAT_NOT_SUPPORT, | |||
DTYPE_NOT_SUPPORT | |||
OP_TYPE_NOT_SUPPORT, | |||
FORMAT_NOT_SUPPORT, | |||
DTYPE_NOT_SUPPORT | |||
}; | |||
#pragma pack(push, 1) | |||
// One-byte alignment | |||
struct SysOpInfo { | |||
uint64_t opLen; | |||
uint64_t opType; | |||
OpKernelType kernelsType; | |||
uint64_t opLen; | |||
uint64_t opType; | |||
OpKernelType kernelsType; | |||
}; | |||
struct OpParamInfo { | |||
uint64_t num; | |||
uint64_t dtypeList; | |||
uint64_t formatList; | |||
uint64_t num; | |||
uint64_t dtypeList; | |||
uint64_t formatList; | |||
}; | |||
struct SysOpCheckInfo { | |||
uint64_t opListNum; | |||
uint64_t offSetLen; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
uint64_t opListNum; | |||
uint64_t offSetLen; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
struct SysOpCheckResp { | |||
uint64_t opListNum; | |||
bool isWithoutJson; | |||
uint64_t returnCodeList; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
uint64_t opListNum; | |||
bool isWithoutJson; | |||
uint64_t returnCodeList; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
#pragma pack(pop) | |||
#endif // AICPU_OP_TYPE_LIST_H_ |
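A small self-contained check of what the one-byte packing above buys (a sketch; it assumes a 4-byte enum, which is typical on LP64 toolchains):

```c
#include <stdint.h>
#include <stdio.h>

enum OpKernelType { TF_KERNEL, CPU_KERNEL };

#pragma pack(push, 1)
struct SysOpInfo {
    uint64_t opLen;
    uint64_t opType;
    enum OpKernelType kernelsType;
};
#pragma pack(pop)

int main(void) {
    /* With pack(1): 8 + 8 + 4 = 20 bytes. Without packing, tail padding to
     * the 8-byte alignment of the uint64_t members would make it 24. */
    printf("sizeof(SysOpInfo) = %zu\n", sizeof(struct SysOpInfo));
    return 0;
}
```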
@@ -31,6 +31,7 @@ typedef enum { | |||
AE_STATUS_KERNEL_API_INNER_ERROR = 5, | |||
AE_STATUS_END_OF_SEQUENCE = 6, | |||
AE_STATUS_DUMP_FAILED = 7, | |||
AE_STATUS_TASK_WAIT = 101, | |||
AE_STATUS_RESERVED | |||
} aeStatus_t; | |||
@@ -60,6 +60,7 @@ enum FWKTaskExtInfoType { | |||
FWK_ADPT_EXT_UPDATE_ADDR, | |||
FWK_ADPT_EXT_OP_NAME, | |||
FWK_ADPT_EXT_SESSION_INFO, | |||
FWK_ADPT_EXT_BITMAP, | |||
FWK_ADPT_EXT_INVALID | |||
}; | |||
@@ -110,6 +110,34 @@ HcclResult HcomDestroyGroup(const char *group); | |||
/** | |||
 * @brief Set the gradient split strategy within the group, according to gradient index. | |||
* | |||
* @param group A string identifying the group name. | |||
 * @param segmentNum An integer(u32) identifying the number of gradient segments. | |||
 * @param IdxList A list identifying the index of the last gradient in each segment. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); | |||
/** | |||
 * @brief Set the gradient split strategy within the group, according to gradient data size. | |||
* | |||
* @param group A string identifying the group name. | |||
 * @param segmentNum An integer(u32) identifying the number of gradient segments. | |||
 * @param sizeList A list identifying the size percentage of each segment. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); | |||
/** | |||
* @brief Initialize hcom executor. | |||
* | |||
* @param void | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecInitialize(); | |||
/** | |||
* @brief Finalize hcom executor. | |||
* | |||
* @param void | |||
* @return HcclResult | |||
*/ | |||
@@ -50,7 +50,7 @@ typedef int (*mmFilter)(const mmDirent *entry); | |||
typedef int (*mmFilter2)(const mmDirent2 *entry); | |||
typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); | |||
typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b); | |||
typedef size_t mmSize_t; | |||
typedef size_t mmSize_t; //lint !e410 !e1051 | |||
typedef off_t mmOfft_t; | |||
typedef pid_t mmPid_t; | |||
typedef long MM_LONG; | |||
@@ -283,6 +283,7 @@ typedef struct { | |||
#define M_W_OK W_OK | |||
#define M_R_OK R_OK | |||
#define MM_DT_DIR DT_DIR | |||
#define MM_DT_REG DT_REG | |||
@@ -1,83 +1,83 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef MMPA_TYPEDEF_WIN_H | |||
#define MMPA_TYPEDEF_WIN_H | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
extern "C" { | |||
#endif // __cpluscplus | |||
#endif // __cpluscplus | |||
#ifndef FALSE | |||
#define FALSE 0 | |||
#endif | |||
#ifndef TRUE | |||
#define TRUE 1 | |||
#endif | |||
#define EN_OK 0 | |||
#define EN_ERR 1 | |||
#define EN_ERROR (-1) | |||
#define EN_INVALID_PARAM (-2) | |||
#define EN_TIMEOUT (-3) | |||
#define HANDLE_INVALID_VALUE (-1) | |||
#define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||
#define MMPA_MEM_MAX_LEN (0x7fffffff) | |||
#define MMPA_PROCESS_ERROR (0x7fffffff) | |||
#define MMPA_ONE_THOUSAND 1000 | |||
#define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||
#define SUMMER_TIME_OR_NOT (-1) | |||
#define MMPA_ZERO 0 | |||
#define MMPA_VALUE_ONE 1 | |||
#define MMPA_SOCKET_MAIN_EDITION 2 | |||
#define MMPA_SOCKET_SECOND_EDITION 0 | |||
#define MMPA_PIPE_BUF_SIZE 1024 | |||
#define MMPA_MAX_SCANDIR_COUNT 1024 | |||
#define MAX_IOVEC_SIZE 32 | |||
#define MMPA_PIPE_COUNT 2 | |||
#define MMPA_THREADNAME_SIZE 16 | |||
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||
#define MMPA_MIN_OS_VERSION_SIZE 64 | |||
#define MMPA_MAX_NI 19 | |||
#define MMPA_MIDDLE_NI 5 | |||
#define MMPA_LOW_NI (-5) | |||
#define MMPA_MIN_NI (-20) | |||
#define MMPA_MAX_FILE 128 | |||
#define MMPA_MAX_THREAD_PIO 99 | |||
#define MMPA_MIDDLE_THREAD_PIO 66 | |||
#define MMPA_LOW_THREAD_PIO 33 | |||
#define MMPA_MIN_THREAD_PIO 1 | |||
#define MMPA_THREAD_SCHED_RR 0 | |||
#define MMPA_THREAD_SCHED_FIFO 0 | |||
#define MMPA_THREAD_SCHED_OTHER 0 | |||
#define MMPA_THREAD_MIN_STACK_SIZE 0 | |||
#define MM_MUTEX_INITIALIZER NULL | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
} | |||
#endif // __cpluscplus | |||
#endif // __cpluscplus | |||
#endif // _MMPA_TYPEDEF_WIN_H_ | |||
@@ -1,4 +1,4 @@ | |||
/** | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -65,6 +65,8 @@ in aipp config file, framework will auto add one input node to graph at last. \n | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator AippData. | |||
*@par Restrictions: | |||
*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. | |||
*/ | |||
REG_OP(AippData) | |||
.INPUT(data, TensorType::ALL()) | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -39,6 +39,7 @@ | |||
#include "image_ops.h" | |||
#include "internal_ops.h" | |||
#include "linalg_ops.h" | |||
#include "list_ops.h" | |||
#include "logging_ops.h" | |||
#include "lookup_ops.h" | |||
#include "math_ops.h" | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1153,6 +1153,79 @@ REG_OP(EditDistance) | |||
.OUTPUT(output, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(EditDistance) | |||
/** | |||
* @brief Sorts the elements of the input tensor along the given axis (SortV2). | |||
* @par Inputs: | |||
* @li x: An ND tensor of type float16, float32 or double. | |||
* @par Attributes: | |||
* @li axis: An optional int. The dimension to sort along. This value defaults to -1 (the last dimension). | |||
* @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False. | |||
* @par Outputs: | |||
* @li y: An ND tensor of the same type as "x". | |||
* @attention Constraints: | |||
* @li "axis" must select the last dimension. | |||
* @li When the amount of data to sort is less than 150K, this TBE op is recommended, | |||
and descending order performs better than ascending. | |||
* @li The upper limit of the data volume on Ascend910 is 2000K. | |||
*/ | |||
REG_OP(SortV2) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(axis, Int, -1) | |||
.ATTR(descending, Bool, false) | |||
.OP_END_FACTORY_REG(SortV2) | |||
/** | |||
* @brief Expand the input tensor to a compatible shape. \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A Tensor. Must be one of the following types: | |||
* float16, float32, int32, int8, uint8. \n | |||
* @li shape: A Tensor to specify the shape that the input tensor expanded to. \n | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the same type as "x", and the shape specified by the "shape" input. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the ONNX operator Expand. | |||
*/ | |||
REG_OP(Expand) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.OP_END_FACTORY_REG(Expand) | |||
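As a concrete broadcasting example: for "x" of shape (3, 1) and a "shape" input holding (2, 1, 6), the dimensions are right-aligned and broadcast pairwise, so "y" has shape (2, 3, 6).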
/** | |||
* @brief Expand the input tensor to a compatible shape. \n | |||
* @par Inputs: | |||
* One input, including: | |||
* @li x: A Tensor. Must be one of the following types: | |||
* float16, float32, int32, int8, uint8. \n | |||
* @par Attributes: | |||
* @li shape: A required listInt to specify the shape that the input tensor expanded to. \n | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the same type as "x", and the shape specified by the "shape" attribute. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the ONNX operator Expand. | |||
*/ | |||
REG_OP(ExpandD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.REQUIRED_ATTR(shape, ListInt) | |||
.OP_END_FACTORY_REG(ExpandD) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -908,7 +908,7 @@ REG_OP(TensorArray) | |||
.OUTPUT(handle, TensorType({DT_RESOURCE})) | |||
.OUTPUT(flow, TensorType({DT_FLOAT})) | |||
.REQUIRED_ATTR(dtype, Type) | |||
.ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE) | |||
.ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) | |||
.ATTR(dynamic_size, Bool, false) | |||
.ATTR(clear_after_read, Bool, true) | |||
.ATTR(identical_element_shapes, Bool, false) | |||
@@ -963,7 +963,7 @@ REG_OP(TensorArrayConcat) | |||
DT_QUINT8, DT_QINT32})) | |||
.OUTPUT(lengths, TensorType({DT_INT64})) | |||
.REQUIRED_ATTR(dtype, Type) | |||
.ATTR(element_shape_except0, ListInt, ge::UNKNOWN_SHAPE) | |||
.ATTR(element_shape_except0, ListInt, ge::UNKNOWN_RANK) | |||
.OP_END_FACTORY_REG(TensorArrayConcat) | |||
/** | |||
@@ -999,7 +999,7 @@ REG_OP(TensorArrayGather) | |||
DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, | |||
DT_QUINT8, DT_QINT32})) | |||
.REQUIRED_ATTR(dtype, Type) | |||
.ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE) | |||
.ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) | |||
.OP_END_FACTORY_REG(TensorArrayGather) | |||
/** | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -122,7 +122,8 @@ REG_OP(MinimumGrad) | |||
*@par Inputs: | |||
*One input: | |||
*x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | |||
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n | |||
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | |||
For float32 type, the actual calculation on the chip is based on float16. \n | |||
*@par Attributes: | |||
*dst_type: An required attribute of type int32, specifying the dst data type. \n | |||
@@ -611,6 +612,15 @@ REG_OP(Log1p) | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x1". | |||
*@attention Constraints: | |||
*@li x2: The input data does not support 0. | |||
*@li When NUM exceeds 2048, the accuracy of the operator cannot meet the | |||
*requirement of double thousandths in the mini form. | |||
*@li Due to different architectures, the calculation results of this operator | |||
*on NPU and CPU may be inconsistent. | |||
*@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8. | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator Mod. | |||
*/ | |||
@@ -2042,6 +2052,15 @@ REG_OP(FloorDiv) | |||
* | |||
*@par Outputs: | |||
*y: Result remainder. | |||
*@attention Constraints: | |||
*@li x2: The input data does not support 0. | |||
*@li When NUM exceeds 2048, the accuracy of the operator cannot meet the | |||
*requirement of double thousandths in the mini form. | |||
*@li Due to different architectures, the calculation results of this operator | |||
*on NPU and CPU may be inconsistent. | |||
*@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8. | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator FloorMod. | |||
*/ | |||
@@ -2168,6 +2187,14 @@ REG_OP(Tan) | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x1". \n | |||
*@attention Constraints: | |||
*@li x2: The input data does not support 0. | |||
*@li When NUM exceeds 2048, the accuracy of the operator cannot meet the | |||
*requirement of double thousandths in the mini form. | |||
*@li Due to different architectures, the calculation results of this operator | |||
*on NPU and CPU may be inconsistent. | |||
*@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8. | |||
*@par Third-party framework compatibility | |||
*@li Compatible with the TensorFlow operator TruncateMod. | |||
*/ | |||
@@ -2829,9 +2856,9 @@ REG_OP(AdamApplyOneAssign) | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(LambApplyOptimizerAssign) | |||
.INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
@@ -2842,6 +2869,8 @@ REG_OP(LambApplyOptimizerAssign) | |||
.INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.OUTPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.OUTPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.OP_END_FACTORY_REG(LambApplyOptimizerAssign) | |||
/** | |||
@@ -2873,7 +2902,8 @@ REG_OP(LambApplyWeightAssign) | |||
.INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.OUTPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.OP_END_FACTORY_REG(LambApplyWeightAssign) | |||
/** | |||
@@ -3329,8 +3359,297 @@ REG_OP(TensorRedirect) | |||
.OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | |||
DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) | |||
.OP_END_FACTORY_REG(TensorRedirect) | |||
} // namespace ge | |||
/** | |||
* @brief Performs the element-wise division of tensor x1 by tensor x2, | |||
* multiplies the result by the scalar value and adds it to tensor input_data. | |||
* @par Inputs: | |||
* Four inputs, including: | |||
* @li input_data: A mutable input Tensor. Must be one of the following types: | |||
* float16, float32. | |||
* @li x1: A mutable input Tensor of the same type as input_data. | |||
* @li x2: A mutable input Tensor of the same type as input_data. | |||
* @li value: A mutable input Tensor. Must be one of the following types: | |||
* float16, float32, int32. \n | |||
* @par Outputs: | |||
* @li y: A mutable Tensor. Has the same type as "input_data". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Addcdiv. | |||
*/ | |||
REG_OP(Addcdiv) | |||
.INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32 })) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(Addcdiv) | |||
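In symbols, matching the PyTorch addcdiv semantics cited above: y = input_data + value * (x1 / x2), computed element-wise.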
/** | |||
* @brief Performs the element-wise multiplication of tensor x1 by tensor x2, | |||
* multiplies the result by the scalar value and adds it to tensor input_data. | |||
* @par Inputs: | |||
* Four inputs, including: | |||
* @li input_data: A mutable input Tensor. Must be one of the following types: | |||
* float16, float32, int8, int32, uint8. | |||
* @li x1: A mutable input Tensor of the same type as input_data. | |||
* @li x2: A mutable input Tensor of the same type as input_data. | |||
* @li value: A tensor which includes only one element, of the same type as input_data. \n | |||
* @par Outputs: | |||
* @li y: A mutable output Tensor. Has the same type as "input_data". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Addcmul. | |||
*/ | |||
REG_OP(Addcmul) | |||
.INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||
.INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||
.INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||
.INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||
.OP_END_FACTORY_REG(Addcmul) | |||
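In symbols, matching the PyTorch addcmul semantics cited above: y = input_data + value * x1 * x2, computed element-wise.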
/** | |||
* @brief Computes the result of x2 * alpha + x1. | |||
* @par Inputs: | |||
* @li x1: An ND tensor of type float16, float32, int32. | |||
* @li x2: An ND tensor of type float16, float32, int32. | |||
* @li alpha: A scalar tensor of type float16, float32. \n | |||
* @par Outputs: | |||
* @li y: An ND tensor with the same shape and type as "x1". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Axpy. | |||
*/ | |||
REG_OP(AxpyV2) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OP_END_FACTORY_REG(AxpyV2) | |||
/** | |||
* @brief Computes the result of x1 + x2. | |||
* @par Inputs: | |||
* @li x1: An ND tensor of type float16, float, int32. | |||
* @li x2: An ND tensor of type float16, float, int32. \n | |||
* @par Outputs: | |||
* @li y: An ND tensor with the same type as "x1". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Add. | |||
*/ | |||
REG_OP(PtAdd) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OP_END_FACTORY_REG(PtAdd) | |||
/** | |||
* @brief Computes the result of x1 * x2. | |||
* @par Inputs: | |||
* @li x1: An ND tensor of type float16, float32, int32. | |||
* @li x2: An ND tensor of type float16, float32, int32. \n | |||
* @par Outputs: | |||
* @li y: Same shape and type as the larger of the ND tensors "x1" and "x2". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator muls. | |||
*/ | |||
REG_OP(PtMuls) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OP_END_FACTORY_REG(PtMuls) | |||
/** | |||
* @brief Computes the result of x1 - x2. | |||
* @par Inputs: | |||
* @li x1: An ND tensor of type float16, float, int32. | |||
* @li x2: An ND tensor of type float16, float, int32. \n | |||
* @par Outputs: | |||
* @li y: An ND tensor with the same type as "x1". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Sub. | |||
*/ | |||
REG_OP(PtSub) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OP_END_FACTORY_REG(PtSub) | |||
/** | |||
* @brief Add the partial values of two tensors in format NC1HWC0. | |||
* @par Inputs: | |||
* @li x1: A Tensor in 5HD, and must be one of the following types: float16, | |||
* float32. \n | |||
* @li x2: A Tensor of the same type as "x1", and the same shape as "x1", | |||
* except for the C1 value. \n | |||
* @par Attributes: | |||
* @li x1_c1_offset: A required int. Offset value of C1 in "x1". \n | |||
* @li x2_c1_offset: A required int. Offset value of C1 in "x2". \n | |||
* @li c1_len: A required int. C1 len of "y". The value must be less than | |||
* the difference between C1 and offset in "x1" and "x2". \n | |||
* @par Outputs: | |||
* @li y: A Tensor of the same type as "x1", and the same shape as "x1", | |||
* except for the C1 value. Record the result after adding. \n | |||
*/ | |||
REG_OP(StrideAdd) | |||
.INPUT(x1, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.INPUT(x2, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.REQUIRED_ATTR(x1_c1_offset, Int) | |||
.REQUIRED_ATTR(x2_c1_offset, Int) | |||
.REQUIRED_ATTR(c1_len, Int) | |||
.OP_END_FACTORY_REG(StrideAdd) | |||
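An illustrative reading of the attributes (an assumption about the intended semantics, not stated in the patch): with x1_c1_offset = 1, x2_c1_offset = 0 and c1_len = 2, the two C1 slices of "y" would be x1's C1 slices 1..2 added element-wise to x2's C1 slices 0..1.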
/** | |||
* @brief Compares whether two tensors are exactly equal, outputting a single bool value. | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li input_x: A Tensor. The first tensor. \n | |||
* @li input_y: A Tensor. The second tensor. \n | |||
* @par Outputs: | |||
* @li output_z: A Tensor of type bool, holding the comparison result of the two inputs. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch equal operator. \n | |||
*/ | |||
REG_OP(TensorEqual) | |||
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.OUTPUT(output_z, TensorType({DT_BOOL})) | |||
.OP_END_FACTORY_REG(TensorEqual) | |||
/** | |||
* @brief Element-wise max of each of the input tensors (with Numpy-style broadcasting support). | |||
* All inputs and outputs must have the same data type. This operator supports multidirectional | |||
* (i.e., Numpy-style) broadcasting. | |||
* | |||
* @par Inputs: | |||
* One dynamic input, including: | |||
* @li x: A dynamic input Tensor. Must be one of the following types: float32, float16, double, int32, int64. | |||
* | |||
* @par Outputs: | |||
* One output, including: | |||
* @li y: A Tensor of the same type as "x". | |||
* | |||
*/ | |||
REG_OP(MaxN) | |||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) | |||
.OP_END_FACTORY_REG(MaxN) | |||
/** | |||
* @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). | |||
* All inputs and outputs must have the same data type. This operator supports multidirectional | |||
* (i.e., Numpy-style) broadcasting. | |||
* | |||
* @par Inputs: | |||
* One dynamic input, including: | |||
* @li x: A dynamic input Tensor. Must be one of the following types: float32, float16, double, int32, int64. | |||
* | |||
* @par Outputs: | |||
* One output, including: | |||
* @li y: A Tensor of the same type as "x". | |||
* | |||
*/ | |||
REG_OP(MinN) | |||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, | |||
DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, | |||
DT_INT32, DT_INT64})) | |||
.OP_END_FACTORY_REG(MinN) | |||
/** | |||
* @brief Calculates x * mask * value. | |||
* | |||
* @par Inputs: | |||
* @li x: A tensor of type float16 or float32, specifying the input to the data layer. | |||
* @li mask: A tensor of type int8, float16 or float32, with the same shape as x. \n | |||
* | |||
* @par Attributes: | |||
* value: A required float. \n | |||
* | |||
* @par Outputs: | |||
* @li y: A Tensor of the same type and shape as "x". \n | |||
* | |||
*/ | |||
REG_OP(MaskedScale) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||
.INPUT(mask, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||
.REQUIRED_ATTR(value, Float) | |||
.OP_END_FACTORY_REG(MaskedScale) | |||
/** | |||
* @brief Calculate the lerp function. \n | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li start: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @li end: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @li weight: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @par Outputs: | |||
* y: A Tensor with the same type and shape as "start". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Lerp. \n | |||
*/ | |||
REG_OP(Lerp) | |||
.INPUT(start, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(end, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(Lerp) | |||
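For reference, the PyTorch lerp this mirrors computes y = start + weight * (end - start), element-wise.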
/** | |||
*@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, | |||
*0 otherwise. The input does not need to explicitly be a 2D vector. The "axis" attribute indicates the dimension | |||
*along which Hardmax will be performed. The output tensor has the same shape and contains the Hardmax values of | |||
*the corresponding input. | |||
* | |||
*@par Inputs: | |||
*One input, including: | |||
*@li x: A Tensor. Must be one of the following types: float32, float16 | |||
* | |||
*@par Attributes: | |||
*@li axis: An optional int attribute that decides which dimension will be used to calculate the hardmax; | |||
*defaults to -1 | |||
* | |||
*@par Outputs: | |||
*One output, including: | |||
*@li y: A Tensor of the same type as x | |||
* | |||
*/ | |||
REG_OP(HardMax) | |||
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(axis, Int, -1) | |||
.OP_END_FACTORY_REG(HardMax) | |||
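A small worked example: for x = [[1, 3, 2], [5, 4, 5]] with axis = -1, the first maximum in each row is marked, giving y = [[0, 1, 0], [1, 0, 0]].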
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -45,8 +45,6 @@ REG_OP(HcomAllGather) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | |||
.REQUIRED_ATTR(rank_size, Int) | |||
.REQUIRED_ATTR(group, String) | |||
.ATTR(alpha, Float, 1.0) | |||
.ATTR(beta, Float, 0.0) | |||
.OP_END_FACTORY_REG(HcomAllGather) | |||
/** | |||
@@ -77,8 +75,6 @@ REG_OP(HcomAllReduce) | |||
.REQUIRED_ATTR(group, String) | |||
.ATTR(fusion, Int, 1) | |||
.ATTR(fusion_id, Int, -1) | |||
.ATTR(alpha, Float, 1.0) | |||
.ATTR(beta, Float, 0.0) | |||
.OP_END_FACTORY_REG(HcomAllReduce) | |||
/** | |||
@@ -91,7 +87,7 @@ REG_OP(HcomAllReduce) | |||
input of this rank will be broadcast to other ranks. | |||
* @li fusion: A required integer identifying whether the op needs to be fused; the | |||
default value is no fusion | |||
* @li fusion: A required integer identifying the fusion id if para fusion | |||
* @li fusion_id: A required integer identifying the fusion id if para fusion | |||
is set. | |||
* @li group: A required string identifying the group name of ranks | |||
participating in the op. | |||
@@ -109,10 +105,39 @@ REG_OP(HcomBroadcast) | |||
.REQUIRED_ATTR(group, String) | |||
.ATTR(fusion, Int, 0) | |||
.ATTR(fusion_id, Int, -1) | |||
.ATTR(alpha, Float, 1.0) | |||
.ATTR(beta, Float, 0.0) | |||
.OP_END_FACTORY_REG(HcomBroadcast) | |||
/** | |||
* @brief Performs reduction from the other ranks to the root rank. | |||
* @par Inputs: | |||
* @li root_rank: A required integer identifying the root rank in the op; | |||
the reduction result will be placed on this root rank. | |||
* @li x: A tensor. Must be one of the following types: int8, int16, int32, float16, | |||
float32. | |||
* @par Attributes: | |||
* @li reduction: A required string identifying the reduction operation to | |||
perform. The supported operations are: "sum", "max", "min", "prod". | |||
* @li group: A required string identifying the group name of ranks | |||
participating in the op. | |||
* @li fusion: An optional integer identifying the fusion flag of the op. | |||
0: no fusion; 1 (default): fusion; 2: fusion the ops by fusion id. | |||
* @li fusion_id: An optional integer identifying the fusion id of the op. | |||
* The HcomReduce ops with the same fusion id will be fused. | |||
* @par Outputs: | |||
* y: A Tensor. Has the same type as "x". | |||
* @attention Constraints: | |||
*"group" is limited to 128 characters. Use "hccl_world_group" | |||
as the name of a world group. | |||
*/ | |||
REG_OP(HcomReduce) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) | |||
.REQUIRED_ATTR(root_rank, Int) | |||
.REQUIRED_ATTR(reduction, String) | |||
.REQUIRED_ATTR(group, String) | |||
.ATTR(fusion, Int, 0) | |||
.ATTR(fusion_id, Int, -1) | |||
.OP_END_FACTORY_REG(HcomReduce) | |||
/** | |||
* @brief Performs reduction across all input tensors, scattering in equal | |||
blocks among ranks, each rank getting a chunk of data based on its rank | |||
@@ -139,8 +164,6 @@ REG_OP(HcomReduceScatter) | |||
.REQUIRED_ATTR(reduction, String) | |||
.REQUIRED_ATTR(group, String) | |||
.REQUIRED_ATTR(rank_size, Int) | |||
.ATTR(alpha, Float, 1.0) | |||
.ATTR(beta, Float, 0.0) | |||
.OP_END_FACTORY_REG(HcomReduceScatter) | |||
/** | |||
@@ -167,8 +190,6 @@ REG_OP(HcomSend) | |||
.REQUIRED_ATTR(group, String) | |||
.REQUIRED_ATTR(sr_tag, Int) | |||
.REQUIRED_ATTR(dest_rank, Int) | |||
.ATTR(alpha, Float, 1.0) | |||
.ATTR(beta, Float, 0.0) | |||
.OP_END_FACTORY_REG(HcomSend) | |||
/** | |||
@@ -202,8 +223,6 @@ REG_OP(HcomReceive) | |||
.REQUIRED_ATTR(src_rank, Int) | |||
.REQUIRED_ATTR(shape, ListInt) | |||
.REQUIRED_ATTR(dtype, Type) | |||
.ATTR(alpha, Float, 1.0) | |||
.ATTR(beta, Float, 0.0) | |||
.OP_END_FACTORY_REG(HcomReceive) | |||
/** | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -31,11 +31,12 @@ namespace ge { | |||
*@par Inputs: | |||
*Input images is a tensor of at least 3 dimensions. The last dimension is | |||
interpreted as channels, and must be three. Inputs include: | |||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||
must be NHWC. | |||
*@li delta:A Tensor of type float. A float delta to add to the hue . \n | |||
*@par Outputs: | |||
*y:A Tensor of type float . \n | |||
*y:A Tensor of type float. The format must be NHWC. \n | |||
*@attention Constraints: | |||
*Input images is a tensor of at least 3 dimensions. The last dimension is | |||
@@ -57,11 +58,12 @@ REG_OP(AdjustHue) | |||
*@par Inputs: | |||
*Input images is a tensor of at least 3 dimensions. The last dimension is | |||
interpreted as channels, and must be three. Inputs include: | |||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||
must be NHWC. | |||
*@li scale:A Tensor of type float. A float scale to add to the saturation . \n | |||
*@par Outputs: | |||
*y:A Tensor of type float . \n | |||
*y:A Tensor of type float. The format must be NHWC. \n | |||
*@attention Constraints: | |||
*Input images is a tensor of at least 3 dimensions. The last dimension is | |||
@@ -83,11 +85,12 @@ REG_OP(AdjustSaturation) | |||
*@par Inputs: | |||
*Input images is a tensor of at least 3 dimensions. The last 3 dimensions are | |||
interpreted as '[height, width, channels]'. Inputs include: | |||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||
must be NHWC. | |||
*@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n | |||
*@par Outputs: | |||
*y:A Tensor of type float . \n | |||
*y:A Tensor of type float. The format must be NHWC. \n | |||
*@attention Constraints: | |||
*Input images is a tensor of at least 3 dimensions. The last dimension is | |||
@@ -112,7 +115,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n | |||
*Input images must be a 4-D tensor. Inputs include: | |||
*@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, | |||
int16, int32, int64, float16, float, double. A 4-D tensor of shape | |||
[batch, image_height, image_width, depth]. | |||
[batch, image_height, image_width, depth]. The format must be NHWC. | |||
*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | |||
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with | |||
int32 values in [0, batch). | |||
@@ -127,7 +130,7 @@ extrapolation, when applicable. | |||
NearestNeighbor . \n | |||
*@par Outputs: | |||
*y:A Tensor of type float . \n | |||
*y:A Tensor of type float. The format must be NHWC. \n | |||
*@attention Constraints: | |||
*Input images must be a 4-D tensor . \n | |||
@@ -193,7 +196,9 @@ boxes tensor . \n | |||
*@par Inputs: | |||
*Input images and grads must be a 4-D tensor. Inputs include: | |||
*@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | |||
The format must be NHWC. | |||
*@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. | |||
The format must be NHWC. | |||
Both image_height and image_width need to be positive. | |||
*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | |||
specifies the coordinates of a box in the box_ind[i] image and is specified in | |||
@@ -233,6 +238,7 @@ images tensor . \n | |||
*@par Inputs: | |||
*Input grads must be a 4-D tensor. Inputs include: | |||
*@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | |||
The format must be NHWC. | |||
*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | |||
specifies the coordinates of a box in the box_ind[i] image and is specified | |||
in normalized coordinates [y1, x1, y2, x2]. | |||
@@ -248,7 +254,8 @@ method: A string specifying the interpolation method. Only 'bilinear' is | |||
supported for now . \n | |||
*@par Outputs: | |||
*y:A 4-D tensor of shape [batch, image_height, image_width, depth] . \n | |||
*y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format | |||
must be NHWC. \n | |||
*@attention Constraints: | |||
*Input grads must be a 4-D tensor . \n | |||
@@ -273,6 +280,7 @@ REG_OP(CropAndResizeGradImage) | |||
*@par Inputs: | |||
*Input x must be a 4-D tensor. Inputs include: | |||
*@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. | |||
The format must be NHWC. | |||
*@li size: A 1-D tensor of 2 elements containing the size of the glimpses to | |||
extract. The glimpse height must be specified first, following by the glimpse | |||
width. | |||
@@ -293,7 +301,7 @@ uniform_noise . \n | |||
*@par Outputs: | |||
*y:A tensor representing the glimpses [batch_size, glimpse_height, | |||
glimpse_width, channels] . \n | |||
glimpse_width, channels]. The format must be NHWC. \n | |||
*@attention Constraints: | |||
*Input x must be a 4-D tensor . \n | |||
@@ -340,7 +348,8 @@ REG_OP(HSVToRGB) | |||
*@par Inputs: | |||
*Input images must be a 4-D tensor. Inputs include: | |||
*@li images: 4-D with shape [batch, height, width, channels]. | |||
*@li images: 4-D with shape [batch, height, width, channels]. The format must | |||
be NHWC. | |||
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | |||
size for the images. | |||
*@li min: A Tensor of type float. | |||
@@ -354,6 +363,7 @@ the values at the corner pixels. Defaults to false. | |||
*@par Outputs: | |||
*@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. | |||
The format must be NHWC. | |||
*@li y_min: A Tensor of type float. | |||
*@li y_max: A Tensor of type float . \n | |||
@@ -381,7 +391,8 @@ REG_OP(QuantizedResizeBilinear) | |||
*@par Inputs: | |||
*Input images must be a 4-D tensor. Inputs include: | |||
*@li images: 4-D with shape [batch, height, width, channels]. | |||
*@li images: 4-D with shape [batch, height, width, channels]. The format must | |||
be NHWC. | |||
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | |||
The new size for the images . \n | |||
@@ -391,7 +402,8 @@ output tensors are aligned, preserving the values at the corner pixels. | |||
Defaults to false . \n | |||
*@par Outputs: | |||
*y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||
*y: 4-D with shape [batch, new_height, new_width, channels]. The format must | |||
be NHWC. \n | |||
*@attention Constraints: | |||
*Input images can be of different types but output images are always float . \n | |||
@@ -414,10 +426,10 @@ REG_OP(ResizeArea) | |||
*@par Inputs: | |||
*Input grads must be a 4-D tensor. Inputs include: | |||
*@li grads: A Tensor of type float. 4-D with shape [batch, height, width, | |||
channels]. | |||
channels]. The format must be NHWC. | |||
*@li original_image: A Tensor. Must be one of the following types: float, | |||
double. 4-D with shape [batch, orig_height, orig_width, channels], The image | |||
tensor that was resized . \n | |||
tensor that was resized. The format must be NHWC. \n | |||
*@par Attributes: | |||
*@li align_corners: An optional bool. Defaults to False. If true, the centers | |||
@@ -426,10 +438,10 @@ false. | |||
*@li half_pixel_centers: An optional bool. Defaults to False . \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as original_image . \n | |||
*y: A Tensor. Has the same type as original_image. The format must be NHWC. \n | |||
*@attention Constraints: | |||
*Input images can be of different types but output images are always float . \n | |||
*Input images can be of different types but output images are always float . | |||
*@par Third-party framework compatibility | |||
*Compatible with tensorflow ResizeBicubicGrad operator. | |||
@@ -448,7 +460,8 @@ REG_OP(ResizeBicubicGrad) | |||
*@par Inputs: | |||
*Input images must be a 4-D tensor. Inputs include: | |||
*@li images: 4-D with shape [batch, height, width, channels]. | |||
*@li images: 4-D with shape [batch, height, width, channels]. The format | |||
must be NHWC. | |||
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | |||
size for the images . \n | |||
@@ -459,10 +472,11 @@ Defaults to false. | |||
*@li half_pixel_centers: An optional bool. Defaults to False . \n | |||
*@par Outputs: | |||
*y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||
*y: 4-D with shape [batch, new_height, new_width, channels]. The format | |||
must be NHWC. \n | |||
*@attention Constraints: | |||
*Input images can be of different types but output images are always float . \n | |||
*Input images can be of different types but output images are always float . | |||
*@par Third-party framework compatibility | |||
*Compatible with tensorflow ResizeBicubic operator. | |||
@@ -483,7 +497,7 @@ REG_OP(ResizeBicubic) | |||
*@par Inputs: | |||
*Input grads must be a 4-D tensor. Inputs include: | |||
*@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, | |||
float16, float, double. 4-D with shape [batch, height, width, channels]. | |||
float16, float, double. Must set the format, supported format list ["NCHW", "NHWC"] | |||
*@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. | |||
The original input size . \n | |||
@@ -550,9 +564,8 @@ REG_OP(ResizeNearestNeighborV2GradD) | |||
*@par Inputs: | |||
*Input grads must be a 4-D tensor. Inputs include: | |||
*@li grads: A Tensor of type float32. 4-D with shape [batch, height, width, | |||
channels]. | |||
*@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width, | |||
*@li grads: A Tensor of type float32. Must set the format, supported format list ["NCHW", "NHWC"] | |||
*@li original_image: A Tensor. 4-D shape. Must set the format, supported format list ["NCHW", "NHWC"] | |||
channels], The image tensor that was resized . \n | |||
*@par Attributes: | |||
@@ -583,7 +596,7 @@ REG_OP(ResizeBilinearV2Grad) | |||
*@par Inputs: | |||
*Input images must be a 4-D tensor. Inputs include: | |||
*@li x: 4-D with shape [batch, height, width, channels]. | |||
*@li x: 4-D tensor. Must set the format, supported format list ["NCHW", "NHWC"] | |||
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | |||
size for the images . \n | |||
@@ -697,7 +710,7 @@ REG_OP(SampleDistortedBoundingBoxExt2) | |||
*@par Inputs: | |||
*Input x must be a 4-D tensor. Inputs include: | |||
*@li x: 4-D with shape [batch, height, width, channels]. | |||
*@li x: 4-D tensor. Must set the format, supported format list ["NCHW", "NHWC"]. | |||
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | |||
The new size for the images . \n | |||
@@ -729,12 +742,12 @@ REG_OP(ResizeNearestNeighborV2) | |||
*@par Inputs: | |||
*Input images must be a 4-D tensor. Inputs include: | |||
*@li images: A Tensor. Must be one of the following types: float. 4-D with | |||
shape [batch, height, width, depth]. A batch of images. | |||
shape [batch, height, width, depth]. A batch of images. The format must be NHWC. | |||
*@li boxes: A Tensor of type float32. 3-D with shape [batch, | |||
num_bounding_boxes, 4] containing bounding boxes . \n | |||
*@par Outputs: | |||
*A Tensor. Has the same type as images . \n | |||
*A Tensor. Has the same type as images. The format must be NHWC. \n | |||
*@attention Constraints: | |||
*Input images must be a 4-D tensor . \n | |||
@@ -1342,6 +1355,129 @@ REG_OP(SpatialTransformerD) | |||
.ATTR(use_default_theta, ListBool, {}) | |||
.OP_END_FACTORY_REG(SpatialTransformerD) | |||
} // namespace ge | |||
/** | |||
* @brief Resize the input tensor. \n | |||
Currently, only resizing image tensors using nearest-neighbor and linear interpolation is supported. | |||
* @par Inputs: | |||
* Input x must be a 4-D tensor. Inputs include: \n | |||
* @li x: A Tensor. Must be one of the following types: uint8, int8, uint16, int16, \n | |||
int32, int64, float16, float, double. 4-D with shape [batch, height, width, channels] \n | |||
or shape [batch, channels, height, width]. | |||
* @li roi: A 1-D float Tensor. Only takes effect when attr coordinate_transformation_mode \n | |||
is "tf_crop_and_resize". | |||
* @li scales: A 1-D float Tensor, the scale array along each dimension, Only one of \n | |||
'scales' and 'sizes' can be specified. | |||
* @li sizes: A 1-D int64 Tensor, The size of the output tensor. Only one of \n | |||
'scales' and 'sizes' can be specified. If 'sizes' is specified, then set 'scales' \n | |||
to empty data (zero shape) in this operator's input list. | |||
* @par Attributes: | |||
* @li coordinate_transformation_mode: String. Defaults to half_pixel. Specifies how to transform \n | |||
the coordinate in the resized tensor to the coordinate in the original tensor. \n | |||
Other options: pytorch_half_pixel, align_corners, asymmetric, tf_half_pixel_for_nn, \n | |||
tf_crop_and_resize. | |||
* @li cubic_coeff_a: Float. Defaults to -0.75. Only used in cubic interpolation. \n | |||
The other supported value is -0.5. | |||
* @li exclude_outside: Int. Defaults to 0. If set to 1, the weight of sampling \n | |||
locations outside the tensor will be set to 0 and the remaining weights will be \n | |||
renormalized so that their sum is 1.0. | |||
* @li extrapolation_value: Float. Defaults to 0.0f. When coordinate_transformation_mode \n | |||
is "tf_crop_and_resize" and x_original is outside the range [0, length_original - 1], \n | |||
this value is used as the corresponding output value. | |||
* @li mode: String. Defaults to nearest. Three interpolation modes: nearest (default), \n | |||
linear and cubic. | |||
* @li nearest_mode: String. Defaults to round_prefer_floor. Four modes: round_prefer_floor, \n | |||
round_prefer_ceil, floor, ceil. Only used by nearest interpolation. | |||
* @par Outputs: | |||
* y: A Tensor. Has the same type as x. | |||
* @attention Constraints: \n | |||
* Input x must be a 4-D tensor. | |||
* @par Third-party framework compatibility | |||
* Compatible with tensorflow ResizeNearestNeighborV2 operator. | |||
*/ | |||
REG_OP(Resize) | |||
.INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, | |||
DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(scales, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(sizes, TensorType({DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, | |||
DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(coordinate_transformation_mode, String, "half_pixel") | |||
.ATTR(cubic_coeff_a, Float, -0.75) | |||
.ATTR(exclude_outside, Int, 0) | |||
.ATTR(extrapolation_value, Float, 0) | |||
.ATTR(mode, String, "nearest") | |||
.ATTR(nearest_mode, String, "round_prefer_floor") | |||
.OP_END_FACTORY_REG(Resize) | |||
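For readers unfamiliar with these attributes, the sketch below restates the coordinate_transformation_mode and nearest_mode semantics, assuming they follow the ONNX Resize definitions; the function and parameter names are illustrative and not part of this header.

#include <cmath>
#include <cstdint>
#include <string>

// Map an output coordinate back to an input coordinate (ONNX-style semantics assumed).
static float ToOriginalCoord(float x_resized, float scale, int64_t len_resized,
                             int64_t len_original, const std::string &mode) {
  if (mode == "half_pixel") return (x_resized + 0.5f) / scale - 0.5f;
  if (mode == "asymmetric") return x_resized / scale;
  if (mode == "align_corners")
    return len_resized == 1 ? 0.0f
                            : x_resized * (len_original - 1) /
                                  static_cast<float>(len_resized - 1);
  return (x_resized + 0.5f) / scale - 0.5f;  // remaining modes omitted for brevity
}

// The four nearest_mode rounding rules listed above.
static int64_t NearestIndex(float x, const std::string &nearest_mode) {
  if (nearest_mode == "round_prefer_floor") return static_cast<int64_t>(std::ceil(x - 0.5f));
  if (nearest_mode == "round_prefer_ceil") return static_cast<int64_t>(std::floor(x + 0.5f));
  if (nearest_mode == "floor") return static_cast<int64_t>(std::floor(x));
  return static_cast<int64_t>(std::ceil(x));  // "ceil"
}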
/** | |||
*@brief Function parse image from string to int. \n | |||
*@par Inputs: | |||
*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n | |||
*@par Attributes: | |||
*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. | |||
*@li ratio: An optional int. Defaults to 1. Downscaling ratio. | |||
*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower but nicer upscaling of the chroma planes | |||
*@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input. | |||
*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted. | |||
*@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n | |||
*@par Outputs: | |||
*image: A Tensor dtype of uint8. | |||
*/ | |||
REG_OP(DecodeJpeg) | |||
.INPUT(contents, TensorType({DT_STRING})) | |||
.OUTPUT(image, TensorType({DT_UINT8})) | |||
.ATTR(channels, Int, 0) | |||
.ATTR(ratio, Int, 1) | |||
.ATTR(fancy_upscaling, Bool, true) | |||
.ATTR(try_recover_truncated, Bool, false) | |||
.ATTR(acceptable_fraction, Float, 1.0) | |||
.ATTR(dct_method, String, "") | |||
.OP_END_FACTORY_REG(DecodeJpeg) | |||
/** | |||
*@brief Image warping using per-pixel flow vectors. \n | |||
*@par Inputs: | |||
*@li images: 4-D Tensor with shape `[batch, height, width, channels]`. | |||
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n | |||
*@par Outputs: | |||
*y: Returns 4-D with the same shape and dtype as `images`. \n | |||
*/ | |||
REG_OP(DenseImageWarp) | |||
.INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OP_END_FACTORY_REG(DenseImageWarp) | |||
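As a reading aid, here is a minimal scalar sketch of the warp semantics, assuming the TensorFlow Addons dense_image_warp convention in which output(y, x, c) samples the image bilinearly at (y - flow(y, x, 0), x - flow(y, x, 1)); all names below are illustrative.

#include <algorithm>
#include <vector>

// Bilinearly sample channel ch of an HxWxC row-major image at (y, x),
// clamping the query point to the image border.
static float BilinearSample(const std::vector<float> &image, int h, int w,
                            int c, float y, float x, int ch) {
  y = std::min(std::max(y, 0.0f), static_cast<float>(h - 1));
  x = std::min(std::max(x, 0.0f), static_cast<float>(w - 1));
  int y0 = static_cast<int>(y), x0 = static_cast<int>(x);
  int y1 = std::min(y0 + 1, h - 1), x1 = std::min(x0 + 1, w - 1);
  float dy = y - y0, dx = x - x0;
  auto at = [&](int yy, int xx) { return image[(yy * w + xx) * c + ch]; };
  return (1 - dy) * ((1 - dx) * at(y0, x0) + dx * at(y0, x1)) +
         dy * ((1 - dx) * at(y1, x0) + dx * at(y1, x1));
}
// Per pixel and channel, DenseImageWarp would then compute:
//   y_out(y, x, ch) = BilinearSample(image, H, W, C,
//                                    y - flow(y, x, 0), x - flow(y, x, 1), ch);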
/** | |||
*@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n | |||
*@par Inputs: | |||
*@li grad: gradients with respect to DenseImageWarp output. | |||
*@li images: 4-D Tensor with shape `[batch, height, width, channels]`. | |||
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n | |||
*@par Outputs: | |||
*grad_image: Returns 4-D with the same shape and dtype as `images`. | |||
*grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n | |||
*/ | |||
REG_OP(DenseImageWarpGrad) | |||
.INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OP_END_FACTORY_REG(DenseImageWarpGrad) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -0,0 +1,230 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/*! | |||
* \file list_ops.h | |||
* \brief | |||
*/ | |||
#ifndef OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ | |||
#define OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ | |||
#include <algorithm> | |||
#include "graph/operator_reg.h" | |||
#include "graph/operator.h" | |||
namespace ge { | |||
/** | |||
*@brief Creates and returns an empty tensor list. \n | |||
*@par Inputs: | |||
*@li element_shape: A shape compatible with that of elements in the list. | |||
*@li max_num_elements: The maximum number of elements. \n | |||
*@par Attributes: | |||
*@li element_dtype: The type of elements in the list. \n | |||
*@par Outputs: | |||
*@li handle: An empty tensor list . \n | |||
*@par Third-party framework compatibility. | |||
*Compatible with tensorflow EmptyTensorList operator. | |||
*/ | |||
REG_OP(EmptyTensorList) | |||
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||
.INPUT(max_num_elements, TensorType({DT_INT32})) | |||
.OUTPUT(handle, TensorType({DT_VARIANT})) | |||
.ATTR(element_dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(EmptyTensorList) | |||
/** | |||
*@brief Returns a list which has the passed-in `Tensor` as last element | |||
and the other elements of the given list in `input_handle`. \n | |||
*@par Inputs: | |||
*@li input_handle: The old list. | |||
*@li tensor: The tensor to put on the list. \n | |||
*@par Attributes: | |||
*@li element_dtype: The type of elements in the list. \n | |||
*@par Outputs: | |||
*@li output_handle:A list with the elements of old list followed by tensor. \n | |||
*@par Third-party framework compatibility. | |||
*Compatible with tensorflow TensorListPushBack operator. | |||
*/ | |||
REG_OP(TensorListPushBack) | |||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||
.ATTR(element_dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(TensorListPushBack) | |||
/** | |||
*@brief Returns the last element of the input list as well as a | |||
list with all but that element. \n | |||
*@par Inputs: | |||
*@li input_handle: The input list. | |||
*@li element_shape: A shape compatible with that of elements in the list. \n | |||
*@par Attributes: | |||
*@li element_dtype: The type of elements in the list. \n | |||
*@par Outputs: | |||
*@li output_handle: A list containing all but the last element of the old list. | |||
*@li tensor: The withdrawn last element of the list. \n | |||
*@par Third-party framework compatibility. | |||
*Compatible with tensorflow TensorListPopBack operator. | |||
*/ | |||
REG_OP(TensorListPopBack) | |||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||
.INPUT(element_shape, TensorType({DT_INT32})) | |||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||
.OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||
.ATTR(element_dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(TensorListPopBack) | |||
/** | |||
*@brief Returns the number of tensors in the input tensor list. \n | |||
*@par Inputs: | |||
*@li input_handle: The input list. \n | |||
*@par Outputs: | |||
*@li length:The number of tensors in the list. \n | |||
*@par Third-party framework compatibility. | |||
*Compatible with tensorflow TensorListLength operator. | |||
*/ | |||
REG_OP(TensorListLength) | |||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||
.OUTPUT(length, TensorType({DT_INT32})) | |||
.OP_END_FACTORY_REG(TensorListLength) | |||
/** | |||
*@brief Returns the shape of elements in the input tensor list. \n | |||
*@par Inputs: | |||
*@li input_handle: The input list. \n | |||
*@par Attributes: | |||
*@li shape_type: The type of shape in the list. \n | |||
*@par Outputs: | |||
*@li element_shape:A shape compatible with that of elements in the list. \n | |||
*@par Third-party framework compatibility. | |||
*Compatible with tensorflow TensorListElementShape operator. | |||
*/ | |||
REG_OP(TensorListElementShape) | |||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||
.OUTPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||
.ATTR(shape_type, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(TensorListElementShape) | |||
/** | |||
*@brief Returns a list of the given size with empty elements. \n | |||
*@par Inputs: | |||
*@li element_shape: A shape compatible with that of elements in the list. | |||
*@li num_elements: The number of elements to reserve. \n | |||
*@par Attributes: | |||
*@li element_dtype: The type of elements in the list. | |||
*@li shape_type: The type of shape in the list. \n | |||
*@par Outputs: | |||
*@li handle: An output tensor list . \n | |||
*@par Third-party framework compatibility. | |||
*Compatible with tensorflow TensorListReserve operator. | |||
*/ | |||
REG_OP(TensorListReserve) | |||
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||
.INPUT(num_elements, TensorType({DT_INT32})) | |||
.OUTPUT(handle, TensorType({DT_VARIANT})) | |||
.ATTR(element_dtype, Type, DT_INT32) | |||
.ATTR(shape_type, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(TensorListReserve) | |||
/** | |||
*@brief Returns the element at the given index position in the input tensor list. \n | |||
*@par Inputs: | |||
*@li input_handle: The input list. | |||
*@li index: A tensor of position. | |||
*@li element_shape: A shape compatible with that of elements in the list. \n | |||
*@par Attributes: | |||
*@li element_dtype: The type of elements in the list. \n | |||
*@par Outputs: | |||
*@li item: An output tensor value of index position . \n | |||
*@par Third-party framework compatibility. | |||
*Compatible with tensorflow TensorListGetItem operator. | |||
*/ | |||
REG_OP(TensorListGetItem) | |||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||
.INPUT(index, TensorType({DT_INT32})) | |||
.INPUT(element_shape, TensorType({DT_INT32})) | |||
.OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||
.ATTR(element_dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(TensorListGetItem) | |||
/** | |||
*@brief Sets the index-th position of the list to contain the given tensor. \n | |||
*@par Inputs: | |||
*@li input_handle: The input list. | |||
*@li index: The position in the list to which the tensor will be assigned. | |||
*@li item: The element to be assigned to that position. \n | |||
*@par Attributes: | |||
*@li element_dtype: The type of elements in the list. \n | |||
*@par Outputs: | |||
*@li output_handle: An output tensor list . \n | |||
*@par Third-party framework compatibility. | |||
*Compatible with tensorflow TensorListSetItem operator. | |||
*/ | |||
REG_OP(TensorListSetItem) | |||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||
.INPUT(index, TensorType({DT_INT32})) | |||
.INPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||
.ATTR(element_dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(TensorListSetItem) | |||
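Taken together, the tensor-list operators above behave much like a variant-typed std::vector; the host-side analogy below is only an illustration of the semantics, not the GE runtime.

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> list;         // EmptyTensorList
  list.resize(3);                // TensorListReserve(num_elements = 3)
  list[2] = 42;                  // TensorListSetItem(index = 2, item = 42)
  int item = list[2];            // TensorListGetItem(index = 2)
  list.push_back(7);             // TensorListPushBack(tensor = 7)
  int last = list.back();        // TensorListPopBack returns the last element...
  list.pop_back();               // ...and the list without it
  std::printf("length=%zu item=%d last=%d\n", list.size(), item, last);  // TensorListLength
  return 0;
}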
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -365,6 +365,27 @@ REG_OP(GetNext) | |||
.ATTR(channel_name, String, "") | |||
.OP_END_FACTORY_REG(GetNext) | |||
/** | |||
*@brief Get dynamic dims after GetNext. \n | |||
*@par Inputs: | |||
*input: A nested structure of Tensor objects, from GetNext's output. \n | |||
*@par Attributes: | |||
*@li shape_info: GE shape_info for each input; -1 means an unknown dim. | |||
*@li N: Inputs number. \n | |||
*@par Outputs: | |||
*dims: GE unknown dims, a vector of int64. \n | |||
*/ | |||
REG_OP(GetDynamicDims) | |||
.DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64})) | |||
.OUTPUT(dims, TensorType({DT_INT32, DT_INT64})) | |||
.REQUIRED_ATTR(shape_info, ListInt) | |||
.REQUIRED_ATTR(N, Int) | |||
.OP_END_FACTORY_REG(GetDynamicDims) | |||
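A host-side sketch of the behavior described above, assuming shape_info carries each input's expected shape with -1 marking unknown dims, and that the op emits the runtime value of every -1 entry; names are illustrative.

#include <cstdint>
#include <vector>

static std::vector<int64_t> GetDynamicDimsRef(
    const std::vector<int64_t> &shape_info,       // e.g. {-1, 3, -1, 224}
    const std::vector<int64_t> &runtime_shape) {  // e.g. { 8, 3, 224, 224}
  std::vector<int64_t> dims;
  for (size_t i = 0; i < shape_info.size(); ++i) {
    if (shape_info[i] == -1) dims.push_back(runtime_shape[i]);  // unknown dim
  }
  return dims;                                    // -> {8, 224}
}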
/** | |||
*@brief End of sequence . \n | |||
@@ -710,6 +731,9 @@ REG_OP(IFMR) | |||
*@par Third-party framework compatibility | |||
*Compatible with mindspore | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(WtsARQ) | |||
@@ -741,6 +765,9 @@ REG_OP(WtsARQ) | |||
*@par Third-party framework compatibility | |||
*Compatible with mindspore | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(ActsULQ) | |||
@@ -768,6 +795,9 @@ REG_OP(ActsULQ) | |||
*@par Third-party framework compatibility | |||
*Compatible with mindspore | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(ActsULQInputGrad) | |||
@@ -790,6 +820,9 @@ REG_OP(ActsULQInputGrad) | |||
*@par Third-party framework compatibility | |||
*Compatible with mindspore | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(ActULQClampMaxGrad) | |||
@@ -812,6 +845,9 @@ REG_OP(ActULQClampMaxGrad) | |||
*@par Third-party framework compatibility | |||
*Compatible with mindspore | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(ActULQClampMinGrad) | |||
@@ -821,6 +857,33 @@ REG_OP(ActULQClampMinGrad) | |||
.OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(ActULQClampMinGrad) | |||
/** | |||
* @brief Computes Lp norm. | |||
* @par Inputs: | |||
* @li x: An ND tensor of type float16, float32. \n | |||
* | |||
* @par Attributes: | |||
* @li p: Int. The order of the norm; "inf" and "-inf" are also accepted. Default value is 2. | |||
* @li axes: ListInt, {} means all axes will be computed. | |||
* @li keepdim: Bool, default is false. | |||
* @li epsilon: Float, default is 1e-12. \n | |||
* @par Outputs: | |||
* @li y: An ND tensor of type float16, float32. The shape of y depends | |||
* on axes and keepdim. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator LpNorm. | |||
*/ | |||
REG_OP(LpNorm) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(p, Int, 2) | |||
.ATTR(axes, ListInt, {}) | |||
.ATTR(keepdim, Bool, false) | |||
.ATTR(epsilon, Float, 1e-12) | |||
.OP_END_FACTORY_REG(LpNorm) | |||
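For reference, the quantity computed over the selected axes is the standard Lp norm, with the max/min norms at the infinities (a LaTeX restatement, not from the original header):

\|x\|_p = \Big( \sum_i |x_i|^p \Big)^{1/p}, \qquad
\|x\|_{\infty} = \max_i |x_i|, \qquad
\|x\|_{-\infty} = \min_i |x_i|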
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -38,8 +38,8 @@ namespace ge { | |||
* float32, int32. Has format [ND, NHWC] . \n | |||
*@par Attributes: | |||
*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||
*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||
*@par Outputs: | |||
*y: The result matrix Tensor. 2D. Must be one of the following types: float16, | |||
@@ -70,8 +70,8 @@ REG_OP(MatMul) | |||
* float32, int32. Has format [ND, NHWC] . \n | |||
*@par Attributes: | |||
*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||
*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||
*@par Outputs: | |||
*y: The result matrix Tensor. 2D. Must be one of the following types: float16, | |||
@@ -156,8 +156,8 @@ REG_OP(GEMM) | |||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||
*@par Attributes: | |||
*@li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||
*@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||
*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||
*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||
*@par Outputs: | |||
*y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | |||
@@ -175,6 +175,41 @@ REG_OP(BatchMatMul) | |||
.ATTR(adj_x2, Bool, false) | |||
.OP_END_FACTORY_REG(BatchMatMul) | |||
/** | |||
* @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li x1: A matrix Tensor. Must be one of the following types: float16, | |||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. | |||
* @li x2: A matrix Tensor. Must be one of the following types: float16, | |||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||
* @li bias: A matrix Tensor. Must be one of the following types: float16, | |||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||
* @par Attributes: | |||
* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||
* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||
* @par Outputs: | |||
* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | |||
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. Has the same rank as "x1" and "x2" . \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator BatchMatmul. | |||
*/ | |||
REG_OP(BatchMatMulV2) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.ATTR(adj_x1, Bool, false) | |||
.ATTR(adj_x2, Bool, false) | |||
.OP_END_FACTORY_REG(BatchMatMulV2) | |||
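A small shape-inference sketch for the batched product (illustrative; it assumes equal batch dims on both sides, and that adj_x1/adj_x2 swap the last two dims as documented):

#include <cstdint>
#include <utility>
#include <vector>

// [B..., M, K] x [B..., K, N] -> [B..., M, N]
static std::vector<int64_t> BatchMatMulShape(std::vector<int64_t> x1,
                                             std::vector<int64_t> x2,
                                             bool adj_x1, bool adj_x2) {
  size_t r1 = x1.size(), r2 = x2.size();
  if (adj_x1) std::swap(x1[r1 - 2], x1[r1 - 1]);
  if (adj_x2) std::swap(x2[r2 - 2], x2[r2 - 1]);
  std::vector<int64_t> y(x1.begin(), x1.end() - 1);  // batch dims + M
  y.push_back(x2[r2 - 1]);                           // N
  return y;  // e.g. {4, 16, 8} x {4, 8, 32} -> {4, 16, 32}
}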
/** | |||
*@brief Computes half the L2 norm of a tensor without the sqrt . \n | |||
@@ -979,6 +1014,14 @@ REG_OP(MatrixDiagV2) | |||
.OUTPUT(output, TensorType::BasicType()) | |||
.OP_END_FACTORY_REG(MatrixDiagV2) | |||
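/**
*@brief Accumulates the rows of "updates" selected by "indices" into "var"
* along "axis". (Semantics inferred from the registration below; presumably
* mirrors the Pytorch operator index_add.) \n

*@par Inputs:
*@li var: A Tensor to be updated.
*@li indices: A 1-D Tensor of type int32. Positions along "axis" to update.
*@li updates: A Tensor of the same type as "var". Values to add. \n

*@par Attributes:
*axis: The dimension along which to index. Defaults to 0. \n

*@par Outputs:
*var_out: A Tensor. Has the same type as "var".
*/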
REG_OP(IndexAdd) | |||
.INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||
.INPUT(indices, TensorType({DT_INT32})) | |||
.INPUT(updates, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||
.OUTPUT(var_out, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||
.ATTR(axis, Int, 0) | |||
.OP_END_FACTORY_REG(IndexAdd) | |||
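A scalar reference for the axis = 0 case (illustrative):

#include <vector>

// var_out = var; then var_out[indices[i]][j] += updates[i][j]
static std::vector<std::vector<float>> IndexAddRef(
    std::vector<std::vector<float>> var, const std::vector<int> &indices,
    const std::vector<std::vector<float>> &updates) {
  for (size_t i = 0; i < indices.size(); ++i)
    for (size_t j = 0; j < updates[i].size(); ++j)
      var[indices[i]][j] += updates[i][j];
  return var;
}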
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -365,6 +365,25 @@ REG_OP(BiasAddGrad) | |||
* 4-D with shape [batch, out_height, out_width, out_channels] | |||
* or [batch, out_channels, out_height, out_width]. | |||
* Gradients with respect to the output of the convolution. | |||
*\n | |||
*\n | |||
* The following are the supported data types and data formats: | |||
*@verbatim | |||
| Tensor | out_backprop | filter | y | |||
------------|-------------|---------|-------- | |||
| Data Type | float16 | float16 | float16 | |||
| |-------------|---------|-------- | |||
| | float32 | float32 | float32 | |||
| |-------------|---------|-------- | |||
| | float64 | float64 | float64 | |||
------------|-------------|---------|-------- | |||
| Format | NCHW | NCHW | NCHW | |||
| | NHWC | HWCN | NHWC | |||
@endverbatim | |||
* For float32 and float64 type, the actual calculation on the chip is based on | |||
* float16. | |||
*\n | |||
* | |||
*@par Attributes: | |||
* Five attributes: | |||
* @li strides: A tuple/list of 4 integers. The stride of the sliding window | |||
@@ -377,8 +396,52 @@ REG_OP(BiasAddGrad) | |||
* channels. | |||
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | |||
* "NHWC". Specify the data format of the input and output data. | |||
*\n | |||
*\n | |||
* The following value range restrictions must be met: | |||
*@verbatim | |||
| Name | Field | Scope | |||
-------------------|----------|-------------- | |||
| input_size | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Filter | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| out_backprop | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| y(fmap) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Stride | H | [1, 63] | |||
| | W | [1, 63] | |||
-------------------|----------|-------------- | |||
| Padding | Top | [0, 255] | |||
| | Bottom | [0, 255] | |||
| | Left | [0, 255] | |||
| | Right | [0, 255] | |||
-------------------|----------|-------------- | |||
| Dilation | H | [1, 255] | |||
| | W | [1, 255] | |||
@endverbatim | |||
* In Ascend910, the H and W dimensions of fmap or out_backprop do not support the value 1 when | |||
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||
*\n | |||
* | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as filter, and has the same format as input_size. | |||
*\n | |||
* out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||
* (dilation_h * (filter_height - 1) + 1)) | |||
* / stride_h + 1 | |||
*\n | |||
* out_backprop_width = (fmap_width + pad_left + pad_right - | |||
* (dilation_w * (filter_width - 1) + 1)) | |||
* / stride_w + 1 | |||
*\n | |||
* | |||
*@par Third-party framework compatibility | |||
* Compatible with Tensorflow's conv2d_backprop_input | |||
*/ | |||
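The height/width relation quoted in the Outputs section above recurs throughout this file; the helper below is a runnable restatement of that formula (names are ours, not part of the header):

#include <cstdio>

// out = (in + pad_before + pad_after - (dilation * (k - 1) + 1)) / stride + 1
static int ConvOutDim(int in, int pad_before, int pad_after, int k,
                      int dilation, int stride) {
  return (in + pad_before + pad_after - (dilation * (k - 1) + 1)) / stride + 1;
}

int main() {
  // fmap_h = 224, pad 1/1, filter 3, dilation 1, stride 2 -> 112
  std::printf("%d\n", ConvOutDim(224, 1, 1, 3, 1, 2));
  return 0;
}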
@@ -454,6 +517,21 @@ REG_OP(Conv2DBackpropInputD) | |||
* @li bias: An optional tensor. Must have the same type as "y". | |||
* @li offset_w: An optional 1D tensor for quantized deconvolution. | |||
* Type is int8. Reserved.\n | |||
*\n | |||
*\n | |||
* The following are the supported data types and data formats: | |||
*@verbatim | |||
| Tensor | x | filter | bias | y | |||
------------|---------|---------|---------|-------- | |||
| Data Type | float16 | float16 | float16 | float16 | |||
| |---------|---------|---------|-------- | |||
| | int8 | int8 | int32 | int32 | |||
------------|---------|---------|---------|-------- | |||
| Format | NCHW | NCHW | ND | NCHW | |||
@endverbatim | |||
* For int8, a dequant or requant operator must be followed. | |||
*\n | |||
* | |||
*@par Attributes: | |||
* Six attributes: | |||
* @li strides: A tuple or list of 2 integers. The stride of the sliding window | |||
@@ -468,8 +546,51 @@ REG_OP(Conv2DBackpropInputD) | |||
Specify the data format of the input and output data. | |||
* @li offset_x: An optional integer for quantized deconvolution. | |||
* Defaults to "0". | |||
*\n | |||
*\n | |||
* The following value range restrictions must be met: | |||
*@verbatim | |||
| Name | Field | Scope | |||
-------------------|----------|-------------- | |||
| x (out_backprop) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Filter | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| y (fmap) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Stride | H | [1, 63] | |||
| | W | [1, 63] | |||
-------------------|----------|-------------- | |||
| Padding | Top | [0, 255] | |||
| | Bottom | [0, 255] | |||
| | Left | [0, 255] | |||
| | Right | [0, 255] | |||
-------------------|----------|-------------- | |||
| Dilation | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| Offset_x | | [-128, 127] | |||
@endverbatim | |||
* In Ascend910, the H and W dimensions of fmap or out_backprop do not support the value 1 when | |||
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||
*\n | |||
* | |||
*@par Outputs: | |||
* y: A Tensor. 4D tensor with shape [batch, channels, height, width]. | |||
*\n | |||
* out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||
* (dilation_h * (filter_height - 1) + 1)) | |||
* / stride_h + 1 | |||
*\n | |||
* out_backprop_width = (fmap_width + pad_left + pad_right - | |||
* (dilation_w * (filter_width - 1) + 1)) | |||
* / stride_w + 1 | |||
*\n | |||
* | |||
* When type of x is float16, the type of y must be float16. | |||
* When type of x is int8, the type of y must be int32. | |||
*/ | |||
@@ -502,6 +623,25 @@ REG_OP(Deconvolution) | |||
* [batch, out_height, out_width, out_channels] or [batch, out_channels, | |||
* out_height, out_width]. Gradients with respect to the output of the | |||
* convolution. | |||
*\n | |||
*\n | |||
* The following are the supported data types and data formats: | |||
*@verbatim | |||
| Tensor | x | out_backprop | y | |||
------------|---------|--------------|--------- | |||
| Data Type | float16 | float16 | float16 | |||
| |---------|--------------|--------- | |||
| | float32 | float32 | float32 | |||
| |---------|--------------|--------- | |||
| | float64 | float64 | float64 | |||
|-----------|---------|--------------|--------- | |||
| Format | NCHW | NCHW | NCHW | |||
| | NHWC | NHWC | HWCN | |||
@endverbatim | |||
* For float32 and float64 types of x and out_backprop, the actual calculation on the chip | |||
* is based on float16. | |||
*\n | |||
* | |||
*@par Attributes: | |||
* Five attributes: | |||
* @li strides: A tuple/list of 4 integers. The stride of the sliding window | |||
@@ -514,8 +654,52 @@ REG_OP(Deconvolution) | |||
* channels. | |||
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | |||
* "NHWC". Specify the data format of the input and output data. | |||
*\n | |||
*\n | |||
* The following value range restrictions must be met: | |||
*@verbatim | |||
| Name | Field | Scope | |||
-------------------|----------|-------------- | |||
| x(fmap) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Filter Size | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| out_backprop | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| y | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Stride | H | [1, 63] | |||
| | W | [1, 63] | |||
-------------------|----------|-------------- | |||
| Padding | Top | [0, 255] | |||
| | Bottom | [0, 255] | |||
| | Left | [0, 255] | |||
| | Right | [0, 255] | |||
-------------------|----------|-------------- | |||
| Dilation | H | [1, 255] | |||
| | W | [1, 255] | |||
@endverbatim | |||
* In Ascend910, the H and W dimensions of out_backprop do not support the value 1 when | |||
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||
*\n | |||
* | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as x and the same format as filter_size. | |||
*\n | |||
* out_backprop_height = (in_height + pad_top + pad_bottom - | |||
* (dilation_h * (filter_height - 1) + 1)) | |||
* / stride_h + 1 | |||
*\n | |||
* out_backprop_width = (in_width + pad_left + pad_right - | |||
* (dilation_w * (filter_width - 1) + 1)) | |||
* / stride_w + 1 | |||
*\n | |||
* | |||
*@par Third-party framework compatibility | |||
* Compatible with Tensorflow's conv2d_backprop_filter | |||
*/ | |||
@@ -617,8 +801,7 @@ REG_OP(Conv2DBackpropFilterD) | |||
* (top, bottom, left, right) side of the input. | |||
*@li dilations: Optional. A list of 4 integers. The dilation factor for each | |||
* dimension of input. The dimension order is determined by the data format of | |||
* "x". The N and C dimensions must be set to 1. The H and W dimensions must be | |||
* set to 1 for int8 type. Defaults to [1, 1, 1, 1]. | |||
* "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1]. | |||
*@li groups: Optional. An integer of type int32. The number of blocked | |||
* connections from input channels to output channels. In_channels and | |||
* out_channels must both be divisible by "groups". Defaults to 1. | |||
@@ -652,6 +835,8 @@ REG_OP(Conv2DBackpropFilterD) | |||
| Offset_x | | [-128, 127] | |||
@endverbatim | |||
* The W dimension of the input image supports cases exceeding 4096, but it may | |||
* cause compilation errors. | |||
*\n | |||
* | |||
*@par Outputs: | |||
@@ -666,21 +851,6 @@ REG_OP(Conv2DBackpropFilterD) | |||
* out_width = (in_width + pad_left + pad_right - | |||
* (dilation_w * (filter_width - 1) + 1)) | |||
* / stride_w + 1 | |||
* | |||
*@attention Constraints: | |||
*@li The following restrictions on the output must be met: | |||
*@verbatim | |||
| Output | Restrictions | |||
----------|-------------------------------- | |||
| H == 1 | H * W(input) == H * W(filter) | |||
| W == 1 | | |||
----------|-------------------------------- | |||
| H != 1 | W(input) == W(filter) | |||
| W == 1 | Only for Ascend310 Hi3796V300CS | |||
@endverbatim | |||
* "H * W (input)" indicates the image size after padding and "H * W (filter)" | |||
* indicates the filter size after dilation."W(input)" and W(filter) indicate | |||
* the same rule on the W dimension. | |||
*\n | |||
* | |||
*@par Quantization supported or not | |||
@@ -778,7 +948,7 @@ REG_OP(Conv2DCompress) | |||
* With the format "HWCN" , the data is stored in the order of: [filter_height, | |||
* filter_width, in_channels / groups, out_channels]. | |||
*@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format | |||
* "NHWC", the data is stored in the order of: [batch, in_height, in_width, | |||
* "NHWC", the data is stored in the order of: [batch, out_height, out_width, | |||
* deformable_groups * filter_height * filter_width * 3]. | |||
*@li bias: An optional 1D tensor of additive biases to the filter outputs. | |||
* The data is stored in the order of: [out_channels]. | |||
@@ -822,25 +992,12 @@ REG_OP(Conv2DCompress) | |||
*@verbatim | |||
| Name | Field | Scope | |||
--------------------|--------|---------------------------- | |||
| Input Image Size | H | [1, 100000] | |||
| | W | [1, 4096] | |||
| Input Image Size | H | [1, 100000 / filter_height] | |||
| | W | [1, 4096 / filter_width] | |||
--------------------|--------|---------------------------- | |||
| Filter Size | H | [1, 255] | |||
| | W | [1, 255] | |||
--------------------|--------|---------------------------- | |||
| Stride | H | [1, 63] | |||
| Filter Size | H | [1, 63] | |||
| | W | [1, 63] | |||
--------------------|--------|---------------------------- | |||
| Padding | Top | [0, 255] | |||
| | Bottom | [0, 255] | |||
| | Left | [0, 255] | |||
| | Right | [0, 255] | |||
------------ -------|--------|---------------------------- | |||
| Dilation | H | [1, 255] | |||
| | W | [1, 255] | |||
@endverbatim | |||
* "W(input)" indicate the image width after padding and W(filter) indicates the | |||
* filter width after dilation. | |||
*\n | |||
* | |||
*@par Outputs: | |||
@@ -855,21 +1012,7 @@ REG_OP(Conv2DCompress) | |||
* out_width = (in_width + pad_left + pad_right - | |||
* (dilation_w * (filter_width - 1) + 1)) | |||
* / stride_w + 1 | |||
* | |||
*@attention Constraints: | |||
*@li The following restrictions on the output must be met: | |||
*@verbatim | |||
| Output | Restrictions | |||
----------|-------------------------------- | |||
| H == 1 | H * W(input) == H * W(filter) | |||
| W == 1 | | |||
----------|-------------------------------- | |||
| H != 1 | W(input) == W(filter) | |||
| W == 1 | Only for Ascend310 Hi3796V300CS | |||
@endverbatim | |||
* "H * W(input)" indicates the image size after padding and "H * W(filter)" | |||
* indicates the filter size after dilation. "W(input)" and W(filter) indicate | |||
* the same rule on the W dimension. | |||
*\n | |||
* | |||
*@par Quantization supported or not | |||
*@li No | |||
@@ -920,8 +1063,8 @@ REG_OP(DeformableConv2D) | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
* @li dilations: A list of 5 integers. Specifies the dilation factor for each | |||
* dimension of "x", now only support [1,1,1,1,1] | |||
* The N and C dimensions must be 1. Has the same format as "x". | |||
* dimension of "x". | |||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||
* @li offset_x: An optional int. Input offset, used for quantized inference. | |||
* Defaults to 0. Reserved . \n | |||
@@ -967,8 +1110,8 @@ REG_OP(Conv3D) | |||
*@par Required Attributes: | |||
* @li strides: A list of 5 integers. Specifies the stride of the sliding window | |||
* for each dimension of "x". | |||
* The N and C dimensions must be 1. Has the same format as "x". | |||
* for each dimension of "out_backprop". | |||
* The N and C dimensions must be 1. Has the same format as "out_backprop". | |||
* @li pads: A list of 6 integers. | |||
* Supports only padding along the D, H and W dimensions in sequence of head, | |||
* tail, top, bottom, left and right . \n | |||
@@ -980,10 +1123,11 @@ REG_OP(Conv3D) | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | |||
* dimension of the input, now only support [1,1,1,1,1] | |||
* dimension of the input. | |||
* The N, C and D dimensions must be 1. Has the same format as "out_backprop". | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as filter,and has same format as input_size | |||
* y: A Tensor. Has the same type as filter,and has same format as "input_size" | |||
*@par Third-party framework compatibility | |||
* Compatible with Tensorflow's conv3d_backprop_input | |||
@@ -1011,8 +1155,8 @@ REG_OP(Conv3DBackpropInput) | |||
*@par Required Attributes: | |||
* @li strides: A list of 5 integers. Specifies the stride of the sliding window | |||
* for each dimension of "x". | |||
* The N and C dimensions must be 1. Has the same format as "x". | |||
* for each dimension of "out_backprop". | |||
* The N and C dimensions must be 1. Has the same format as "out_backprop". | |||
* @li pads: A list of 6 integers. Supports only padding along the D, H and W | |||
* dimensions in sequence of head, tail, top, bottom, left and right. | |||
* @li input_size: A tuple/list of type int32, int64. An integer vector | |||
@@ -1027,9 +1171,10 @@ REG_OP(Conv3DBackpropInput) | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | |||
* dimension of input, now only support [1,1,1,1,1] | |||
* dimension of input. | |||
* The N, C and D dimensions must be 1. Has the same format as "out_backprop". | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type and data format as out_backprop. | |||
* y: A Tensor. Has the same type and data format as "out_backprop". | |||
*@par Third-party framework compatibility | |||
* Compatible with Tensorflow's conv3d_backprop_input | |||
@@ -1072,9 +1217,7 @@ REG_OP(Conv3DBackpropInputD) | |||
* @li c_t: An optional Tensor of type float16, float32. The cell state at time t . \n | |||
*@par Third-party framework compatibility: | |||
* Compatible with the Pytorch operator adds. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
* Compatible with the Caffe operator LSTM. | |||
*/ | |||
REG_OP(LSTM) | |||
.INPUT(x, TensorType({DT_FLOAT16})) | |||
@@ -1121,14 +1264,15 @@ REG_OP(LSTM) | |||
*@par Attributes: | |||
* Three attributes: | |||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | |||
* dimension of input, now only support [1,1,1,1,1]. | |||
* dimension of input. | |||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
*@par Outputs: | |||
* y: A Tensor that has the same type as x | |||
* y: A Tensor that has the same type as "x" | |||
* and the format is NDHWC, NCDHW or DHWCN. | |||
*@par Third-party framework compatibility | |||
* Compatible with Tensorflow's conv3d_backprop_filter | |||
@@ -1172,7 +1316,8 @@ REG_OP(Conv3DBackpropFilter) | |||
*@par Attributes: | |||
* Three attributes: | |||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | |||
* dimension of input, now only support [1,1,1,1,1]. | |||
* dimension of input. | |||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
@@ -1226,13 +1371,14 @@ REG_OP(Conv3DBackpropFilterD) | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* @li dilations: A tuple/list of 5 integers, | |||
* The dilation factor for each dimension of input, now only support [1,1,1,1,1] | |||
* The dilation factor for each dimension of input. | |||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
* @li output_padding: The size will be added in the output shape. | |||
* @li offset_x: Input offset_x value. Reserved. | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type and format as x. | |||
* y: A Tensor. Has the same type and format as "x". | |||
*/ | |||
REG_OP(Conv3DTranspose) | |||
.INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | |||
@@ -1273,7 +1419,8 @@ REG_OP(Conv3DTranspose) | |||
*@par Attributes: | |||
* Five attributes: | |||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | |||
* dimension of input, now only support [1,1,1,1,1] | |||
* dimension of input. | |||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
@@ -1281,7 +1428,7 @@ REG_OP(Conv3DTranspose) | |||
* @li output_padding: The size will be added in the output shape. | |||
* @li offset_x: Input offset_x value. Reserved. | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type and format as x. | |||
* y: A Tensor. Has the same type and format as "x". | |||
*@par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. | |||
*/ | |||
@@ -1316,6 +1463,22 @@ REG_OP(Conv3DTransposeD) | |||
* or [out_channels, in_channel, filter_height, filter_width]. | |||
* @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". | |||
* @li offset_w: An optional 1D tensor for quantized inference. Reserved. | |||
*\n | |||
*\n | |||
* The following are the supported data types and data formats: | |||
*@verbatim | |||
| Tensor | x | filter | bias | y | |||
------------|---------|---------|---------|-------- | |||
| Data Type | float16 | float16 | float16 | float16 | |||
| |---------|---------|---------|-------- | |||
| | int8 | int8 | int32 | int32 | |||
------------|---------|---------|---------|-------- | |||
| Format | NCHW | NCHW | ND | NCHW | |||
| | NHWC | HWCN | | NHWC | |||
@endverbatim | |||
* For int8, a dequant or requant operator must be followed. | |||
*\n | |||
* | |||
*@par Required Attributes: | |||
* @li strides: A required tuple/list of 4 integers. The stride of the sliding | |||
* window for H/W dimension. The index of H/W is same as data_format. | |||
@@ -1334,9 +1497,55 @@ REG_OP(Conv3DTransposeD) | |||
* to [0, 0, 0, 0]. | |||
* @li offset_x: An optional int. Input offset, used for quantized inference. | |||
* Defaults to "0". | |||
*\n | |||
*\n | |||
* The following value range restrictions must be met: | |||
*@verbatim | |||
| Name | Field | Scope | |||
-------------------|----------|-------------- | |||
| input_size | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| x (out_backprop) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| filter | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| y (fmap) | H | [1, 4096] | |||
| | W | [1, 4096] | |||
-------------------|----------|-------------- | |||
| Stride | H | [1, 63] | |||
| | W | [1, 63] | |||
-------------------|----------|-------------- | |||
| Padding | Top | [0, 255] | |||
| | Bottom | [0, 255] | |||
| | Left | [0, 255] | |||
| | Right | [0, 255] | |||
-------------------|----------|-------------- | |||
| Dilation | H | [1, 255] | |||
| | W | [1, 255] | |||
-------------------|----------|-------------- | |||
| Offset_x | | [-128, 127] | |||
@endverbatim | |||
* In Ascend910, the H and W dimensions of fmap or out_backprop do not support the value 1 when | |||
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||
*\n | |||
* | |||
*@par Outputs: | |||
* y: A Tensor of type float16 or int32, and has the same format as | |||
* input_size. | |||
*\n | |||
* out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||
* (dilation_h * (filter_height - 1) + 1)) | |||
* / stride_h + 1 | |||
*\n | |||
* out_backprop_width = (fmap_width + pad_left + pad_right - | |||
* (dilation_w * (filter_width - 1) + 1)) | |||
* / stride_w + 1 | |||
*\n | |||
* | |||
*/ | |||
REG_OP(Conv2DTranspose) | |||
.INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | |||
@@ -1405,13 +1614,13 @@ REG_OP(Conv2DTransposeD) | |||
/** | |||
*@brief Computes the deformed convolution output with the expected input | |||
*@par Inputs: | |||
* Four inputs: | |||
* Two inputs: | |||
* @li x: A Tensor of type float16,float32 | |||
* @li offsets: A Tensor of type float16,float32.Deformation offset parameter. | |||
*@par Required Attributes: | |||
* @li strides: A tuple/list of 4 integers.The stride of the sliding window for | |||
* height and width for H/W dimension. | |||
* @li pads: A tuple/list of 4 integers.Padding added to each dimension | |||
* @li pads: A tuple/list of 4 integers.Padding added to H/W dimension | |||
* of the input. | |||
* @li ksize: A tuple/list of 2 integers.kernel size. | |||
*@par Attributes: | |||
@@ -1420,6 +1629,7 @@ REG_OP(Conv2DTransposeD) | |||
* of input. Defaults to [1, 1, 1, 1] | |||
* @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | |||
* @li deformable_groups: Specify the c-axis grouping number of input x. | |||
* @li modulated: Specify version of DeformableConv2D, true means v2, false means v1 | |||
*@par Outputs: | |||
* y: A Tensor. A Tensor of type float16, float32. | |||
*/ | |||
@@ -1433,7 +1643,69 @@ REG_OP(DeformableOffsets) | |||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||
.ATTR(data_format, String, "NCHW") | |||
.ATTR(deformable_groups, Int, 1) | |||
.ATTR(modulated, Bool, true) | |||
.OP_END_FACTORY_REG(DeformableOffsets) | |||
/** | |||
*@brief Computes the gradients of DeformableOffsets with respect to input and offsets | |||
*@par Inputs: | |||
* Three inputs: | |||
* @li grad: A Tensor of type float16,float32. gradients with respect to DeformableOffsets output | |||
* @li x: A Tensor of type float16,float32. | |||
* @li offsets: A Tensor of type float16,float32.Deformation offset parameter. | |||
*@par Required Attributes: | |||
* @li strides: A tuple/list of 4 integers.The stride of the sliding window for | |||
* height and width for H/W dimension. | |||
* @li pads: A tuple/list of 4 integers.Padding added to H/W dimension | |||
* of the input. | |||
* @li ksize: A tuple/list of 2 integers.kernel size. | |||
*@par Attributes: | |||
* Three attributes: | |||
* @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | |||
* of input. Defaults to [1, 1, 1, 1] | |||
* @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | |||
* @li deformable_groups: Specify the c-axis grouping number of input x. | |||
* @li modulated: Specify version of DeformableConv2D, true means v2, false means v1. | |||
*@par Outputs: | |||
* grad_x: A Tensor of type float16, float32. Gradients with respect to input_x | |||
* grad_offsets: A Tensor of type float16, float32. Gradients with respect to input_offsets | |||
*/ | |||
REG_OP(DeformableOffsetsGrad) | |||
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(grad_offsets, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.REQUIRED_ATTR(ksize, ListInt) | |||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||
.ATTR(data_format, String, "NCHW") | |||
.ATTR(deformable_groups, Int, 1) | |||
.ATTR(modulated, Bool, true) | |||
.OP_END_FACTORY_REG(DeformableOffsetsGrad) | |||
/** | |||
*@brief Computes the deformed dilation output with the expected input | |||
*@par Inputs: | |||
* One input: | |||
* @li x: A Tensor of type int8, float16, float32. | |||
*@par Required Attributes: | |||
* @li dilations: A tuple/list of integers. | |||
*@par Attributes: | |||
* Two attributes: | |||
* @li padding_value: An optional float. The value used to fill blank positions. Defaults to 0.0. | |||
* @li pads: A tuple/list of integers. | |||
*@par Outputs: | |||
* y: A Tensor. A Tensor of type int8, float16, float32. | |||
*/ | |||
REG_OP(Dilation) | |||
.INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(dilations, ListInt) | |||
.ATTR(pads, ListInt, {}) | |||
.ATTR(padding_value, Float, 0.0) | |||
.OP_END_FACTORY_REG(Dilation) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1383,6 +1383,7 @@ REG_OP(DecodeWheelsTarget) | |||
*@attention Constraints: | |||
* Only computation of float16 data is supported. | |||
* Note: when the per-image class count * max_size_per_class is too large, compilation fails with an insufficient-memory error. | |||
*/ | |||
REG_OP(BatchMultiClassNonMaxSuppression) | |||
.INPUT(boxes, TensorType({DT_FLOAT16})) | |||
@@ -1485,7 +1486,10 @@ REG_OP(DecodeBboxV2) | |||
* | |||
*@par Outputs: | |||
* @li y1: A Tensor. Must have the same type as x. | |||
* @li y2: A Tensor. Indices of y1 in x.Dtype must be int32. | |||
* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32. | |||
* | |||
*@attention Constraints: | |||
* The upper limit of data on the direction axis is 7040. | |||
*/ | |||
REG_OP(Sort) | |||
.INPUT(x, TensorType({ DT_FLOAT16 })) | |||
@@ -1495,6 +1499,111 @@ REG_OP(Sort) | |||
.ATTR(descending, Bool, false) | |||
.OP_END_FACTORY_REG(Sort) | |||
REG_OP(PtIou) | |||
.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(mode, String, "iou") | |||
.OP_END_FACTORY_REG(PtIou) | |||
/** | |||
*@brief Greedily selects a subset of bounding boxes in descending order of | |||
score . \n | |||
*@par Inputs: | |||
*Input boxes and scores must be float16 type. Inputs include: | |||
*@li boxes: An input tensor with shape [num_batches, spatial_dimension, 4]. | |||
The single box data format is indicated by center_point_box. | |||
*@li scores: An input tensor with shape [num_batches, num_classes, spatial_dimension]. | |||
*@li max_output_size: A scalar integer tensor representing the maximum number | |||
of boxes to be selected by non max suppression. | |||
*@li iou_threshold: A 0-D float tensor representing the threshold for deciding | |||
whether boxes overlap too much with respect to IOU. | |||
*@li score_threshold: A 0-D float tensor representing the threshold for | |||
deciding when to remove boxes based on score . \n | |||
*@par Attributes: | |||
*center_point_box: An integer indicating the format of the box data. | |||
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2], | |||
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair | |||
of box corners, and the coordinates can be provided as normalized | |||
(i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models. | |||
1 - the box data is supplied as [x_center, y_center, width, height]. | |||
Mostly used for Pytorch models. \n | |||
*@par Outputs: | |||
*@li selected_indices: A 2-D integer tensor of shape [M] representing the | |||
selected indices from the boxes tensor, where M <= max_output_size. \n | |||
*@attention Constraints: | |||
*Input boxes and scores must be float16 type . \n | |||
*@par Third-party framework compatibility | |||
*Compatible with onnx NonMaxSuppression operator. | |||
*/ | |||
REG_OP(NonMaxSuppressionV6) | |||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) | |||
.OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) | |||
.OUTPUT(selected_indices, TensorType({DT_INT32})) | |||
.ATTR(center_point_box, Int, 0) | |||
.ATTR(max_boxes_size, Int, 0) | |||
.OP_END_FACTORY_REG(NonMaxSuppressionV6) | |||
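/* Editor's note: the two box encodings selected by "center_point_box", | |||
 * normalized to a common corner form. A hedged sketch; the Box struct and | |||
 * helper names are illustrative, not part of this header. */ | |||
#include <algorithm> | |||
struct Box { float y1, x1, y2, x2; }; | |||
// center_point_box == 0: data is [y1, x1, y2, x2] with corners given as any | |||
// diagonal pair, so sort the corners into canonical order. | |||
inline Box FromCorners(float y1, float x1, float y2, float x2) { | |||
  return { std::min(y1, y2), std::min(x1, x2), | |||
           std::max(y1, y2), std::max(x1, x2) }; | |||
} | |||
// center_point_box == 1: data is [x_center, y_center, width, height]. | |||
inline Box FromCenterSize(float xc, float yc, float w, float h) { | |||
  return { yc - h / 2.0f, xc - w / 2.0f, yc + h / 2.0f, xc + w / 2.0f }; | |||
} | |||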
/** | |||
*@brief Greedily selects a subset of bounding boxes in descending order of | |||
score . \n | |||
*@par Inputs: | |||
*Input boxes and scores must be float16 type. Inputs include: | |||
*@li boxes: An input tensor with shape [num_batches, spatial_dimension, 4]. | |||
The single box data format is indicated by center_point_box. | |||
*@li scores: An input tensor with shape [num_batches, num_classes, spatial_dimension]. | |||
*@li max_output_size: A scalar integer tensor representing the maximum number | |||
of boxes to be selected by non max suppression. | |||
*@li iou_threshold: A 0-D float tensor representing the threshold for deciding | |||
whether boxes overlap too much with respect to IOU. | |||
*@li score_threshold: A 0-D float tensor representing the threshold for | |||
deciding when to remove boxes based on score . \n | |||
*@li index_id: An input tensor with shape [num_batches, num_classes, spatial_dimension, 3]; | |||
the last dim representing (batch_id, class_id, index_id) . \n | |||
*@par Attributes: | |||
*center_point_box: An integer indicating the format of the box data. | |||
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2], | |||
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair | |||
of box corners, and the coordinates can be provided as normalized | |||
(i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models. | |||
1 - the box data is supplied as [x_center, y_center, width, height]. | |||
Mostly used for Pytorch models. \n | |||
*@par Outputs: | |||
*@li selected_indices: A 2-D integer tensor of shape [M] representing the | |||
selected indices from the boxes tensor, where M <= max_output_size. \n | |||
*@attention Constraints: | |||
*Input boxes and scores must be float16 type . \n | |||
*@par Third-party framework compatibility | |||
*Compatible with onnx NonMaxSuppression operator. | |||
*/ | |||
REG_OP(NonMaxSuppressionV7) | |||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) | |||
.OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16})) | |||
.OUTPUT(selected_indices, TensorType({DT_INT32})) | |||
.ATTR(center_point_box, Int, 0) | |||
.ATTR(max_boxes_size, Int, 0) | |||
.OP_END_FACTORY_REG(NonMaxSuppressionV7) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -160,20 +160,20 @@ REG_OP(SigmoidCrossEntropyWithLogits) | |||
.OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) | |||
/** | |||
*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n | |||
*@brief Computes the sigmoid cross entropy loss of "predict" and "target". | |||
*@par Inputs: | |||
* four inputs, including: | |||
*@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. | |||
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n | |||
*@li weight: An multi-dimensional Tensor, specifying the weight value. \n | |||
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. | |||
*@li weight: An multi-dimensional Tensor, specifying the weight value. | |||
*@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n | |||
*@par Attributes: | |||
*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean" . \n | |||
*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean". \n | |||
*@par Outputs: | |||
*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n | |||
*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict". \n | |||
*@par Third-party framework compatibility | |||
* Compatible with PyTorch operator BCEWithLogitsLoss. | |||
@@ -978,6 +978,261 @@ REG_OP(InHost) | |||
.OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) | |||
.ATTR(epsilon, Float, 0.00001) | |||
.OP_END_FACTORY_REG(InHost) | |||
/** | |||
* @brief Performs instance normalization on x. \n | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li x: A Tensor. Must be one of the following types: float16, float32, format is NC1HWC0. | |||
* @li gamma: A Tensor. Must be one of the following types: float16, float32, format is ND. | |||
* @li beta: A Tensor. Must be one of the following types: float16, float32, format is ND. | |||
* @par Attributes: | |||
* @li data_format: A required attribute of type String, specifying the data format of x. \n | |||
* @li epsilon: A required attribute of type Float, added to the variance for numerical stability. \n | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the same type as "x", format is NC1HWC0. \n | |||
* @li mean: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n | |||
* @li variance: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n | |||
* @par Third-party framework compatibility | |||
* Can be used by onnx InstanceNormalization | |||
*/ | |||
REG_OP(InstanceNorm) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(data_format, String) | |||
.REQUIRED_ATTR(epsilon, Float) | |||
.OP_END_FACTORY_REG(InstanceNorm) | |||
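/* Editor's note: a reference sketch of per-instance normalization on a plain | |||
 * NCHW buffer (the real kernel consumes NC1HWC0); illustrative only. */ | |||
#include <cmath> | |||
#include <cstddef> | |||
#include <vector> | |||
void InstanceNormRef(const std::vector<float>& x, const std::vector<float>& gamma, | |||
                     const std::vector<float>& beta, std::size_t n, std::size_t c, | |||
                     std::size_t h, std::size_t w, float epsilon, | |||
                     std::vector<float>* y) { | |||
  const std::size_t hw = h * w; | |||
  y->resize(x.size()); | |||
  for (std::size_t ni = 0; ni < n; ++ni) { | |||
    for (std::size_t ci = 0; ci < c; ++ci) { | |||
      const std::size_t base = (ni * c + ci) * hw; | |||
      // Mean and variance are taken over the spatial dims of one instance. | |||
      float mean = 0.0f, var = 0.0f; | |||
      for (std::size_t i = 0; i < hw; ++i) mean += x[base + i]; | |||
      mean /= static_cast<float>(hw); | |||
      for (std::size_t i = 0; i < hw; ++i) { | |||
        const float d = x[base + i] - mean; | |||
        var += d * d; | |||
      } | |||
      var /= static_cast<float>(hw); | |||
      const float inv_std = 1.0f / std::sqrt(var + epsilon); | |||
      for (std::size_t i = 0; i < hw; ++i) { | |||
        (*y)[base + i] = gamma[ci] * (x[base + i] - mean) * inv_std + beta[ci]; | |||
      } | |||
    } | |||
  } | |||
} | |||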
REG_OP(KlDivLossGrad) | |||
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(reduction, String, "mean") | |||
.ATTR(log_target, Bool, false) | |||
.OP_END_FACTORY_REG(KlDivLossGrad) | |||
/** | |||
* @brief Computes l1_loss_grad or l1_loss_backward. \n | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li grads: A Tensor. Must be one of the following types: float16, float32. | |||
* Required. | |||
* @li predict: A Tensor. Has the same type as "grads". Required. | |||
* @li label: A Tensor. Has the same type as "grads". Required. \n | |||
* @par Attributes: | |||
* @li reduction: An optional attribute of type String. Defaults to "mean". \n | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the same type as "grads". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator L1LossGrad. | |||
*/ | |||
REG_OP(L1LossGrad) | |||
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(reduction, String, "mean") | |||
.OP_END_FACTORY_REG(L1LossGrad) | |||
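/* Editor's note: a hedged reference for the backward rule, assuming | |||
 * PyTorch-style semantics (dL/dpredict = sign(predict - label) * grads, | |||
 * scaled by 1/N when reduction is "mean"); the kernel itself defines the | |||
 * authoritative behavior. */ | |||
#include <cstddef> | |||
#include <string> | |||
#include <vector> | |||
std::vector<float> L1LossGradRef(const std::vector<float>& grads, | |||
                                 const std::vector<float>& predict, | |||
                                 const std::vector<float>& label, | |||
                                 const std::string& reduction) { | |||
  std::vector<float> y(predict.size()); | |||
  const float scale = reduction == "mean" | |||
                          ? 1.0f / static_cast<float>(predict.size()) | |||
                          : 1.0f; | |||
  for (std::size_t i = 0; i < predict.size(); ++i) { | |||
    const float d = predict[i] - label[i]; | |||
    const float sign = (d > 0.0f) ? 1.0f : ((d < 0.0f) ? -1.0f : 0.0f); | |||
    y[i] = sign * grads[i] * scale; | |||
  } | |||
  return y; | |||
} | |||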
/** | |||
* @brief Computes the Lp loss, p = 1, 2, 3, .... | |||
* @par Inputs: | |||
* @li predict: An ND tensor of type float16, float32. | |||
* @li label: An ND tensor of type float16, float32. \n | |||
* @par Attributes: | |||
* @li p: A required int attribute that decides which loss to compute; currently only p = 1 (l1_loss) is supported. | |||
* @li reduction: An optional string. Defaults to "mean". \n | |||
* @par Outputs: | |||
* @li y: An ND tensor with the same shape and type as "predict". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator LpLoss. | |||
*/ | |||
REG_OP(LpLoss) | |||
.INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(p, Int) | |||
.ATTR(reduction, String, "mean") | |||
.OP_END_FACTORY_REG(LpLoss) | |||
/** | |||
* @brief Computes gradients of mse loss. | |||
* @par Inputs: | |||
* @li predict: An ND tensor of type float16, float32. | |||
* @li label: An ND tensor of type float16, float32. | |||
* @li dout: An ND tensor of type float16, float32. \n | |||
* @par Attributes: | |||
* @li reduction: An optional string. Defaults to "mean". \n | |||
* @par Outputs: | |||
* @li y: An ND tensor with the same shape and type as "predict". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator MseLossGrad. | |||
*/ | |||
REG_OP(MseLossGrad) | |||
.INPUT(predict, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||
.INPUT(label, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||
.INPUT(dout, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||
.ATTR(reduction, String, "mean") | |||
.OP_END_FACTORY_REG(MseLossGrad) | |||
/** | |||
* @brief Computes mse loss. | |||
* @par Inputs: | |||
* two inputs, including: | |||
* @li predict: An ND Tensor of dtype float16 or float32. | |||
* @li label: An ND Tensor of dtype float16 or float32.\n | |||
* | |||
* @par Attributes: | |||
* @li reduction: An optional string from "sum", "none", and "mean". Defaults to "mean".\n | |||
* | |||
* @par Outputs: | |||
* @li y: When reduction is "sum" or "mean", y is a scalar. When reduction is | |||
* "none", y has the same type and shape as "predict".\n | |||
*/ | |||
REG_OP(MseLoss) | |||
.INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(reduction, String, "mean") | |||
.OP_END_FACTORY_REG(MseLoss) | |||
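/* Editor's note: a reference sketch of the three reduction modes described | |||
 * above; names are illustrative, not the kernel implementation. */ | |||
#include <cstddef> | |||
#include <string> | |||
#include <vector> | |||
float MseLossRef(const std::vector<float>& predict, | |||
                 const std::vector<float>& label, | |||
                 const std::string& reduction, | |||
                 std::vector<float>* per_elem /* used when reduction=="none" */) { | |||
  float sum = 0.0f; | |||
  for (std::size_t i = 0; i < predict.size(); ++i) { | |||
    const float d = predict[i] - label[i]; | |||
    if (reduction == "none" && per_elem != nullptr) per_elem->push_back(d * d); | |||
    sum += d * d; | |||
  } | |||
  if (reduction == "sum") return sum; | |||
  if (reduction == "mean") return sum / static_cast<float>(predict.size()); | |||
  return 0.0f;  // "none": per-element results were written to per_elem. | |||
} | |||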
/** | |||
* @brief Calculates the reversed outputs of the function "smooth_l1_loss_v2". \n | |||
* @par Inputs: | |||
* Three Inputs, including: | |||
* @li predict: A Tensor. Must be one of the following types: | |||
* float16, float32. | |||
* @li label: A Tensor. Has the same type as "predict". | |||
* @li dout: A Tensor. Has the same type as "predict". \n | |||
* @par Attributes: | |||
* Two Attributes, including: | |||
* @li sigma: An optional float. Defaults to 1.0. \n | |||
* @li reduction: An optional string. Defaults to "mean". | |||
* Must be one of the following: "none", "mean", "sum". \n | |||
* @par Outputs: | |||
* @li gradient: A Tensor. Has the same type as "predict". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator SmoothL1LossBackward. | |||
*/ | |||
REG_OP(SmoothL1LossGradV2) | |||
.INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(gradient, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.ATTR(sigma, Float, 1.0) | |||
.ATTR(reduction, String, "mean") | |||
.OP_END_FACTORY_REG(SmoothL1LossGradV2) | |||
/** | |||
* @brief Creates a criterion that uses a squared term if the absolute | |||
* element-wise error falls below "sigma" and an L1 term otherwise. It is | |||
* less sensitive to outliers than MSELoss and in some cases prevents | |||
* exploding gradients. | |||
* @par Inputs: | |||
* @li predict: A multi-dimensional Tensor of type float16 or float32, | |||
* specifying the predictive value. \n | |||
* @li label: A multi-dimensional Tensor of type float16 or float32, | |||
* specifying the target value. \n | |||
* @par Attributes: | |||
* @li sigma: An optional float. Specifies the loss threshold. Defaults | |||
* to 1.0. \n | |||
* @li reduction: An optional str. Specifies the reduction to apply to | |||
* the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, | |||
* 'mean': the sum of the output will be divided by the number of elements in | |||
* the output,'sum': the output will be summed. Default: 'mean'. \n | |||
* @par Outputs: | |||
* @li loss: Indicates the loss between the predictive value and target value. | |||
* Has the same dimensions as "predict". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator smooth_l1_loss. \n | |||
*/ | |||
REG_OP(SmoothL1LossV2) | |||
.INPUT(predict, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.INPUT(label, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(loss, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.ATTR(sigma, Float, 1.0) | |||
.ATTR(reduction, String, "mean") | |||
.OP_END_FACTORY_REG(SmoothL1LossV2) | |||
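/* Editor's note: the usual per-element smooth-L1 term, under the assumption | |||
 * that "sigma" plays the threshold role of PyTorch's "beta". */ | |||
#include <cmath> | |||
inline float SmoothL1Elem(float predict, float label, float sigma) { | |||
  const float d = std::fabs(predict - label); | |||
  // Squared term inside the threshold, L1 term outside it. | |||
  return d < sigma ? 0.5f * d * d / sigma : d - 0.5f * sigma; | |||
} | |||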
/** | |||
* @brief Computes Centralization. result = x - mean(x, axes) | |||
* @par Inputs: | |||
* @li x: An ND tensor of type float16, float32. | |||
* @par Attributes: | |||
* @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. | |||
* Must be in the range [-rank(x), rank(x)). | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the same type as "x". \n | |||
* @par Third-party framework compatibility | |||
* custom operator \n | |||
*/ | |||
REG_OP(Centralization) | |||
.INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.ATTR(axes, ListInt, {-1}) | |||
.OP_END_FACTORY_REG(Centralization) | |||
/** | |||
* @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2. | |||
* @par Inputs: | |||
* @li predict: An ND tensor of type float16, float32. | |||
* @li target: An ND tensor of type float16, float32. | |||
* @li dout: An ND tensor of type float16, float32. | |||
* @li weight: An optional ND tensor of type float16, float32. | |||
* @li pos_weight: An optional ND tensor of type float16, float32. \n | |||
* @par Attributes: | |||
* @li reduction: An optional string. Defaults to "mean". \n | |||
* @par Outputs: | |||
* @li gradient: An ND tensor with the same shape and type as "predict". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad. | |||
*/ | |||
REG_OP(SigmoidCrossEntropyWithLogitsGradV2) | |||
.INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(reduction, String, "mean") | |||
.OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -182,6 +182,125 @@ REG_OP(AvgPool3D) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(AvgPool3D) | |||
/** | |||
*@brief Performs average pooling on the input. | |||
*@par Inputs: | |||
*@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. | |||
*@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. | |||
*@li multiplier: An optional tensor of float16, float32, double. | |||
*@par Attributes: | |||
*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. | |||
*@li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. | |||
*@li pads: List of ints, implicit zero paddings on both sides of the input. | |||
*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||
*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||
*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||
*@li data_format: A string, format of input data . \n | |||
*@par Outputs: | |||
*y: The average pooled output tensor . \n | |||
*@attention Constraints: | |||
*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator AvgPool3D. | |||
*/ | |||
REG_OP(AvgPool3DD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.REQUIRED_ATTR(ksize, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(ceil_mode, Bool, false) | |||
.ATTR(count_include_pad, Bool, true) | |||
.ATTR(divisor_override, Int, 0) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(AvgPool3DD) | |||
/** | |||
* @brief Computes AvgPool3DGrad function. | |||
* @par Inputs: | |||
* @li orig_input_shape: A 1D tensor of type int32, holding the shape of the original input. | |||
* @li grads: An NDHWC tensor of type float16, float32, or double. | |||
* @par Attributes: | |||
* @li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. | |||
* @li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. | |||
* @li pads: List of ints, implicit zero paddings on both sides of the input. | |||
* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||
* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||
* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||
* @li data_format: A string, format of input data . | |||
* @par Outputs: | |||
* @output: A mutable tensor with the shape specified by "orig_input_shape" and the same type as "grads". | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the TensorFlow operator AvgPoolGrad. | |||
*/ | |||
REG_OP(AvgPool3DGrad) | |||
.INPUT(orig_input_shape, TensorType({DT_INT32})) | |||
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.REQUIRED_ATTR(ksize, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(ceil_mode, Bool, false) | |||
.ATTR(count_include_pad, Bool, true) | |||
.ATTR(divisor_override, Int, 0) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(AvgPool3DGrad) | |||
/** | |||
* @brief Performs average pooling on the input. | |||
* @par Inputs: | |||
* @li grads: An NDHWC tensor of type float16. | |||
* @li filter: An optional tensor of type float16, fractal_z_3d layout. | |||
* @li multiplier: An optional tensor of float16. | |||
* @par Attributes: | |||
* @li orig_input_shape: List of ints that has length 5. The shape of the original forward input tensor. | |||
* @li ksize: List of ints that has length 3. The size of the window for each dimension of the input tensor. | |||
* @li strides: List of ints that has length 3. The stride of the sliding window for each dimension of the input tensor. | |||
* @li pads: List of ints, implicit zero paddings on both sides of the input. | |||
* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||
* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||
* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||
* @li data_format: A string, format of input data . \n | |||
* @par Outputs: | |||
* @output: The average pooled output tensor . \n | |||
* @attention Constraints: | |||
* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||
* @par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator AvgPool3DGradD. | |||
*/ | |||
REG_OP(AvgPool3DGradD) | |||
.INPUT(grads, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16})) | |||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.REQUIRED_ATTR(orig_input_shape, ListInt) | |||
.REQUIRED_ATTR(ksize, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(ceil_mode, Bool, false) | |||
.ATTR(count_include_pad, Bool, true) | |||
.ATTR(divisor_override, Int, 0) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(AvgPool3DGradD) | |||
/** | |||
*@brief Performs max_pool_ext2 on the input . \n | |||
@@ -308,6 +427,31 @@ REG_OP(MaxPool3D) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(MaxPool3D) | |||
/** | |||
*@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n | |||
* The output is of size H x W, for any input size. | |||
* @par Inputs: | |||
* One input, including: | |||
* @li x: A Tensor. Must be one of the following data types: | |||
* float16, float32, float64. \n | |||
* @par Attributes: | |||
* @li output_size: A required list of 2 ints | |||
* specifying the size (H,W) of the output tensor. \n | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the same data type as "x". | |||
* @li argmax: A Tensor of index type (int32 or int64), recording the positions of the maxima. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator AdaptiveMaxPool2d. | |||
*/ | |||
REG_OP(AdaptiveMaxPool2d) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
.OUTPUT(argmax, TensorType::IndexNumberType()) | |||
.REQUIRED_ATTR(output_size, ListInt) | |||
.OP_END_FACTORY_REG(AdaptiveMaxPool2d) | |||
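/* Editor's note: adaptive pooling typically derives its windows with the | |||
 * floor/ceil rule below (the PyTorch convention; stated here as an | |||
 * assumption for this op). */ | |||
#include <cstdint> | |||
inline void AdaptiveWindow(int64_t out_idx, int64_t out_size, int64_t in_size, | |||
                           int64_t* start, int64_t* end) { | |||
  *start = (out_idx * in_size) / out_size;                     // floor | |||
  *end = ((out_idx + 1) * in_size + out_size - 1) / out_size;  // ceil | |||
} | |||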
/** | |||
* @brief Computes second-order gradients of the maxpooling3d function . \n | |||
@@ -477,8 +621,9 @@ REG_OP(MaxPoolV2) | |||
*@par Inputs: | |||
* One input: | |||
*x: An NC1HWC0 Tensor. Supported type: float, double, int32, | |||
* uint8, int16, int8, int64, uint16, half, uint32, uint64 . \n | |||
*x: A 4D Tensor. Supported type: float, double, int32, | |||
* uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||
* Must set the format; supported format list: ["NCHW", "NHWC"]. \n | |||
*@par Attributes: | |||
*@li ksize: A required list of int8, int16, int32, or int64 values, | |||
@@ -517,10 +662,12 @@ REG_OP(MaxPoolWithArgmax) | |||
*@par Inputs: | |||
* Three inputs, including: | |||
*@li x: An NC1HWC0 tensor. Supported type: float, double, int32, | |||
*@li x: A 4D tensor. Supported type: float, double, int32, | |||
* uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||
*@li grad: An NC1HWC0 tensor. Supported type: float, double, int32, | |||
* Must set the format; supported format list: ["NCHW", "NHWC"]. | |||
*@li grad: A 4D tensor. Supported type: float, double, int32, | |||
* uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||
* Must set the format; supported format list: ["NCHW", "NHWC"]. | |||
*@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n | |||
*@par Attributes: | |||
@@ -1107,7 +1254,7 @@ REG_OP(AvgPool1DD) | |||
*@par Inputs: | |||
* One input: | |||
*x: An NC1HWC0 Tensor of type float16. | |||
*x: A 4D Tensor of type float16. Must set the format; supported format list: ["NCHW", "NHWC"]. | |||
*@par Attributes: | |||
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | |||
* each dimension of the input tensor. No default value. | |||
@@ -1148,9 +1295,9 @@ REG_OP(MaxPoolWithArgmaxV2) | |||
*@par Inputs: | |||
* Three inputs, including: | |||
*@li x: An NC1HWC0 tensor of type float16. | |||
*@li grad: An NC1HWC0 tensor of type float16. | |||
*@li argmx: An NC1HWC0 tensor of type uint16 or int64 . \n | |||
*@li x: A 4D tensor of type float16. Must set the format; supported format list: ["NCHW", "NHWC"]. | |||
*@li grad: A 4D tensor of type float16. Must set the format; supported format list: ["NCHW", "NHWC"]. | |||
*@li argmx: A 4D tensor of type uint16 or int64. Must set the format; supported format list: ["NCHW", "NHWC"]. \n | |||
*@par Attributes: | |||
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | |||
@@ -1291,5 +1438,171 @@ REG_OP(MaxPoolV3Grad) | |||
.ATTR(global_pooling, Bool, false) | |||
.ATTR(ceil_mode, Bool, false) | |||
.OP_END_FACTORY_REG(MaxPoolV3Grad) | |||
/** | |||
*@brief Performs dilation2d on the input . \n | |||
*@par Inputs: | |||
*x: A 4D tensor; the supported format is NHWC. | |||
*filter: A 3D tensor with the same type as x, | |||
and the same C dimension as x. \n | |||
*@par Attributes: | |||
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. | |||
*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1. | |||
*@li padding_mode: An optional string. Defaults to "SAME"; supported values are "SAME" and "VALID". | |||
*@li pads: An optional list of 4 ints. | |||
*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". | |||
*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n | |||
*@par Outputs: | |||
*y: The output tensor. Has the same type and format as input "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator Dilation2D. | |||
*/ | |||
REG_OP(Dilation2D) | |||
.INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) | |||
.INPUT(filter,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) | |||
.OUTPUT(y,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(rates, ListInt) | |||
.ATTR(padding_mode, String, "SAME") | |||
.ATTR(pads, ListInt, {0,0,0,0}) | |||
.ATTR(ceil_mode, Bool, false) | |||
.ATTR(data_format, String, "NHWC") | |||
.OP_END_FACTORY_REG(Dilation2D) | |||
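/* Editor's note: the TensorFlow Dilation2D rule at one output location, as a | |||
 * hedged sketch. x_patch must cover ((kh-1)*rate_h+1) x ((kw-1)*rate_w+1) | |||
 * input values; padding and striding are assumed to be resolved by the | |||
 * caller. */ | |||
#include <algorithm> | |||
#include <cstddef> | |||
#include <limits> | |||
#include <vector> | |||
float Dilation2DWindow(const std::vector<std::vector<float>>& x_patch, | |||
                       const std::vector<std::vector<float>>& filter_hw, | |||
                       std::size_t rate_h, std::size_t rate_w) { | |||
  float best = -std::numeric_limits<float>::infinity(); | |||
  for (std::size_t i = 0; i < filter_hw.size(); ++i) { | |||
    for (std::size_t j = 0; j < filter_hw[i].size(); ++j) { | |||
      // Morphological dilation: max over the window of (input + filter). | |||
      best = std::max(best, x_patch[i * rate_h][j * rate_w] + filter_hw[i][j]); | |||
    } | |||
  } | |||
  return best; | |||
} | |||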
/** | |||
* @brief Applies a 2D adaptive average pooling over | |||
* an input signal composed of several input planes. \n | |||
* @par Inputs: | |||
* One input, including: | |||
* @li x: A Tensor. Must be one of the following data types: | |||
* float16, float32. \n | |||
* @par Attributes: | |||
* @li output_size: A required list of 2 ints | |||
* specifying the size (H,W) of the output tensor. \n | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the same data type as "x" \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator AdaptiveAvgPool2d. | |||
*/ | |||
REG_OP(AdaptiveAvgPool2d) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.REQUIRED_ATTR(output_size, ListInt) | |||
.OP_END_FACTORY_REG(AdaptiveAvgPool2d) | |||
/** | |||
* @brief Compute gradients of adaptive averagev2 pooling function. | |||
* @par Inputs: | |||
* @li input_grad: An NCHW Tensor. Must be one of the following data types: | |||
* float16, float32. | |||
* @par Attributes: | |||
* @li orig_input_shape: A required tuple or list of type int32. | |||
* @par Outputs: | |||
* @li output_grad: A tensor with the shape specified by "orig_input_shape" and the same type as "input_grad". | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator AdaptiveAvgPool2dGrad. | |||
*/ | |||
REG_OP(AdaptiveAvgPool2dGrad) | |||
.INPUT(input_grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(output_grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.REQUIRED_ATTR(orig_input_shape, ListInt) | |||
.OP_END_FACTORY_REG(AdaptiveAvgPool2dGrad) | |||
/** | |||
* @brief Performs the backpropagation of MaxPoolWithGradArgmaxV1. | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li x: An NC1HWC0 tensor of type float16. | |||
* @li grad: An NC1HWC0 tensor of type float16. | |||
* @li argmax: An NC1HWC0 tensor of type uint16 or int64. \n | |||
* @par Attributes: | |||
* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | |||
* each dimension of the input tensor. No default value. | |||
* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for | |||
* each dimension of the input tensor. No default value. | |||
* @li pads: A required list of ints. \n | |||
* @par Outputs: | |||
* y: A Tensor. Has the same type and format as input "x". \n | |||
* @attention Constraints: | |||
* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | |||
* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 | |||
* @li "pads" is listint. | |||
* @li "ceil_mode" defaults to False. | |||
* @li "data_format" defaults to "NC1HWC0". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1. | |||
*/ | |||
REG_OP(MaxPoolGradWithArgmaxV1) | |||
.INPUT(x, TensorType({DT_FLOAT16})) | |||
.INPUT(grad, TensorType({DT_FLOAT16})) | |||
.INPUT(argmax, TensorType({DT_UINT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16})) | |||
.REQUIRED_ATTR(ksize, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(dtype, Int, 3) | |||
.ATTR(dilation, ListInt, {1, 1, 1, 1}) | |||
.ATTR(ceil_mode, Bool, false) | |||
.OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV1) | |||
/** | |||
* @brief Performs max pooling on the input and outputs both max values and indices. | |||
* @par Inputs: | |||
* One input: | |||
* x: An NC1HWC0 Tensor of type float16. \n | |||
* @par Attributes: | |||
* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | |||
* each dimension of the input tensor. No default value. | |||
* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for | |||
* each dimension of the input tensor. No default value. | |||
* @li pads: A required list of ints. No default value. \n | |||
* @par Outputs: | |||
* y: A Tensor. Has the same type and format as input "x". | |||
* argmax: A Tensor. type:uint16, format:NC1HWC0. \n | |||
* @attention Constraints: | |||
* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | |||
* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, | |||
* strides[2] <= 63, strides[2] >= 1. | |||
* @li "pads" is listint. | |||
* @li "ceil_mode" defaults to False. | |||
* @li "data_format" defaults to "NC1HWC0". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1. | |||
*/ | |||
REG_OP(MaxPoolWithArgmaxV1) | |||
.INPUT(x, TensorType({DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16})) | |||
.OUTPUT(argmax, TensorType({DT_UINT16})) | |||
.REQUIRED_ATTR(ksize, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(dtype, Int, 3) | |||
.ATTR(dilation, ListInt, {1, 1, 1, 1}) | |||
.ATTR(ceil_mode, Bool, false) | |||
.OP_END_FACTORY_REG(MaxPoolWithArgmaxV1) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -640,6 +640,208 @@ REG_OP(Mish) | |||
.OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 })) | |||
.OP_END_FACTORY_REG(Mish) | |||
/** | |||
* @brief pytorch hardtanh_backward operator. | |||
* | |||
* @par Inputs: | |||
* 2 inputs, including: | |||
* @li result, the result tensor from the hardtanh forward pass, | |||
* datatype: float16/float32, format: ND/5HD. | |||
* @li grad, the gradient tensor propagated from the downstream layer, | |||
* datatype: float16/float32, format: ND/5HD. \n | |||
* @par Attributes: | |||
* 2 attributes, including: | |||
* @li min_val, minimum value of the linear region range, datatype:float. | |||
* @li max_val, maximum value of the linear region range, datatype:float. \n | |||
* @par Outputs: | |||
* 1 output, including: | |||
* @li y, hardtanh_backward output tensor; datatype and format are the same as | |||
* the input "result". \n | |||
* @attention Constraints: | |||
* This operator only supports dataType: float16/float32, format: ND/5HD. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator HardtanhGrad. | |||
*/ | |||
REG_OP(HardtanhGrad) | |||
.INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */ | |||
.INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Second operand." */ | |||
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Result, has same element type as two inputs" */ | |||
.ATTR(min_val, Float, -1.0) | |||
.ATTR(max_val, Float, 1.0) | |||
.OP_END_FACTORY_REG(HardtanhGrad) | |||
/** | |||
* @brief Calculates the softplus loss function with attributes of beta and threshold. \n | |||
* @par Inputs: | |||
* One input, including: | |||
* @li x: A mutable Tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @par Attributes: | |||
* @li beta: An optional float. Defaults to "1.0" \n | |||
* @li threshold: An optional float. Defaults to "20.0" \n | |||
* @par Outputs: | |||
* @li y: A mutable Tensor. Has the same type as "x" \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Softplus. | |||
*/ | |||
REG_OP(SoftplusV2) | |||
.INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.ATTR(beta, Float, 1.0) | |||
.ATTR(threshold, Float, 20.0) | |||
.OP_END_FACTORY_REG(SoftplusV2) | |||
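/* Editor's note: the PyTorch Softplus rule this op mirrors: | |||
 * y = (1/beta) * log(1 + exp(beta * x)), reverting to y = x once | |||
 * beta * x exceeds "threshold", for numerical stability. */ | |||
#include <cmath> | |||
inline float SoftplusV2Elem(float x, float beta, float threshold) { | |||
  const float bx = beta * x; | |||
  return bx > threshold ? x : std::log1p(std::exp(bx)) / beta; | |||
} | |||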
/** | |||
* @brief Calculates the reversed outputs of the function "softplus_v2". \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li input_gradients: A mutable Tensor. Must be one of the following types: | |||
* float16, float32. | |||
* @li input_features: A mutable Tensor of the same type as "input_gradients" \n | |||
* @par Attributes: | |||
* @li beta: An optional float. Defaults to "1.0" \n | |||
* @li threshold: An optional float. Defaults to "20.0" \n | |||
* @par Outputs: | |||
* @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator SoftplusGrad. | |||
*/ | |||
REG_OP(SoftplusV2Grad) | |||
.INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.ATTR(beta, Float, 1.0) | |||
.ATTR(threshold, Float, 20.0) | |||
.OP_END_FACTORY_REG(SoftplusV2Grad) | |||
/** | |||
* @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) | |||
* where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. | |||
* | |||
* @par Inputs: | |||
* One input, including: | |||
* @li x: A Tensor. Must be one of the following types: float32, float16. | |||
* | |||
* @par Attributes: | |||
* @li alpha: An optional float. Defaults to 1.0. | |||
* | |||
* @par Outputs: | |||
* One output, including: | |||
* @li y: A Tensor of the same type as x. | |||
* | |||
*/ | |||
REG_OP(ThresholdedRelu) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(alpha, Float, 1.0) | |||
.OP_END_FACTORY_REG(ThresholdedRelu) | |||
/** | |||
* @brief Calculate the hard shrinkage function. \n | |||
* @par Inputs: | |||
* One input, including: | |||
* @li input_x: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @par Attributes: | |||
* @li lambd: An optional float. Defaults to 0.5. \n | |||
* @par Outputs: | |||
* y: A Tensor with the same dtype and shape as "input_x". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Hardshrink. \n | |||
*/ | |||
REG_OP(HardShrink) | |||
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(lambd, Float, 0.5) | |||
.OP_END_FACTORY_REG(HardShrink) | |||
/** | |||
* @brief Calculate the hard sigmoid function. \n | |||
* @par Inputs: | |||
* One input, including: | |||
* @li input_x: A tensor. Must be one of the following types: | |||
* float16, float32, int32. \n | |||
* @par Attributes: | |||
* @li alpha: An optional float. Defaults to 0.16666666. \n | |||
* @li beta: An optional float. Defaults to 0.5. \n | |||
* @par Outputs: | |||
* y: A Tensor with the same dtype and shape as "input_x". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Hardsigmoid. \n | |||
*/ | |||
REG_OP(HardSigmoid) | |||
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.ATTR(alpha, Float, 0.16666666) | |||
.ATTR(beta, Float, 0.5) | |||
.OP_END_FACTORY_REG(HardSigmoid) | |||
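/* Editor's note: the hard-sigmoid rule implied by the defaults above | |||
 * (alpha = 1/6, beta = 0.5, matching PyTorch's hardsigmoid). */ | |||
#include <algorithm> | |||
inline float HardSigmoidElem(float x, float alpha, float beta) { | |||
  return std::max(0.0f, std::min(1.0f, alpha * x + beta)); | |||
} | |||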
/** | |||
* @brief Calculate the soft shrinkage function. \n | |||
* @par Inputs: | |||
* One input, including: | |||
* @li input_x: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @par Attributes: | |||
* @li lambd: An optional float. Defaults to 0.5. \n | |||
* @par Outputs: | |||
* y: A Tensor with the same dtype and shape as "input_x". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Softshrink. \n | |||
*/ | |||
REG_OP(SoftShrink) | |||
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(lambd, Float, 0.5) | |||
.OP_END_FACTORY_REG(SoftShrink) | |||
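/* Editor's note: the soft-shrinkage rule of PyTorch's Softshrink, which this | |||
 * op is documented as compatible with. */ | |||
inline float SoftShrinkElem(float x, float lambd) { | |||
  if (x > lambd) return x - lambd; | |||
  if (x < -lambd) return x + lambd; | |||
  return 0.0f; | |||
} | |||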
/** | |||
* @brief Calculate the reversed outputs of the function "soft_shrink". \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li input_grad: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @li input_x: A tensor of the same dtype as "input_grad". \n | |||
* @par Attributes: | |||
* @li lambd: An optional float. Defaults to 0.5. \n | |||
* @par Outputs: | |||
* y: A Tensor of the same dtype and shape as "input_grad". \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator SoftShrinkGrad. \n | |||
*/ | |||
REG_OP(SoftShrinkGrad) | |||
.INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(lambd, Float, 0.5) | |||
.OP_END_FACTORY_REG(SoftShrinkGrad) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -161,7 +161,7 @@ REG_OP(Pad) | |||
*@brief Pads a tensor . \n | |||
*@par Inputs: | |||
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n | |||
*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n | |||
*@par Attributes: | |||
*paddings: An optional "vector<vector<int>>". Defaults to "{}". | |||
@@ -180,8 +180,8 @@ REG_OP(Pad) | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. | |||
*/ | |||
REG_OP(PadD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.REQUIRED_ATTR(paddings, ListListInt) | |||
.OP_END_FACTORY_REG(PadD) | |||
@@ -213,7 +213,7 @@ REG_OP(PadV2) | |||
*@brief Pads a tensor . \n | |||
*@par Inputs: | |||
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n | |||
*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n | |||
*constant_values: A Tensor. Must have the same type as input. | |||
*@par Attributes: | |||
@@ -227,10 +227,7 @@ REG_OP(PadV2) | |||
*y: A Tensor of the same type as "x" . \n | |||
*@par Third-party framework compatibility: | |||
* Compatible with TensorFlow operator Pad. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. | |||
* Compatible with TensorFlow operator PadV2. | |||
*/ | |||
REG_OP(PadV2D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
@@ -403,5 +400,46 @@ REG_OP(EmbeddingRankId) | |||
.ATTR(mode, String, "mod") | |||
.OP_END_FACTORY_REG(EmbeddingRankId) | |||
/** | |||
* @brief Fills a tensor of the specified shape with a given value. | |||
* @par Inputs: | |||
* One input, including: | |||
* @li dims: A Tensor of type int16, int32, or int64, specifying the shape of the output to fill. | |||
* @par Attributes: | |||
* @li value: An optional float value. Defaults to 0.0. | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the shape specified by the "dims" input and is filled with the value specified by the "value" attribute. | |||
* @par Third-party framework compatibility | |||
* Compatible with the ONNX operator ConstantOfShape. | |||
*/ | |||
REG_OP(FillV2) | |||
.INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||
.ATTR(value, Float, 0) | |||
.OP_END_FACTORY_REG(FillV2) | |||
/** | |||
* @brief Fills a tensor of the specified shape with a given value. | |||
* @par Attributes: | |||
* @li value: An optional float value. Defaults to 0.0. | |||
* @li dims: A required ListInt specifying the shape of the output to fill. | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the shape specified by the "dims" attribute and is filled with the value specified by the "value" attribute. | |||
* @par Third-party framework compatibility | |||
* Compatible with the ONNX operator ConstantOfShape. | |||
*/ | |||
REG_OP(FillV2D) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64})) | |||
.ATTR(value, Float, 0) | |||
.REQUIRED_ATTR(dims, ListInt) | |||
.OP_END_FACTORY_REG(FillV2D) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -495,6 +495,60 @@ REG_OP(ShuffleChannel) | |||
DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) | |||
.ATTR(group, Int, 1) | |||
.OP_END_FACTORY_REG(ShuffleChannel) | |||
/** | |||
* @brief Generates a tensor of samples from a multinomial | |||
* distribution according to the probabilities of each of | |||
* the possible outcomes. | |||
* | |||
* @par Inputs: | |||
* One input, including: | |||
* @li x: An input tensor with shape [batch_size, class_size], | |||
* where class_size is the number of all possible outcomes. | |||
* Each value along the axis zero represents the unnormalized | |||
* log-probability of each corresponding outcome in a batch. | |||
* | |||
* @par Outputs: | |||
* One output, including: | |||
* @li y: An output tensor with shape [batch_size, sample_size], | |||
* where sample_size is the number of times to sample. | |||
* Each value along the axis zero represents the outcome of | |||
* the corresponding sample in a batch. | |||
* | |||
*/ | |||
REG_OP(MultinomialFuss) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64})) | |||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64})) | |||
.ATTR(dtype, Int, 6) | |||
.ATTR(sample_size, Int, 1) | |||
.ATTR(seed, Float, 0) | |||
.OP_END_FACTORY_REG(MultinomialFuss) | |||
/** | |||
* @brief During training, randomly zeroes some of the elements of the input | |||
* tensor with probability "p". | |||
* | |||
* @par Inputs: | |||
* @li x: An ND Tensor. Must be one of the following data types: float, float16. | |||
* @li seed: An ND Tensor. Must be one of the following data types: float. | |||
* | |||
* @par Attributes: | |||
* @li p: The probability of an element being zeroed. | |||
* | |||
* @par Outputs: | |||
* @li y: A tensor with the same shape and type as "x". | |||
* @li mask: A float tensor with the same shape as "x". | |||
* @li new_seed: A tensor with the same shape and type as "seed". | |||
*/ | |||
REG_OP(DropoutV2) | |||
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||
.INPUT(seed, TensorType({ DT_FLOAT })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||
.OUTPUT(mask, TensorType({ DT_FLOAT })) | |||
.OUTPUT(seed, TensorType({ DT_FLOAT })) | |||
.REQUIRED_ATTR(p, Float) | |||
.OP_END_FACTORY_REG(DropoutV2) | |||
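/* Editor's note: a hedged reference for inverted dropout, the common | |||
 * training-time convention. The op's exact mask encoding and seed update are | |||
 * kernel-defined; this sketch only illustrates "zero with probability p". */ | |||
#include <cstddef> | |||
#include <random> | |||
#include <vector> | |||
void DropoutRef(const std::vector<float>& x, float p, unsigned seed, | |||
                std::vector<float>* y, std::vector<float>* mask) { | |||
  std::mt19937 gen(seed); | |||
  std::uniform_real_distribution<float> uniform(0.0f, 1.0f); | |||
  const float keep_scale = 1.0f / (1.0f - p);  // rescale the kept elements | |||
  y->resize(x.size()); | |||
  mask->resize(x.size()); | |||
  for (std::size_t i = 0; i < x.size(); ++i) { | |||
    const float keep = uniform(gen) >= p ? 1.0f : 0.0f; | |||
    (*mask)[i] = keep; | |||
    (*y)[i] = x[i] * keep * keep_scale; | |||
  } | |||
} | |||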
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -635,8 +635,8 @@ REG_OP(ReduceMin) | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. | |||
*/ | |||
REG_OP(ReduceMinD) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) | |||
.REQUIRED_ATTR(axes, ListInt) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceMinD) | |||
@@ -821,7 +821,7 @@ Defaults to "0.00001" . \n | |||
*batch_ variance: A Tensor of type float32 for the result variance . \n | |||
*@attention Constraints: | |||
*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||
*For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. | |||
*/ | |||
REG_OP(INInferV2) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
@@ -882,7 +882,7 @@ REG_OP(INTrainingReduceV2) | |||
*@attention Constraints: | |||
*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | |||
* This operator is used in conjunction with INTrainingReduceV2. | |||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||
*/ | |||
REG_OP(INTrainingUpdateV2) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
@@ -965,7 +965,7 @@ for the updated variance. | |||
*@attention Constraints: | |||
*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | |||
* This operator is used in conjunction with GNTrainingUpdate. | |||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||
*/ | |||
REG_OP(GNTrainingUpdate) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
@@ -982,6 +982,41 @@ REG_OP(GNTrainingUpdate) | |||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(GNTrainingUpdate) | |||
/** | |||
* @brief Calculates the standard deviation and average value of Tensors. | |||
* @par Inputs: | |||
* @li x: A Tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @par Attributes: | |||
 * Three attributes, including:
 * @li dim: An optional list of ints. Defaults to "None", meaning all dimensions are reduced. \n
 * @li unbiased: An optional bool. Defaults to "True".
 * If "True", uses Bessel's correction.
 * If "False", does not use Bessel's correction. \n
 * @li keepdim: An optional bool. Defaults to "False".
 * If "True", keeps the reduced dimensions with size 1.
 * If "False", drops the reduced dimensions. \n
 * @par Outputs:
 * Two outputs, including:
 * @li y1: A Tensor holding the standard deviation. Has the same type as "x".
 * @li y2: A Tensor holding the mean. Has the same type as "x". \n
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator ReduceStd. | |||
*/ | |||
REG_OP(ReduceStd) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.ATTR(dim, ListInt, {}) | |||
.ATTR(unbiased, Bool, true) | |||
.ATTR(keepdim, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceStd) | |||
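As a semantic sketch, assuming (per the brief) that y1 is the standard deviation and y2 is the mean, for the dim = "None" case:

```cpp
#include <cmath>
#include <utility>
#include <vector>

// Reduce over all elements. unbiased toggles Bessel's correction in the
// variance denominator (n - 1 instead of n).
std::pair<float, float> ReduceStdRef(const std::vector<float> &x,
                                     bool unbiased) {
  float mean = 0.0f;
  for (float v : x) mean += v;
  mean /= static_cast<float>(x.size());
  float sq = 0.0f;
  for (float v : x) sq += (v - mean) * (v - mean);
  const float denom = static_cast<float>(unbiased ? x.size() - 1 : x.size());
  return {std::sqrt(sq / denom), mean};  // {y1 = std, y2 = mean}
}
```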
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -187,16 +187,16 @@ REG_OP(DynamicRNNGrad) | |||
*@brief: DynamicRNN calculation. | |||
*@par Inputs: | |||
*ten inputs: | |||
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n | |||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
*@par Attributes: | |||
*@li cell_type:A string identifying the cell type in the op. Defaults to "LSTM". Only LSTM is currently supported.
@@ -221,6 +221,8 @@ REG_OP(DynamicRNNGrad) | |||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Third-party framework compatibility: | |||
* Compatible with the TF operator LSTM. | |||
*/ | |||
REG_OP(DynamicRNN) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -254,6 +256,63 @@ REG_OP(DynamicRNN) | |||
.ATTR(is_training, Bool, true) | |||
.OP_END_FACTORY_REG(DynamicRNN) | |||
/** | |||
*@brief DynamicLSTMV2 calculation.
*@par Inputs:
*eleven inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||
*@li w_xc_x_static:An optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li h0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li c0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND .
*@par Attributes: | |||
*@li num_output:An integer identifying the number of outputs in the op. Defaults to 0.
*@li expose_hidden:A bool identifying whether to expose the hidden state in the op. Defaults to false.
*@li need_output_last:A bool identifying whether to output only the last state. Defaults to false.
*@li forget_bias:A float identifying the forget bias in the op. Defaults to 0.
*@par Outputs:
*five outputs:
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Third-party framework compatibility: | |||
* Compatible with the Caffe operator LSTM. | |||
*@par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(DynamicLSTMV2) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(cont, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(w_xc_x_static, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(h0, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(c0, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(last_output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(last_output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(num_output, Int, 0) | |||
.ATTR(expose_hidden, Bool, false) | |||
.ATTR(need_output_last, Bool, false) | |||
.ATTR(forget_bias, Float, 0.0) | |||
.OP_END_FACTORY_REG(DynamicLSTMV2) | |||
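To make the attribute placement concrete, here is a scalar (hidden size 1) sketch of a textbook LSTM step; it shows where forget_bias conventionally enters the forget-gate pre-activation, which is an assumption about this op, not a description of the fused FRACTAL_NZ kernel:

```cpp
#include <cmath>

static float Sigmoid(float v) { return 1.0f / (1.0f + std::exp(-v)); }

// One textbook LSTM step with scalar state. w* are input weights, r* are
// recurrent weights, b* are biases; forget_bias is added to the forget gate
// pre-activation, the usual convention.
void LstmStep(float x, float &h, float &c,
              float wi, float wf, float wo, float wg,
              float ri, float rf, float ro, float rg,
              float bi, float bf, float bo, float bg,
              float forget_bias) {
  const float i = Sigmoid(wi * x + ri * h + bi);
  const float f = Sigmoid(wf * x + rf * h + bf + forget_bias);
  const float o = Sigmoid(wo * x + ro * h + bo);
  const float g = std::tanh(wg * x + rg * h + bg);
  c = f * c + i * g;     // new cell state
  h = o * std::tanh(c);  // new hidden state
}
```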
/** | |||
*@brief: LSTMInputGrad calculation. | |||
*@par Inputs: | |||
@@ -475,9 +534,9 @@ REG_OP(BasicRNNCell) | |||
.OP_END_FACTORY_REG(BasicRNNCell) | |||
/** | |||
*@brief: DynamicGRU calculation. | |||
*@brief DynamicGRU calculation. | |||
*@par Inputs: | |||
*seven inputs: \n | |||
*seven inputs: | |||
*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||
*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||
*@li b:Must be one of the following types: float16, float32. The format must be ND. | |||
@@ -497,7 +556,7 @@ REG_OP(BasicRNNCell) | |||
*@li is_training:A bool identifying whether the op is in training mode. Defaults to true.
*@par Outputs: | |||
*five outputs: \n | |||
*five outputs: | |||
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
@@ -531,9 +590,9 @@ REG_OP(DynamicGRU) | |||
.OP_END_FACTORY_REG(DynamicGRU) | |||
/** | |||
*@brief: DynamicGRUV2 calculation. | |||
*@brief DynamicGRUV2 calculation. | |||
*@par Inputs: | |||
*seven inputs: \n | |||
*seven inputs: | |||
*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||
*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||
@@ -555,7 +614,7 @@ REG_OP(DynamicGRU) | |||
*@li is_training:A bool identifying whether the op is in training mode. Defaults to true.
*@par Outputs: | |||
*six outputs: \n | |||
*six outputs: | |||
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
@@ -592,6 +651,68 @@ REG_OP(DynamicGRUV2) | |||
.ATTR(is_training, Bool, true) | |||
.OP_END_FACTORY_REG(DynamicGRUV2) | |||
/** | |||
*@brief DynamicGRUV2Hidden calculation. | |||
*@par Inputs: | |||
*five inputs: | |||
*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. | |||
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||
*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | |||
*@li seq_length:Must be one of the following types: int32. The format must be ND. | |||
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Attributes:
*@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL".
Only UNIDIRECTIONAL is currently supported.
*@li cell_depth:An integer identifying the cell depth in the op. Defaults to 1.
*@li keep_prob:A float identifying the keep prob in the op. Defaults to 1.
*@li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
*@li num_proj:An integer identifying the num projection in the op. Defaults to 0.
*@li time_major:A bool identifying the time major in the op. Defaults to true.
*@li activation:A string identifying the type of activation function in the op. Defaults to "tanh".
Only tanh is currently supported.
*@li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
*@li reset_after:A bool identifying whether to apply the reset gate after matrix multiplication. Defaults to true.
*@li is_training:A bool identifying whether the op is in training mode. Defaults to true.
*@par Outputs: | |||
*six outputs: | |||
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(DynamicGRUV2Hidden) | |||
.INPUT(x_weight_input, TensorType({DT_FLOAT32})) | |||
.INPUT(weight_hidden, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(direction, String, "UNIDIRECTIONAL") | |||
.ATTR(cell_depth, Int, 1) | |||
.ATTR(keep_prob, Float, 1.0) | |||
.ATTR(cell_clip, Float, -1.0) | |||
.ATTR(num_proj, Int, 0) | |||
.ATTR(time_major, Bool, true) | |||
.ATTR(activation, String, "tanh") | |||
.ATTR(gate_order, String, "zrh") | |||
.ATTR(reset_after, Bool, true) | |||
.ATTR(is_training, Bool, true) | |||
.OP_END_FACTORY_REG(DynamicGRUV2Hidden) | |||
/** | |||
*@brief: DynamicGRUV2Grad calculation. | |||
*@par Inputs: | |||
@@ -618,7 +739,6 @@ REG_OP(DynamicGRUV2) | |||
*@li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
*@li num_proj:An integer identifying the num projection in the op. Defaults to 0.
*@li time_major:A bool identifying the time major in the op. Defaults to true.
*@li bias_type:An string identifying the type of bias_type function in the op. Default to "double_bias". | |||
*@li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
*@li reset_after:A bool identifying whether to apply the reset gate after matrix multiplication. Defaults to true.
@@ -630,6 +750,9 @@ REG_OP(DynamicGRUV2) | |||
*@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(DynamicGRUV2Grad) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -658,7 +781,6 @@ REG_OP(DynamicGRUV2Grad) | |||
.ATTR(cell_clip, Float, -1.0) | |||
.ATTR(num_proj, Int, 0) | |||
.ATTR(time_major, Bool, true) | |||
.ATTR(bias_type, String, "double_bias") | |||
.ATTR(gate_order, String, "zrh") | |||
.ATTR(reset_after, Bool, true) | |||
.OP_END_FACTORY_REG(DynamicGRUV2Grad) | |||
@@ -667,7 +789,7 @@ REG_OP(DynamicGRUV2Grad) | |||
*@brief GRUV2HiddenGradCell calculation.
*@par Inputs:
*nine inputs:
*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
@@ -678,6 +800,7 @@ REG_OP(DynamicGRUV2Grad) | |||
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Attributes: | |||
*@li t_state:An int identifying the current t state. The value range is [0, 4]. Defaults to 0.
*@li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.
*@par Outputs: | |||
@@ -685,10 +808,12 @@ REG_OP(DynamicGRUV2Grad) | |||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GRUV2HiddenGrad) | |||
.INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
REG_OP(GRUV2HiddenGradCell) | |||
.INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -699,8 +824,142 @@ REG_OP(GRUV2HiddenGrad) | |||
.OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(t_state, Int, 0) | |||
.ATTR(gate_order, String, "zrh") | |||
.OP_END_FACTORY_REG(GRUV2HiddenGrad) | |||
.OP_END_FACTORY_REG(GRUV2HiddenGradCell) | |||
/** | |||
 * @brief Calculates the gradient of the function "embedding". \n
* @par Inputs: | |||
* Two inputs, including: | |||
* @li grad: A mutable Tensor of word grad. Must be one of the following types: | |||
* float32. | |||
* @li indices: A mutable word index Tensor of the int32 type.\n | |||
* @par Attributes: | |||
 * @li num_weights: An int attr indicating the number of words in the dictionary. \n
 * @li padding_idx: An int attr indicating which word's gradient is kept at zero. Defaults to "-1". \n
 * @li scale_grad_by_freq: An optional bool. Defaults to "False".
 * If "True", "grad_weight" is scaled by the word frequency.
 * If "False", "grad_weight" is not scaled by the word frequency. \n
 * @par Outputs:
 * @li y: A mutable Tensor of the new word grad. Has the same type as "grad". \n
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator EmbeddingDenseGrad. | |||
*/ | |||
REG_OP(EmbeddingDenseGrad) | |||
.INPUT(grad, TensorType({ DT_FLOAT32 })) /* "First operand." */ | |||
.INPUT(indices, TensorType({ DT_INT32 })) /* "Second operand." */ | |||
.OUTPUT(y, TensorType({ DT_FLOAT32 })) /* "Result, has same element type as two inputs" */ | |||
.REQUIRED_ATTR(num_weights, Int) | |||
.ATTR(padding_idx, Int, -1) | |||
.ATTR(scale_grad_by_freq, Bool, false) | |||
.OP_END_FACTORY_REG(EmbeddingDenseGrad) | |||
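A plain scatter-add sketch of the gradient computation; scale_grad_by_freq is omitted and the row-major [num_indices, dim] layout is an assumption:

```cpp
#include <cstdint>
#include <vector>

// grad is [num_indices, dim] row-major; grad_weight is [num_weights, dim].
// Each row of grad is accumulated into the row named by its index; the
// padding_idx row receives no gradient.
std::vector<float> EmbeddingDenseGradRef(const std::vector<float> &grad,
                                         const std::vector<int32_t> &indices,
                                         int32_t num_weights, int32_t dim,
                                         int32_t padding_idx) {
  std::vector<float> grad_weight(static_cast<size_t>(num_weights) * dim, 0.0f);
  for (size_t i = 0; i < indices.size(); ++i) {
    if (indices[i] == padding_idx) continue;  // padded word: grad stays zero
    for (int32_t d = 0; d < dim; ++d) {
      grad_weight[static_cast<size_t>(indices[i]) * dim + d] +=
          grad[i * dim + d];
    }
  }
  return grad_weight;
}
```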
/** | |||
*@brief CommonLSTM calculation. | |||
*@par Inputs: | |||
*eight inputs:
*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||
*@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@par Attributes: | |||
*@li activation_alpha:Optional scaling values used by some activation functions. Empty is currently supported. | |||
*@li activation_beta:Optional scaling values used by some activation functions. Empty is currently supported. | |||
*@li activations:The list of activation functions. Empty is currently supported. | |||
*@li clip:A float identifying the cell clip in the op. Defaults to -1.
*@li direction:Specify if the RNN is forward, reverse, or bidirectional. Must be one of forward (default), reverse, or bidirectional.
*@li hidden_size:Number of neurons in the hidden layer. Reserved. | |||
*@li input_forget:Couple the input and forget gates if 1. Reserved. | |||
*@par Outputs: | |||
*three outputs:
*@li y:First dimension is time step, second dimension is direction, each step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*/ | |||
REG_OP(CommonLSTM) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) | |||
.OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(initial_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(p, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(activation_alpha, ListFloat, {}) | |||
.ATTR(activation_beta, ListFloat, {}) | |||
.ATTR(activations, ListString, {}) | |||
.ATTR(clip, Float, -1.0) | |||
.ATTR(direction, String, "forward") | |||
.REQUIRED_ATTR(hidden_size, Int) | |||
.ATTR(input_forget, Int, 0) | |||
.OP_END_FACTORY_REG(CommonLSTM) | |||
/** | |||
* @brief Common GRU calculation. | |||
* @par Inputs: | |||
 * Six inputs, including:
 * @li x: The input sequences, packed (and potentially padded) into one 3D Tensor(float16). The format must be FRACTAL_NZ
 * @li w: The weight tensor for the gates, a 3D Tensor(float16). The format must be FRACTAL_Z
 * @li r: The recurrence weight tensor, a 3D Tensor(float16). The format must be FRACTAL_Z
 * @li b: The bias tensor for the gates. The format must be ND
 * @li sequence_lens: Optional tensor specifying lengths of the sequences(int32). The format must be ND
 * @li initial_h: Optional initial value of the hidden(float16,float32). The format must be FRACTAL_NZ
* @par Attributes: | |||
* @li activation_alpha: Optional scaling values used by some activation functions. \n | |||
* @li activation_beta: Optional scaling values used by some activation functions. \n | |||
* @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n | |||
* @li clip: Cell clip threshold. \n | |||
* @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n | |||
* @li hidden_size: Number of neurons in the hidden layer. \n | |||
* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n | |||
* @par Outputs: | |||
* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ | |||
* @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ | |||
*/ | |||
REG_OP(CommonGRU) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) | |||
.OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(activation_alpha, ListFloat, {}) | |||
    .ATTR(activation_beta, ListFloat, {})
    .ATTR(activations, ListString, {})
.ATTR(clip, Float, -1.0) | |||
.ATTR(direction, String, "forward") | |||
.REQUIRED_ATTR(hidden_size, Int) | |||
    .ATTR(linear_before_reset, Int, 0)
.OP_END_FACTORY_REG(CommonGRU) | |||
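The linear_before_reset attribute follows the ONNX GRU convention; a scalar sketch of one step (the two bias terms per gate are collapsed into one for brevity, which is a simplification of the ONNX formulation):

```cpp
#include <cmath>

static float Sig(float v) { return 1.0f / (1.0f + std::exp(-v)); }

// Scalar GRU step, zrh gate order. With linear_before_reset != 0 the hidden
// gate's recurrent matmul is computed first and only then gated by r (ONNX
// semantics); otherwise r multiplies h_prev before the matmul.
float GruStep(float x, float h_prev,
              float wz, float rz, float bz,   // update gate
              float wr, float rr, float br,   // reset gate
              float wh, float rh, float bh,   // hidden gate
              bool linear_before_reset) {
  const float z = Sig(wz * x + rz * h_prev + bz);
  const float r = Sig(wr * x + rr * h_prev + br);
  const float n = linear_before_reset
                      ? std::tanh(wh * x + r * (rh * h_prev + bh))
                      : std::tanh(wh * x + rh * (r * h_prev) + bh);
  return (1.0f - z) * n + z * h_prev;
}
```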
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -796,6 +796,34 @@ REG_OP(SliceD) | |||
.REQUIRED_ATTR(size, ListInt) | |||
.OP_END_FACTORY_REG(SliceD) | |||
/** | |||
*@brief Extracts a slice from a tensor. | |||
* This operation extracts a slice of size "size" from a tensor "x" | |||
* starting at the location specified by "begin" . \n | |||
*@par Inputs: | |||
*@li x: A Tensor. Must be one of the following types: | |||
* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, | |||
* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n | |||
*@li offsets: The starting location for the slice.
*@par Attributes: | |||
*@li size: The size of the slice . \n
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x". The slice extracted from the tensor. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. | |||
*/ | |||
REG_OP(SliceDV2) | |||
.INPUT(x, TensorType::BasicType()) | |||
.INPUT(offsets, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::BasicType()) | |||
.REQUIRED_ATTR(size, ListInt) | |||
.OP_END_FACTORY_REG(SliceDV2) | |||
/** | |||
* @brief Finds values and indices of the "k" largest elements for the last | |||
* dimension . \n | |||
@@ -1921,6 +1949,160 @@ REG_OP(CumulativeLogsumexpD) | |||
.ATTR(exclusive, Bool, false) | |||
.ATTR(reverse, Bool, false) | |||
.OP_END_FACTORY_REG(CumulativeLogsumexpD) | |||
/** | |||
* @brief Add updates to var according to axis and indices. | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li var: A Tensor. Must be one of the following types: | |||
* float16, float32, int16, int32, int8, uint8. | |||
* @li indices: A Tensor of the indices, type should be int32. | |||
* @li updates: A Tensor of the same type as "var". \n | |||
* @par Attributes: | |||
 * @li axis: A required int to specify the axis on which to perform the index add. \n
* @par Outputs: | |||
* @li var: A Tensor. Same as input "var". | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator index_add_. | |||
*/ | |||
REG_OP(InplaceIndexAdd) | |||
.INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||
DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||
.INPUT(indices, TensorType({DT_INT32})) | |||
.INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||
DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||
.OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||
DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||
.REQUIRED_ATTR(axis, Int) | |||
.OP_END_FACTORY_REG(InplaceIndexAdd) | |||
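For axis 0 of a 2-D row-major tensor, the update reduces to a row-wise scatter-add; a sketch of the semantics, not of the in-place kernel:

```cpp
#include <cstdint>
#include <vector>

// var is [rows, cols] row-major; updates is [indices.size(), cols].
// Performs var[indices[i], :] += updates[i, :] for each i.
void InplaceIndexAddRef(std::vector<float> &var, int32_t cols,
                        const std::vector<int32_t> &indices,
                        const std::vector<float> &updates) {
  for (size_t i = 0; i < indices.size(); ++i) {
    for (int32_t c = 0; c < cols; ++c) {
      var[static_cast<size_t>(indices[i]) * cols + c] +=
          updates[i * cols + c];
    }
  }
}
```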
/** | |||
 * @brief Replaces values of "x" with "value" according to "mask".
* @par Inputs: | |||
* three inputs, including: | |||
 * @li x: A Tensor of dtype float16, float32, int32, or int8.
 * @li mask: A Tensor of dtype bool.
 * @li value: A Tensor or scalar of dtype float16, float32, int32, or int8. \n
* @par Outputs: | |||
* @li y: A tensor. Must be one of the following dtypes: | |||
* float16, float32, int32, int8. | |||
*/ | |||
REG_OP(MaskedFill) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) | |||
.INPUT(mask, TensorType({DT_BOOL})) | |||
.INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) | |||
.OP_END_FACTORY_REG(MaskedFill) | |||
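Element-wise, the op is a conditional overwrite; a sketch with a scalar value (the value input may also be a tensor, broadcast rules omitted here):

```cpp
#include <vector>

// y[i] = mask[i] ? value : x[i]
std::vector<float> MaskedFillRef(const std::vector<float> &x,
                                 const std::vector<bool> &mask, float value) {
  std::vector<float> y(x);
  for (size_t i = 0; i < y.size(); ++i) {
    if (mask[i]) y[i] = value;
  }
  return y;
}
```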
/** | |||
 * @brief Chooses values of "x" according to "mask".
* @par Inputs: | |||
* two inputs, including: | |||
 * @li x: A Tensor of dtype float16 or float32.
 * @li mask: A Tensor of dtype bool. \n
* @par Outputs: | |||
* @li y: A tensor with the same type as x. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Numpy operator select. | |||
* Replaces the pytorch operator masked_select in some scenarios.\n | |||
*/ | |||
REG_OP(MaskedSelectV2) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(mask, TensorType({DT_BOOL})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(MaskedSelectV2) | |||
/** | |||
 * @brief Slices a tensor at its last dim, e.g. a[..., begin:end:stride]. \n
 * @par Inputs:
 * One input, including:
 * @li x: A Tensor. Must be one of the following types: float16, float32, double, int8, int16, int32, int64.
* @par Attributes: | |||
* @li start: An attribute of type Int, start index of last dim. \n | |||
* @li end: An attribute of type Int, end index of last dim. \n | |||
* @li stride: An attribute of type Int, stride of slice. \n | |||
* @par Outputs: | |||
* @li y: A Tensor. Has the same type as "x". \n | |||
* @par Third-party framework compatibility | |||
* No compatibility | |||
*/ | |||
REG_OP(SliceLastDim) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||
.REQUIRED_ATTR(start, Int) | |||
.REQUIRED_ATTR(end, Int) | |||
.ATTR(stride, Int, 1) | |||
.OP_END_FACTORY_REG(SliceLastDim) | |||
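Viewing the tensor as [outer, last] in row-major order, the op is a strided copy over the last dimension; a sketch under that flattening assumption:

```cpp
#include <vector>

// Equivalent of a[..., start:end:stride] for a row-major [outer, last] view.
std::vector<float> SliceLastDimRef(const std::vector<float> &x, int outer,
                                   int last, int start, int end, int stride) {
  std::vector<float> y;
  for (int o = 0; o < outer; ++o) {
    for (int i = start; i < end; i += stride) {
      y.push_back(x[static_cast<size_t>(o) * last + i]);
    }
  }
  return y;
}
```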
/** | |||
* @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n | |||
* extracts a slice of size (end-begin)/stride from the given input tensor. \n | |||
* Starting at the location specified by begin the slice continues by \n | |||
* adding stride to the index until all dimensions are not less than end. \n | |||
* | |||
* @par Inputs: | |||
 * Five inputs, including:
 * @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n
 * int64, qint8, quint8, qint32, qint16, quint16, uint16, \n
 * float16, uint32, uint64, complex64, complex128. \n
 * @li begin: A Tensor of type int32 or int64, for the index of the first value to select.
 *
 * @li end: A Tensor of type int32 or int64, for the index of the last value to select.
 *
 * @li axes: A Tensor of type int32 or int64, indicating the axes to select.
 *
 * @li strides: A Tensor of type int32 or int64, for the increment.
* | |||
 * @par Attributes:
 * @li begin_mask: An int32 bitmask. \n
 * A bit "i" being "1" means to ignore the begin \n
 * value and instead use the largest interval possible.
 * @li end_mask: An int32 bitmask. \n
 * Analogous to "begin_mask".
 * @li ellipsis_mask: An int32 bitmask. \n
 * A bit "i" being "1" means the "i"th position \n
 * is actually an ellipsis.
 * @li new_axis_mask: An int32 bitmask. \n
 * A bit "i" being "1" means the "i"th \n
 * specification creates a new shape 1 dimension.
 * @li shrink_axis_mask: An int32 bitmask. \n
 * A bit "i" implies that the "i"th \n
 * specification should shrink the dimensionality.
* | |||
* @par Outputs: | |||
* y: A Tensor. Has the same type as "x". | |||
* | |||
* @attention Constraints: | |||
* | |||
* @par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator StridedSliceV2. | |||
*/ | |||
REG_OP(StridedSliceV2) | |||
.INPUT(x, TensorType::BasicType()) | |||
.INPUT(begin, TensorType::IndexNumberType()) | |||
.INPUT(end, TensorType::IndexNumberType()) | |||
.OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) | |||
.OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) | |||
.ATTR(begin_mask, Int, 0) | |||
.ATTR(end_mask, Int, 0) | |||
.ATTR(ellipsis_mask, Int, 0) | |||
.ATTR(new_axis_mask, Int, 0) | |||
.ATTR(shrink_axis_mask, Int, 0) | |||
.OUTPUT(y, TensorType::BasicType()) | |||
.OP_END_FACTORY_REG(StridedSliceV2) | |||
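The masks act per dimension; a one-dimensional sketch of how a set bit in begin_mask/end_mask widens the interval (TensorFlow-style semantics, assumed to carry over):

```cpp
#include <cstdint>
#include <vector>

// One dimension of a strided slice. A set mask bit means "ignore the given
// bound and use the widest interval possible" for that dimension.
std::vector<float> StridedSlice1D(const std::vector<float> &x, int64_t begin,
                                  int64_t end, int64_t stride,
                                  bool begin_masked, bool end_masked) {
  const int64_t n = static_cast<int64_t>(x.size());
  const int64_t b = begin_masked ? (stride > 0 ? 0 : n - 1) : begin;
  const int64_t e = end_masked ? (stride > 0 ? n : -1) : end;
  std::vector<float> y;
  for (int64_t i = b; (stride > 0) ? (i < e) : (i > e); i += stride) {
    y.push_back(x[static_cast<size_t>(i)]);
  }
  return y;
}
```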
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -141,7 +141,7 @@ support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW" | |||
*@par Attributes: | |||
*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. | |||
*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. | |||
*@li group: A required int32, default value is 1. \n | |||
*@li groups: An optional int32, default value is 1. \n
*@par Outputs: | |||
*dst: A Tensor dtype of all types. | |||
@@ -151,7 +151,7 @@ REG_OP(TransData) | |||
.OUTPUT(dst, TensorType::BasicType()) | |||
.REQUIRED_ATTR(src_format, String) | |||
.REQUIRED_ATTR(dst_format, String) | |||
.ATTR(group, Int, 1) | |||
.ATTR(groups, Int, 1) | |||
.OP_END_FACTORY_REG(TransData) | |||
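As background for the NHWC/NCHW <-> NC1HWC0 conversions, the 5-D layout splits C into C1 = ceil(C/C0) blocks of C0 channels (C0 is 16 for float16); a sketch of the index mapping, not of the TransData kernel itself:

```cpp
#include <cstddef>

// Offset of logical element (n, h, w, c) inside an NC1HWC0 buffer.
// Channels past C are zero padding in the last C0-sized block.
size_t Nc1hwc0Offset(size_t n, size_t h, size_t w, size_t c,
                     size_t H, size_t W, size_t C0, size_t C1) {
  const size_t c1 = c / C0;  // which C0-sized channel block
  const size_t c0 = c % C0;  // position inside the block
  return (((n * C1 + c1) * H + h) * W + w) * C0 + c0;
}
```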
/** | |||
@@ -357,7 +357,7 @@ REG_OP(DepthToSpace) | |||
*@brief Permutes data into spatial data blocks and then prunes them . \n | |||
*@par Inputs: | |||
*@li x: A 4D Tensor with format NHWC. | |||
*@li x: A 4D Tensor. Must be one of the following types: float16, float32.
* Must set the format, supported format list ["NCHW", "NHWC"]
*@li crops: A 1D list or tuple of int32 or int64 . \n
@@ -434,9 +434,10 @@ REG_OP(BatchToSpaceD) | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li x: An NHWC Tensor. Must be one of the following types: | |||
*@li x: A 4D Tensor. Must be one of the following types:
* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8,
* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
* Must set the format, supported format list ["NCHW", "NHWC"]
*@li paddings: A 2D tensor of type int, specifying the input . \n | |||
*@par Attributes: | |||
@@ -518,7 +519,8 @@ REG_OP(Unpack) | |||
* @par Inputs: | |||
* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the | |||
* following types:float32, double, int32, uint8, int16, int8, int64, uint16, | |||
* float16, uint32, uint64 | |||
 * float16, uint32, uint64. The input must have a data_format of one of the
 * following: NHWC, NCHW.
* @par Attributes: | |||
* @li ksizes: A required list or tuple. The size of the sliding window for each | |||
@@ -533,7 +535,6 @@ REG_OP(Unpack) | |||
* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. | |||
* @li padding: A required string. The type of padding algorithm to use, | |||
support "SAME" or "VALID". \n | |||
* @li data_format: A required string. The format of input, only supported NHWC. \n | |||
* @par Outputs: | |||
* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * | |||
@@ -554,7 +555,6 @@ REG_OP(ExtractImagePatches) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(rates, ListInt) | |||
.REQUIRED_ATTR(padding, String) | |||
.ATTR(data_format, String, "NHWC") | |||
.OP_END_FACTORY_REG(ExtractImagePatches) | |||
/** | |||
@@ -563,6 +563,7 @@ REG_OP(ExtractImagePatches) | |||
* @par Inputs: | |||
* x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n | |||
 * The input must have a data_format of one of the following: NDHWC, NCDHW. \n
* @par Attributes: | |||
* @li ksizes: A required list or tuple. The size of the sliding window for each | |||
@@ -571,7 +572,6 @@ REG_OP(ExtractImagePatches) | |||
* patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1]. | |||
* @li padding: A required string. The type of padding algorithm to use , | |||
* support "SAME" or "VALID" . \n | |||
* @li data_format: An optional string. The format of input, only supported NDHWC. \n | |||
* @par Outputs: | |||
* Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * | |||
@@ -590,7 +590,6 @@ REG_OP(ExtractVolumePatches) | |||
.REQUIRED_ATTR(ksizes, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(padding, String) | |||
.ATTR(data_format, String, "NDHWC") | |||
.OP_END_FACTORY_REG(ExtractVolumePatches) | |||
/** | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
*/ | |||
#ifndef __CCE_RUNTIME_BASE_H__ | |||
#define __CCE_RUNTIME_BASE_H__ | |||
@@ -41,12 +41,12 @@ static const int32_t RT_ERROR_NONE = 0; // success | |||
* @brief runtime exception numbers. | |||
*/ | |||
typedef enum tagRtExceptionType { | |||
RT_EXCEPTION_NONE = 0, | |||
RT_EXCEPTION_TS_DOWN = 1, | |||
RT_EXCEPTION_TASK_TIMEOUT = 2, | |||
RT_EXCEPTION_TASK_FAILURE = 3, | |||
RT_EXCEPTION_DEV_RUNNING_DOWN = 4, | |||
RT_EXCEPTION_STREAM_ID_FREE_FAILED = 5 | |||
RT_EXCEPTION_NONE = 0, | |||
RT_EXCEPTION_TS_DOWN = 1, | |||
RT_EXCEPTION_TASK_TIMEOUT = 2, | |||
RT_EXCEPTION_TASK_FAILURE = 3, | |||
RT_EXCEPTION_DEV_RUNNING_DOWN = 4, | |||
RT_EXCEPTION_STREAM_ID_FREE_FAILED = 5 | |||
} rtExceptionType; | |||
/** | |||
@@ -54,12 +54,12 @@ typedef enum tagRtExceptionType { | |||
* @brief Switch type. | |||
*/ | |||
typedef enum tagRtCondition { | |||
RT_EQUAL = 0, | |||
RT_NOT_EQUAL, | |||
RT_GREATER, | |||
RT_GREATER_OR_EQUAL, | |||
RT_LESS, | |||
RT_LESS_OR_EQUAL | |||
RT_EQUAL = 0, | |||
RT_NOT_EQUAL, | |||
RT_GREATER, | |||
RT_GREATER_OR_EQUAL, | |||
RT_LESS, | |||
RT_LESS_OR_EQUAL | |||
} rtCondition_t; | |||
/** | |||
@@ -67,25 +67,25 @@ typedef enum tagRtCondition { | |||
* @brief Data Type of Extensible Switch Task. | |||
*/ | |||
typedef enum tagRtSwitchDataType { | |||
RT_SWITCH_INT32 = 0, | |||
RT_SWITCH_INT64 = 1, | |||
RT_SWITCH_INT32 = 0, | |||
RT_SWITCH_INT64 = 1, | |||
} rtSwitchDataType_t; | |||
typedef enum tagRtStreamFlagType { | |||
RT_HEAD_STREAM = 0, // first stream | |||
RT_INVALID_FLAG = 0xFFFFFFFF, | |||
RT_HEAD_STREAM = 0, // first stream | |||
RT_INVALID_FLAG = 0xFFFFFFFF, | |||
} rtStreamFlagType_t; | |||
typedef enum tagRtLimitType { | |||
RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms | |||
  RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0,  // timeout for power down, ms
} rtLimitType_t; | |||
typedef struct rtExceptionInfo { | |||
uint32_t taskid; | |||
uint32_t streamid; | |||
uint32_t tid; | |||
uint32_t deviceid; | |||
uint32_t retcode; | |||
uint32_t taskid; | |||
uint32_t streamid; | |||
uint32_t tid; | |||
uint32_t deviceid; | |||
uint32_t retcode; | |||
} rtExceptionInfo; | |||
typedef void (*rtErrorCallback)(rtExceptionType); | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
*/ | |||
#ifndef __CCE_RUNTIME_CONFIG_H__ | |||
#define __CCE_RUNTIME_CONFIG_H__ | |||
@@ -24,105 +24,106 @@ extern "C" { | |||
#endif | |||
#define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver)) | |||
#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) | |||
#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff) | |||
#define PLAT_GET_VER(type) (type & 0xff) | |||
#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) | |||
#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff) | |||
#define PLAT_GET_VER(type) (type & 0xff) | |||
typedef enum tagRtArchType { | |||
ARCH_BEGIN = 0, | |||
ARCH_V100 = ARCH_BEGIN, | |||
ARCH_V200, | |||
ARCH_END, | |||
ARCH_BEGIN = 0, | |||
ARCH_V100 = ARCH_BEGIN, | |||
ARCH_V200, | |||
ARCH_END, | |||
} rtArchType_t; | |||
typedef enum tagRtChipType { | |||
CHIP_BEGIN = 0, | |||
CHIP_MINI = CHIP_BEGIN, | |||
CHIP_CLOUD, | |||
CHIP_MDC, | |||
CHIP_LHISI, | |||
CHIP_DC, | |||
CHIP_CLOUD_V2, | |||
CHIP_END, | |||
CHIP_BEGIN = 0, | |||
CHIP_MINI = CHIP_BEGIN, | |||
CHIP_CLOUD, | |||
CHIP_MDC, | |||
CHIP_LHISI, | |||
CHIP_DC, | |||
CHIP_CLOUD_V2, | |||
CHIP_END, | |||
} rtChipType_t; | |||
typedef enum tagRtVersion { | |||
VER_BEGIN = 0, | |||
VER_NA = VER_BEGIN, | |||
VER_ES, | |||
VER_CS, | |||
VER_END, | |||
VER_BEGIN = 0, | |||
VER_NA = VER_BEGIN, | |||
VER_ES, | |||
VER_CS, | |||
VER_SD3403, | |||
VER_END, | |||
} rtVersion_t; | |||
/* match rtChipType_t */ | |||
typedef enum tagRtPlatformType { | |||
PLATFORM_BEGIN = 0, | |||
PLATFORM_MINI_V1 = PLATFORM_BEGIN, | |||
PLATFORM_CLOUD_V1, | |||
PLATFORM_MINI_V2, | |||
PLATFORM_LHISI_ES, | |||
PLATFORM_LHISI_CS, | |||
PLATFORM_DC, | |||
PLATFORM_CLOUD_V2, | |||
PLATFORM_END, | |||
PLATFORM_BEGIN = 0, | |||
PLATFORM_MINI_V1 = PLATFORM_BEGIN, | |||
PLATFORM_CLOUD_V1, | |||
PLATFORM_MINI_V2, | |||
PLATFORM_LHISI_ES, | |||
PLATFORM_LHISI_CS, | |||
PLATFORM_DC, | |||
PLATFORM_CLOUD_V2, | |||
PLATFORM_END, | |||
} rtPlatformType_t; | |||
typedef enum tagRtCubeFracMKNFp16 { | |||
RT_CUBE_MKN_FP16_2_16_16 = 0, | |||
RT_CUBE_MKN_FP16_4_16_16, | |||
RT_CUBE_MKN_FP16_16_16_16, | |||
RT_CUBE_MKN_FP16_Default, | |||
RT_CUBE_MKN_FP16_2_16_16 = 0, | |||
RT_CUBE_MKN_FP16_4_16_16, | |||
RT_CUBE_MKN_FP16_16_16_16, | |||
RT_CUBE_MKN_FP16_Default, | |||
} rtCubeFracMKNFp16_t; | |||
typedef enum tagRtCubeFracMKNInt8 { | |||
RT_CUBE_MKN_INT8_2_32_16 = 0, | |||
RT_CUBE_MKN_INT8_4_32_4, | |||
RT_CUBE_MKN_INT8_4_32_16, | |||
RT_CUBE_MKN_INT8_16_32_16, | |||
RT_CUBE_MKN_INT8_Default, | |||
RT_CUBE_MKN_INT8_2_32_16 = 0, | |||
RT_CUBE_MKN_INT8_4_32_4, | |||
RT_CUBE_MKN_INT8_4_32_16, | |||
RT_CUBE_MKN_INT8_16_32_16, | |||
RT_CUBE_MKN_INT8_Default, | |||
} rtCubeFracMKNInt8_t; | |||
typedef enum tagRtVecFracVmulMKNFp16 { | |||
RT_VEC_VMUL_MKN_FP16_1_16_16 = 0, | |||
RT_VEC_VMUL_MKN_FP16_Default, | |||
RT_VEC_VMUL_MKN_FP16_1_16_16 = 0, | |||
RT_VEC_VMUL_MKN_FP16_Default, | |||
} rtVecFracVmulMKNFp16_t; | |||
typedef enum tagRtVecFracVmulMKNInt8 { | |||
RT_VEC_VMUL_MKN_INT8_1_32_16 = 0, | |||
RT_VEC_VMUL_MKN_INT8_Default, | |||
RT_VEC_VMUL_MKN_INT8_1_32_16 = 0, | |||
RT_VEC_VMUL_MKN_INT8_Default, | |||
} rtVecFracVmulMKNInt8_t; | |||
typedef struct tagRtAiCoreSpec { | |||
uint32_t cubeFreq; | |||
uint32_t cubeMSize; | |||
uint32_t cubeKSize; | |||
uint32_t cubeNSize; | |||
rtCubeFracMKNFp16_t cubeFracMKNFp16; | |||
rtCubeFracMKNInt8_t cubeFracMKNInt8; | |||
rtVecFracVmulMKNFp16_t vecFracVmulMKNFp16; | |||
rtVecFracVmulMKNInt8_t vecFracVmulMKNInt8; | |||
uint32_t cubeFreq; | |||
uint32_t cubeMSize; | |||
uint32_t cubeKSize; | |||
uint32_t cubeNSize; | |||
rtCubeFracMKNFp16_t cubeFracMKNFp16; | |||
rtCubeFracMKNInt8_t cubeFracMKNInt8; | |||
rtVecFracVmulMKNFp16_t vecFracVmulMKNFp16; | |||
rtVecFracVmulMKNInt8_t vecFracVmulMKNInt8; | |||
} rtAiCoreSpec_t; | |||
typedef struct tagRtAiCoreRatesPara { | |||
uint32_t ddrRate; | |||
uint32_t l2Rate; | |||
uint32_t l2ReadRate; | |||
uint32_t l2WriteRate; | |||
uint32_t l1ToL0ARate; | |||
uint32_t l1ToL0BRate; | |||
uint32_t l0CToUBRate; | |||
uint32_t ubToL2; | |||
uint32_t ubToDDR; | |||
uint32_t ubToL1; | |||
uint32_t ddrRate; | |||
uint32_t l2Rate; | |||
uint32_t l2ReadRate; | |||
uint32_t l2WriteRate; | |||
uint32_t l1ToL0ARate; | |||
uint32_t l1ToL0BRate; | |||
uint32_t l0CToUBRate; | |||
uint32_t ubToL2; | |||
uint32_t ubToDDR; | |||
uint32_t ubToL1; | |||
} rtAiCoreMemoryRates_t; | |||
typedef struct tagRtMemoryConfig { | |||
uint32_t flowtableSize; | |||
uint32_t compilerSize; | |||
uint32_t flowtableSize; | |||
uint32_t compilerSize; | |||
} rtMemoryConfig_t; | |||
typedef struct tagRtPlatformConfig { | |||
uint32_t platformConfig; | |||
uint32_t platformConfig; | |||
} rtPlatformConfig_t; | |||
/** | |||
@@ -165,7 +166,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate | |||
*/ | |||
RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); | |||
/** | |||
* @ingroup | |||
 * @brief get L2 buffer info: virtual base address and size
@@ -176,14 +176,16 @@ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); | |||
/** | |||
* @ingroup | |||
* @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be represented by 9020. | |||
* @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be | |||
* represented by 9020. | |||
* @param [out] runtimeVersion | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); | |||
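Decoding the packed value is a matter of integer division; a usage sketch (error handling elided):

```cpp
#include <cstdint>
#include <cstdio>

// The packed version is 1000 * major + 10 * minor, so 9020 decodes to "9.2".
void PrintRuntimeVersion(uint32_t packed) {
  std::printf("runtime version %u.%u\n", packed / 1000, (packed % 1000) / 10);
}
```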
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_STREAM_H__ | |||
#endif // __CCE_RUNTIME_STREAM_H__ |
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
*/ | |||
#ifndef __CCE_RUNTIME_CONTEXT_H__ | |||
#define __CCE_RUNTIME_CONTEXT_H__ | |||
@@ -30,24 +30,24 @@ extern "C" { | |||
typedef void *rtContext_t; | |||
typedef enum tagDryRunFlag { | |||
RT_DRYRUN_FLAG_FALSE = 0, | |||
RT_DRYRUN_FLAG_TRUE = 1, | |||
RT_DRYRUN_FLAG_FALSE = 0, | |||
RT_DRYRUN_FLAG_TRUE = 1, | |||
} rtDryRunFlag_t; | |||
typedef enum tagCtxMode { | |||
RT_CTX_NORMAL_MODE = 0, | |||
RT_CTX_GEN_MODE = 1, | |||
RT_CTX_NORMAL_MODE = 0, | |||
RT_CTX_GEN_MODE = 1, | |||
} rtCtxMode_t; | |||
typedef struct tagRtGroupInfo { | |||
int32_t groupId; | |||
uint32_t flag; | |||
uint32_t aicoreNum; | |||
uint32_t aicpuNum; | |||
uint32_t aivectorNum; | |||
uint32_t sdmaNum; | |||
uint32_t activeStreamNum; | |||
void *extrPtr; | |||
int32_t groupId; | |||
uint32_t flag; | |||
uint32_t aicoreNum; | |||
uint32_t aicpuNum; | |||
uint32_t aivectorNum; | |||
uint32_t sdmaNum; | |||
uint32_t activeStreamNum; | |||
void *extrPtr; | |||
} rtGroupInfo_t; | |||
/** | |||
@@ -156,6 +156,7 @@ RTS_API rtError_t rtGetGroupCount(uint32_t *count); | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtSetCtxINFMode(bool mode); | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
*/ | |||
#ifndef __CCE_RUNTIME_DEVICE_H__ | |||
#define __CCE_RUNTIME_DEVICE_H__ | |||
@@ -27,44 +27,44 @@ extern "C" { | |||
#define RT_CAPABILITY_NOT_SUPPORT (0x0) | |||
typedef struct tagRTDeviceInfo { | |||
uint8_t env_type; // 0: FPGA 1: EMU 2: ESL | |||
uint32_t ctrl_cpu_ip; | |||
uint32_t ctrl_cpu_id; | |||
uint32_t ctrl_cpu_core_num; | |||
uint32_t ctrl_cpu_endian_little; | |||
uint32_t ts_cpu_core_num; | |||
uint32_t ai_cpu_core_num; | |||
uint32_t ai_core_num; | |||
uint32_t ai_core_freq; | |||
uint32_t ai_cpu_core_id; | |||
uint32_t ai_core_id; | |||
uint32_t aicpu_occupy_bitmap; | |||
uint32_t hardware_version; | |||
uint32_t ts_num; | |||
uint8_t env_type; // 0: FPGA 1: EMU 2: ESL | |||
uint32_t ctrl_cpu_ip; | |||
uint32_t ctrl_cpu_id; | |||
uint32_t ctrl_cpu_core_num; | |||
uint32_t ctrl_cpu_endian_little; | |||
uint32_t ts_cpu_core_num; | |||
uint32_t ai_cpu_core_num; | |||
uint32_t ai_core_num; | |||
uint32_t ai_core_freq; | |||
uint32_t ai_cpu_core_id; | |||
uint32_t ai_core_id; | |||
uint32_t aicpu_occupy_bitmap; | |||
uint32_t hardware_version; | |||
uint32_t ts_num; | |||
} rtDeviceInfo_t; | |||
typedef enum tagRtRunMode { | |||
RT_RUN_MODE_OFFLINE = 0, | |||
RT_RUN_MODE_ONLINE = 1, | |||
RT_RUN_MODE_AICPU_SCHED = 2, | |||
RT_RUN_MODE_RESERVED | |||
RT_RUN_MODE_OFFLINE = 0, | |||
RT_RUN_MODE_ONLINE = 1, | |||
RT_RUN_MODE_AICPU_SCHED = 2, | |||
RT_RUN_MODE_RESERVED | |||
} rtRunMode; | |||
typedef enum tagRtAicpuDeployType { | |||
AICPU_DEPLOY_CROSS_OS = 0x0, | |||
AICPU_DEPLOY_CROSS_PROCESS = 0x1, | |||
AICPU_DEPLOY_CROSS_THREAD = 0x2, | |||
AICPU_DEPLOY_RESERVED | |||
AICPU_DEPLOY_CROSS_OS = 0x0, | |||
AICPU_DEPLOY_CROSS_PROCESS = 0x1, | |||
AICPU_DEPLOY_CROSS_THREAD = 0x2, | |||
AICPU_DEPLOY_RESERVED | |||
} rtAicpuDeployType_t; | |||
typedef enum tagRtFeatureType { | |||
FEATURE_TYPE_MEMCPY = 0, | |||
FEATURE_TYPE_RSV | |||
FEATURE_TYPE_MEMCPY = 0, | |||
FEATURE_TYPE_RSV | |||
} rtFeatureType_t; | |||
typedef enum tagMemcpyInfo { | |||
MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, | |||
MEMCPY_INFO_RSV | |||
} rtMemcpyInfo_t; | |||
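These capability enums are consumed by a query call; the sketch below assumes the usual companion rtGetRtCapability(featureType, featureInfo, value) entry point, which is not shown in this hunk, so treat the call as illustrative:

    // Sketch only: probe zero-copy memcpy support via the assumed
    // rtGetRtCapability query; RT_CAPABILITY_NOT_SUPPORT is defined above.
    #include <cstdint>
    #include "runtime/rt.h"   // assumed include path

    bool SupportsZeroCopyMemcpy() {
        int64_t value = RT_CAPABILITY_NOT_SUPPORT;
        rtError_t ret = rtGetRtCapability(FEATURE_TYPE_MEMCPY,
                                          MEMCPY_INFO_SUPPORT_ZEROCOPY, &value);
        return ret == RT_ERROR_NONE && value != RT_CAPABILITY_NOT_SUPPORT;
    }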
/** | |||
@@ -356,6 +356,7 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_DVFSPROFILE_H__ | |||
#define __CCE_RUNTIME_DVFSPROFILE_H__ | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_EVENT_H__ | |||
#define __CCE_RUNTIME_EVENT_H__ | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_KERNEL_H__ | |||
#define __CCE_RUNTIME_KERNEL_H__ | |||
@@ -29,15 +29,15 @@ extern "C" { | |||
* @brief shared memory data control | |||
*/ | |||
typedef struct tagRtSmData { | |||
uint64_t L2_mirror_addr; // preload or swap source address | |||
uint32_t L2_data_section_size; // every data size | |||
uint8_t L2_preload; // 1 - preload from mirrorAddr, 0 - no preload | |||
uint8_t modified; // 1 - data will be modified by kernel, 0 - no modified | |||
uint8_t priority; // data priority | |||
int8_t prev_L2_page_offset_base; // remap source section offset | |||
uint8_t L2_page_offset_base; // remap destination section offset | |||
uint8_t L2_load_to_ddr; // 1 - need load out, 0 - no need | |||
uint8_t reserved[2]; // reserved | |||
} rtSmData_t; | |||
/** | |||
@@ -45,12 +45,12 @@ typedef struct tagRtSmData { | |||
* @brief shared memory description | |||
*/ | |||
typedef struct tagRtSmCtrl { | |||
rtSmData_t data[8]; // data description | |||
uint64_t size; // max page Num | |||
uint8_t remap[64]; /* just using for static remap mode, default:0xFF | |||
array index: virtual l2 page id, array value: physic l2 page id */ | |||
uint8_t l2_in_main; // 0-DDR, 1-L2, default:0xFF | |||
uint8_t reserved[3]; | |||
} rtSmDesc_t; | |||
typedef rtSmDesc_t rtL2Ctrl_t; | |||
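As a sketch of how these descriptors are filled (field meanings taken from the inline comments above; the kernel-launch call that would consume the descriptor is outside this hunk):

    // Illustrative fill of an L2 descriptor for one preloadable data section.
    #include <cstdint>
    #include <cstring>
    #include "runtime/rt.h"   // assumed include path

    rtSmDesc_t MakeL2Desc(uint64_t mirrorAddr, uint32_t sectionSize) {
        rtSmDesc_t desc;
        std::memset(&desc, 0, sizeof(desc));
        std::memset(desc.remap, 0xFF, sizeof(desc.remap));  // static remap default: 0xFF
        desc.data[0].L2_mirror_addr = mirrorAddr;           // preload source address
        desc.data[0].L2_data_section_size = sectionSize;    // section size
        desc.data[0].L2_preload = 1;                        // preload from mirror address
        desc.l2_in_main = 1;                                // 1 - L2
        return desc;
    }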
@@ -60,10 +60,10 @@ typedef rtSmDesc_t rtL2Ctrl_t; | |||
* @brief device binary type | |||
*/ | |||
typedef struct tagRtDevBinary { | |||
uint32_t magic; // magic number | |||
uint32_t version; // version of binary | |||
const void *data; // binary data | |||
uint64_t length; // binary length | |||
} rtDevBinary_t; | |||
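A hedged sketch of registering such a binary; rtDevBinaryRegister is assumed as the usual consumer of rtDevBinary_t and is not declared in this hunk:

    // Sketch under assumptions: register a device binary and get a handle back.
    #include <cstdint>
    #include "runtime/rt.h"   // assumed include path

    rtError_t RegisterKernelBinary(const void *bin, uint64_t len, uint32_t magic,
                                   void **handle) {
        rtDevBinary_t devBin;
        devBin.magic = magic;    // magic number of the binary kind
        devBin.version = 0;      // version of binary
        devBin.data = bin;       // binary data
        devBin.length = len;     // binary length in bytes
        return rtDevBinaryRegister(&devBin, handle);  // assumed entry point
    }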
/** | |||
@@ -73,15 +73,15 @@ typedef struct tagRtDevBinary { | |||
#define ONLINE_PROF_MAX_PMU_NUM (8) | |||
typedef struct ProfilefDataInfo { | |||
const void *stubFunc; | |||
uint32_t blockDim; | |||
const void *args; | |||
uint32_t argsSize; | |||
rtSmDesc_t *smDesc; | |||
rtStream_t stream; | |||
uint64_t totalcycle; | |||
uint64_t ovcycle; | |||
uint64_t pmu_cnt[ONLINE_PROF_MAX_PMU_NUM]; | |||
} rtProfDataInfo_t; | |||
/** | |||
@@ -89,12 +89,12 @@ typedef struct ProfilefDataInfo { | |||
* @brief function mode type | |||
*/ | |||
typedef enum { | |||
FUNC_MODE_NORMAL = 0, | |||
FUNC_MODE_PCTRACE_USERPROFILE_RECORDLOOP, | |||
FUNC_MODE_PCTRACE_USERPROFILE_SKIPLOOP, | |||
FUNC_MODE_PCTRACE_CYCLECNT_RECORDLOOP, | |||
FUNC_MODE_PCTRACE_CYCLECNT_SKIPLOOP, | |||
FUNC_MODE_BUTT | |||
} rtFuncModeType_t; | |||
/** | |||
@@ -102,23 +102,23 @@ typedef enum { | |||
* @brief kernel info | |||
*/ | |||
typedef struct rtKernelInfo { | |||
uint64_t task_offset; // kernel offset in module | |||
/* flowtable */ | |||
void *arg; // launch kernel arg | |||
uint32_t arg_size; | |||
/* module */ | |||
void *module_addr; // module::baseaddr_ | |||
uint32_t module_size; | |||
} *rtKernelInfo_t; | |||
/** | |||
* @ingroup rt_KernelConfigDump | |||
* @brief device dump type | |||
*/ | |||
typedef enum tagRtDumpKind { | |||
RT_DATA_DUMP_KIND_INVALID = -1, | |||
RT_DATA_DUMP_KIND_DUMP = 0, | |||
RT_DATA_DUMP_KIND_RESERVED | |||
} rtDumpKind_t; | |||
/** | |||
@@ -414,6 +414,7 @@ RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length); | |||
RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc = nullptr, rtStream_t stream = nullptr); | |||
#else | |||
RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStream_t stream); | |||
#endif | |||
#endif // __CLANG_CCE_RUNTIME_H__ | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_MEM_H__ | |||
#define __CCE_RUNTIME_MEM_H__ | |||
@@ -34,6 +34,7 @@ extern "C" { | |||
*/ | |||
#define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device | |||
#define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device | |||
#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device | |||
#define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device | |||
#define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device | |||
#define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device | |||
@@ -89,40 +90,40 @@ typedef uint32_t rtMemType_t; | |||
* @brief memory copy type | |||
*/ | |||
typedef enum tagRtMemcpyKind { | |||
RT_MEMCPY_HOST_TO_HOST = 0, // host to host | |||
RT_MEMCPY_HOST_TO_DEVICE, // host to device | |||
RT_MEMCPY_DEVICE_TO_HOST, // device to host | |||
RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P | |||
RT_MEMCPY_MANAGED, // managed memory | |||
RT_MEMCPY_ADDR_DEVICE_TO_DEVICE, | |||
RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes) | |||
RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex | |||
RT_MEMCPY_RESERVED, | |||
} rtMemcpyKind_t; | |||
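A minimal host-to-device copy sketch using the kinds above; the rtMalloc/rtMemcpy/rtFree signatures are assumed from the surrounding runtime API rather than shown in this diff:

    // Sketch only: allocate HBM on device, copy host data in, clean up on failure.
    #include <cstdint>
    #include "runtime/rt.h"   // assumed include path

    rtError_t CopyToDevice(const void *host, uint64_t size, void **devPtr) {
        rtError_t ret = rtMalloc(devPtr, size, RT_MEMORY_HBM);  // HBM memory on device
        if (ret != RT_ERROR_NONE) {
            return ret;
        }
        ret = rtMemcpy(*devPtr, size, host, size, RT_MEMCPY_HOST_TO_DEVICE);
        if (ret != RT_ERROR_NONE) {
            (void)rtFree(*devPtr);  // release on copy failure
            *devPtr = nullptr;
        }
        return ret;
    }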
typedef enum tagRtMemInfoType { | |||
RT_MEMORYINFO_DDR, | |||
RT_MEMORYINFO_HBM, | |||
RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR | |||
RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR | |||
RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM | |||
RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM | |||
RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR | |||
RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR | |||
RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM | |||
RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM | |||
} rtMemInfoType_t; | |||
typedef enum tagRtRecudeKind { | |||
RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P | |||
RT_RECUDE_KIND_END | |||
} rtRecudeKind_t; | |||
typedef enum tagRtDataType { | |||
RT_DATA_TYPE_FP32 = 0, // fp32 | |||
RT_DATA_TYPE_FP16 = 1, // fp16 | |||
RT_DATA_TYPE_INT16 = 2, // int16 | |||
RT_DATA_TYPE_END | |||
} rtDataType_t; | |||
/** | |||
@@ -130,10 +131,10 @@ typedef enum tagRtDataType { | |||
* @brief memory copy channel type | |||
*/ | |||
typedef enum tagRtMemcpyChannelType { | |||
RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P | |||
RT_MEMCPY_CHANNEL_TYPE_PCIe, | |||
RT_MEMCPY_CHANNEL_TYPE_HCCs, // not support now | |||
RT_MEMCPY_CHANNEL_TYPE_RESERVED, | |||
} rtMemcpyChannelType_t; | |||
/** | |||
@@ -141,18 +142,18 @@ typedef enum tagRtMemcpyChannelType { | |||
* @brief ai core memory size | |||
*/ | |||
typedef struct rtAiCoreMemorySize { | |||
uint32_t l0ASize; | |||
uint32_t l0BSize; | |||
uint32_t l0CSize; | |||
uint32_t l1Size; | |||
uint32_t ubSize; | |||
uint32_t l2Size; | |||
uint32_t l2PageNum; | |||
uint32_t blockSize; | |||
uint64_t bankSize; | |||
uint64_t bankNum; | |||
uint64_t burstInOneBlock; | |||
uint64_t bankGroupNum; | |||
} rtAiCoreMemorySize_t; | |||
/** | |||
@@ -160,10 +161,10 @@ typedef struct rtAiCoreMemorySize { | |||
* @brief memory type | |||
*/ | |||
typedef enum tagRtMemoryType { | |||
RT_MEMORY_TYPE_HOST = 1, | |||
RT_MEMORY_TYPE_DEVICE = 2, | |||
RT_MEMORY_TYPE_SVM = 3, | |||
RT_MEMORY_TYPE_DVPP = 4 | |||
} rtMemoryType_t; | |||
/** | |||
@@ -171,31 +172,31 @@ typedef enum tagRtMemoryType { | |||
* @brief memory attribute | |||
*/ | |||
typedef struct tagRtPointerAttributes { | |||
rtMemoryType_t memoryType; // host memory or device memory | |||
rtMemoryType_t locationType; | |||
uint32_t deviceID; // device ID | |||
uint32_t pageSize; | |||
} rtPointerAttributes_t; | |||
typedef struct rtMallocHostSharedMemoryIn { | |||
const char *name; | |||
const uint64_t size; | |||
uint32_t flag; | |||
} rtMallocHostSharedMemoryIn; | |||
typedef struct rtMallocHostSharedMemoryOut { | |||
int fd; | |||
void *ptr; | |||
void *devPtr; | |||
} rtMallocHostSharedMemoryOut; | |||
typedef struct rtFreeHostSharedMemoryIn { | |||
const char *name; | |||
const uint64_t size; | |||
int fd; | |||
void *ptr; | |||
void *devPtr; | |||
} rtFreeHostSharedMemoryIn; | |||
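A round-trip sketch for the in/out structs above, assuming the companion rtMallocHostSharedMemory/rtFreeHostSharedMemory entry points, which are not declared in this hunk; the queue name is a placeholder:

    // Sketch only: allocate and free named host shared memory.
    #include "runtime/rt.h"   // assumed include path

    void SharedMemoryRoundTrip() {
        rtMallocHostSharedMemoryIn in = {"demo_channel", 4096, 0};
        rtMallocHostSharedMemoryOut out = {-1, nullptr, nullptr};
        if (rtMallocHostSharedMemory(&in, &out) != RT_ERROR_NONE) {
            return;
        }
        // ... host side uses out.ptr, device side uses out.devPtr ...
        rtFreeHostSharedMemoryIn freeIn = {"demo_channel", 4096, out.fd, out.ptr, out.devPtr};
        (void)rtFreeHostSharedMemory(&freeIn);
    }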
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_RT_H__ | |||
#define __CCE_RUNTIME_RT_H__ | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_MODEL_H__ | |||
#define __CCE_RUNTIME_MODEL_H__ | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_STREAM_H__ | |||
#define __CCE_RUNTIME_STREAM_H__ | |||
@@ -1,18 +1,10 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
* @file index_transform.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2018-2019. All Rights Reserved. | |||
* | |||
* This program is used to get the logical device id from the physical device id. | |||
*/ | |||
#ifndef INC_TDT_INDEX_TRANSFORM_H | |||
#define INC_TDT_INDEX_TRANSFORM_H | |||
@@ -1,4 +1,4 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
@@ -61,7 +61,7 @@ int32_t TdtHostInit(uint32_t deviceId); | |||
* @li tdt_host_interface.h: Header file where the interface declaration is located. | |||
* @li data_common.h: Header file where 'DataItem' is defined | |||
*/ | |||
int32_t TdtHostPushData(const std::string &channelName, const std::vector<DataItem> &item); | |||
int32_t TdtHostPushData(const std::string &channelName, const std::vector<DataItem> &item, uint32_t deviceId = 0); | |||
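The signature change above appends a deviceId parameter with a default of 0, so pre-change call sites keep compiling unchanged. A hedged usage sketch; include paths and the channel name are placeholders:

    // Sketch only: push the same batch to the default device and to device 1.
    #include <cstdint>
    #include <string>
    #include <vector>
    #include "tdt/tdt_host_interface.h"   // assumed include path
    #include "tdt/data_common.h"          // defines DataItem

    int32_t PushBatch(const std::vector<tdt::DataItem> &items) {
        int32_t ret = tdt::TdtHostPushData("train_channel", items);  // device 0 implied
        if (ret != 0) {
            return ret;
        }
        return tdt::TdtHostPushData("train_channel", items, 1);      // explicit device 1
    }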
/** | |||
* @ingroup TdtHostDestroy | |||
@@ -203,25 +203,6 @@ int32_t TdtInFeedDestroy(uint32_t deviceId); | |||
* @li tdt_host_interface.h: Header file where the interface declaration is located. | |||
*/ | |||
int32_t TdtOutFeedDestroy(); | |||
/** | |||
* @ingroup TdtInFeedData | |||
* @brief Blocking queue. When the queue is full, the Push interface will block. | |||
* | |||
* @par Function | |||
* Blocking queue. When the queue is full, the Push interface will block. | |||
* | |||
* @param channelName [IN] type #String. queue channel name | |||
* @param items [IN] type #vector<DataItem> DataItem is defined in data_common.h. input data | |||
* @retval 0 Success | |||
* @retval OtherValues Fail | |||
* | |||
* @par Dependency | |||
* @li libtsdclient.so: Library to which the interface belongs. | |||
* @li tdt_host_interface.h: Header file where the interface declaration is located. | |||
* @li data_common.h: Header file where 'DataItem' is defined | |||
*/ | |||
int32_t TdtInFeedData(const std::string &channelName, const std::vector<DataItem> &item, uint32_t deviceId); | |||
} // namespace tdt | |||
#ifdef __cplusplus | |||
} | |||
@@ -17,380 +17,96 @@ | |||
#ifndef MSPROFILER_API_PROF_ACL_API_H_ | |||
#define MSPROFILER_API_PROF_ACL_API_H_ | |||
#define MSVP_MAX_DEV_NUM 64 | |||
// DataTypeConfig | |||
#define PROF_ACL_API 0x00000001 | |||
#define PROF_TASK_TIME 0x00000002 | |||
#define PROF_AICORE_METRICS 0x00000004 | |||
#define PROF_AICPU_TRACE 0x00000008 | |||
#define PROF_MODEL_EXECUTE 0x00000010 | |||
#define PROF_RUNTIME_API 0x00000020 | |||
#define PROF_RUNTIME_TRACE 0x00000040 | |||
#define PROF_SCHEDULE_TIMELINE 0x00000080 | |||
#define PROF_SCHEDULE_TRACE 0x00000100 | |||
#define PROF_AIVECTORCORE_METRICS 0x00000200 | |||
#define PROF_SUBTASK_TIME 0x00000400 | |||
#define PROF_TRAINING_TRACE 0x00000800 | |||
#define PROF_HCCL_TRACE 0x00001000 | |||
#define PROF_TASK_TRACE 0x00001852 | |||
// system profiling switch | |||
#define PROF_CPU 0x00010000 | |||
#define PROF_HARDWARE_MEMORY 0x00020000 | |||
#define PROF_IO 0x00040000 | |||
#define PROF_INTER_CONNECTION 0x00080000 | |||
#define PROF_DVPP 0x00100000 | |||
#define PROF_SYS_AICORE_SAMPLE 0x00200000 | |||
#define PROF_AIVECTORCORE_SAMPLE 0x00400000 | |||
#define PROF_MODEL_LOAD 0x8000000000000000 | |||
// DataTypeConfig MASK | |||
#define PROF_ACL_API_MASK 0x00000001 | |||
#define PROF_TASK_TIME_MASK 0x00000002 | |||
#define PROF_AICORE_METRICS_MASK 0x00000004 | |||
#define PROF_AICPU_TRACE_MASK 0x00000008 | |||
#define PROF_MODEL_EXECUTE_MASK 0x00000010 | |||
#define PROF_RUNTIME_API_MASK 0x00000020 | |||
#define PROF_RUNTIME_TRACE_MASK 0x00000040 | |||
#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 | |||
#define PROF_SCHEDULE_TRACE_MASK 0x00000100 | |||
#define PROF_AIVECTORCORE_METRICS_MASK 0x00000200 | |||
#define PROF_SUBTASK_TIME_MASK 0x00000400 | |||
#define PROF_TRAINING_TRACE_MASK 0x00000800 | |||
#define PROF_HCCL_TRACE_MASK 0x00001000 | |||
// system profiling mask | |||
#define PROF_CPU_MASK 0x00010000 | |||
#define PROF_HARDWARE_MEMORY_MASK 0x00020000 | |||
#define PROF_IO_MASK 0x00040000 | |||
#define PROF_INTER_CONNECTION_MASK 0x00080000 | |||
#define PROF_DVPP_MASK 0x00100000 | |||
#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 | |||
#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 | |||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000 | |||
#ifndef OS_TYPE | |||
#define OS_TYPE 0 | |||
#endif // OS_TYPE | |||
#if (OS_TYPE != LINUX) | |||
#define MSVP_PROF_API __declspec(dllexport) | |||
#else | |||
#define MSVP_PROF_API __attribute__((visibility("default"))) | |||
#endif | |||
// DataTypeConfig | |||
#define PROF_ACL_API 0x0001 | |||
#define PROF_TASK_TIME 0x0002 | |||
#define PROF_AICORE_METRICS 0x0004 | |||
#define PROF_AICPU_TRACE 0x0008 | |||
#define PROF_MODEL_EXECUTE 0x0010 | |||
#define PROF_RUNTIME_API 0x0020 | |||
#define PROF_RUNTIME_TRACE 0x0040 | |||
#define PROF_SCHEDULE_TIMELINE 0x0080 | |||
#define PROF_SCHEDULE_TRACE 0x0100 | |||
#define PROF_AIVECTORCORE_METRICS 0x0200 | |||
#define PROF_SUBTASK_TIME 0x0400 | |||
#define PROF_TRAINING_TRACE 0x0800 | |||
#define PROF_HCCL_TRACE 0x1000 | |||
#define PROF_DATA_PROCESS 0x2000 | |||
#define PROF_TASK_TRACE 0x3842 | |||
#define PROF_MODEL_LOAD 0x8000000000000000 | |||
// DataTypeConfig MASK | |||
#define PROF_ACL_API_MASK 0x0001 | |||
#define PROF_TASK_TIME_MASK 0x0002 | |||
#define PROF_AICORE_METRICS_MASK 0x0004 | |||
#define PROF_AICPU_TRACE_MASK 0x0008 | |||
#define PROF_MODEL_EXECUTE_MASK 0x0010 | |||
#define PROF_RUNTIME_API_MASK 0x0020 | |||
#define PROF_RUNTIME_TRACE_MASK 0x0040 | |||
#define PROF_SCHEDULE_TIMELINE_MASK 0x0080 | |||
#define PROF_SCHEDULE_TRACE_MASK 0x0100 | |||
#define PROF_AIVECTORCORE_METRICS_MASK 0x0200 | |||
#define PROF_SUBTASK_TIME_MASK 0x0400 | |||
#define PROF_TRAINING_TRACE_MASK 0x0800 | |||
#define PROF_HCCL_TRACE_MASK 0x1000 | |||
#define PROF_DATA_PROCESS_MASK 0x2000 | |||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000 | |||
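Note the composite values: the old PROF_TASK_TRACE (0x1852) OR-ed PROF_MODEL_EXECUTE into the task bits, while the new value (0x3842) swaps it for the new PROF_DATA_PROCESS switch. A quick illustrative check, assuming the defines above are in scope:

    // Sanity check of the composite value: PROF_TASK_TRACE is exactly the OR
    // of task time plus the runtime/training/hccl trace and data-process bits.
    static_assert((PROF_TASK_TIME | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE |
                   PROF_HCCL_TRACE | PROF_DATA_PROCESS) == PROF_TASK_TRACE,
                  "PROF_TASK_TRACE = task time + runtime/training/hccl trace + data process");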
#include <cstdint> | |||
#include <string> | |||
/** | |||
* @name ProfErrorCode | |||
* @brief error code enum of prof_acl_apis | |||
*/ | |||
enum ProfErrorCode { | |||
PROF_ERROR_NONE = 0, // ok | |||
PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr | |||
PROF_ERROR_REPEAT_INIT, // profiling has already been inited | |||
PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string | |||
PROF_ERROR_DIR_NO_ACCESS, // dir is not accessible | |||
PROF_ERROR_FAILURE, // failed to init or start profiling | |||
PROF_ERROR_NOT_INITED, // profiling has not been inited | |||
PROF_ERROR_DEVICE_INVALID, // device id invalid | |||
PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics | |||
PROF_ERROR_REPEAT_START, // profiling has already been started | |||
PROF_ERROR_NOT_STARTED, // profiling has not been started | |||
PROF_ERROR_REPEAT_SUBSCRIBE, // same model id has already been subscribed | |||
PROF_ERROR_MODEL_ID_INVALID, // model id does not exist or has not been subscribed | |||
PROF_ERROR_API_CONFLICT, // prof ctrl api mode conflicts with subscribe mode | |||
}; | |||
/** | |||
* @brief transfer profiling config in acl.json to sample config | |||
* @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} | |||
* @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); | |||
/** | |||
* @name ProfInit | |||
* @brief init profiling | |||
* @param profInitCfg [IN] config of init profiling of json format | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); | |||
/** | |||
* @name ProfAicoreMetrics | |||
* @brief aicore metrics enum | |||
*/ | |||
enum ProfAicoreMetrics { | |||
PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, | |||
PROF_AICORE_PIPELINE = 1, | |||
PROF_AICORE_SYNCHRONIZATION = 2, | |||
PROF_AICORE_MEMORY = 3, | |||
PROF_AICORE_INTERNAL_MEMORY = 4, | |||
PROF_AICORE_STALL = 5, | |||
PROF_AICORE_METRICS_COUNT, | |||
PROF_AICORE_NONE = 0xff, | |||
}; | |||
/** | |||
* @name ProfConfig | |||
* @brief struct of ProfStart | |||
*/ | |||
struct ProfConfig { | |||
uint32_t devNums; // length of device id list | |||
uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list | |||
ProfAicoreMetrics aicoreMetrics; // aicore metric | |||
uint64_t dataTypeConfig; // data type to start profiling | |||
}; | |||
/** | |||
* @name ProfStartProfiling | |||
* @brief start profiling | |||
* @param profStartCfg [IN] config to start profiling | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); | |||
/** | |||
* @name ProfStopProfiling | |||
* @brief stop profiling | |||
* @param profStopCfg [IN] config to stop profiling | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); | |||
/** | |||
* @name ProfFinalize | |||
* @brief finalize profiling task | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfFinalize(); | |||
/** | |||
* @name ProfGetDataTypeConfig | |||
* @brief get the dataTypeConfig that profiling was started with on one device | |||
* @param deviceId [IN] deviceId to get dataTypeConfig | |||
* @param dataTypeConfig [OUT] queried dataTypeConfig | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); | |||
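Taken together, the declarations above support a simple init/start/stop/finalize session. An end-to-end sketch; the JSON init string is a placeholder, since its schema is not documented in this header:

    // Sketch only: profile one device with task time and ai core metrics.
    #include <cstdint>
    #include <string>
    #include "prof_acl_api.h"   // assumed include name for this header

    int32_t ProfileOneDevice(uint32_t deviceId) {
        int32_t ret = ProfInit(R"({"result_path":"/tmp/profiling"})");
        if (ret != PROF_ERROR_NONE) {
            return ret;
        }
        ProfConfig cfg = {};
        cfg.devNums = 1;                    // one entry in the device id list
        cfg.devIdList[0] = deviceId;        // physical device id
        cfg.aicoreMetrics = PROF_AICORE_PIPELINE;
        cfg.dataTypeConfig = PROF_TASK_TIME | PROF_AICORE_METRICS;
        ret = ProfStartProfiling(&cfg);
        if (ret == PROF_ERROR_NONE) {
            // ... run the workload to be profiled ...
            ret = ProfStopProfiling(&cfg);
        }
        (void)ProfFinalize();
        return ret;
    }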
namespace Msprofiler { | |||
namespace Api { | |||
/** | |||
* @brief transfer profiling config in acl.json to sample config | |||
* @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} | |||
* @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); | |||
/** | |||
* @name ProfInit | |||
* @brief init profiling | |||
* @param profInitCfg [IN] config of init profiling of json format | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); | |||
/** | |||
* @name ProfStartProfiling | |||
* @brief start profiling | |||
* @param profStartCfg [IN] config to start profiling | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); | |||
/** | |||
* @name ProfStopProfiling | |||
* @brief stop profiling | |||
* @param profStopCfg [IN] config to stop profiling | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); | |||
/** | |||
* @name ProfFinalize | |||
* @brief finalize profiling task | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfFinalize(); | |||
/** | |||
* @name ProfGetDataTypeConfig | |||
* @brief get the dataTypeConfig that profiling was started with on one device | |||
* @param deviceId [IN] deviceId to get dataTypeConfig | |||
* @param dataTypeConfig [OUT] queried dataTypeConfig | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); | |||
/** | |||
* @name WorkMode | |||
* @brief profiling api work mode | |||
*/ | |||
enum WorkMode { | |||
WORK_MODE_OFF, // profiling not at work | |||
WORK_MODE_API_CTRL, // profiling work on api ctrl mode, (ProfInit) | |||
WORK_MODE_SUBSCRIBE, // profiling work on subscribe mode | |||
}; | |||
/** | |||
* @name ProfGetApiWorkMode | |||
* @brief get profiling api work mode | |||
* @return WorkMode | |||
*/ | |||
MSVP_PROF_API WorkMode ProfGetApiWorkMode(); | |||
/** | |||
* @name ProfSubscribeConfig | |||
* @brief config of subscribe api | |||
*/ | |||
struct ProfSubscribeConfig { | |||
bool timeInfo; // subscribe op time | |||
ProfAicoreMetrics aicoreMetrics; // subscribe ai core metrics | |||
void* fd; // pipe fd | |||
}; | |||
/** | |||
* @name ProfGetDataTypeConfig | |||
* @brief get DataTypeConfig of subscribe | |||
* @param profSubscribeConfig [IN] config to subscribe data | |||
* @return DataTypeConfig | |||
*/ | |||
MSVP_PROF_API uint64_t ProfGetDataTypeConfig(const ProfSubscribeConfig *profSubscribeConfig); | |||
/** | |||
* @name ProfModelSubscribe | |||
* @brief subscribe data of one model id | |||
* @param modelId [IN] model id to subscribe data | |||
* @param devId [IN] device id of model | |||
* @param profSubscribeConfig [IN] config to subscribe data | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfModelSubscribe(uint32_t modelId, uint32_t devId, | |||
const ProfSubscribeConfig *profSubscribeConfig); | |||
/** | |||
* @name ProfIsModelSubscribed | |||
* @brief check if a model id is subscribed | |||
* @param modelId [IN] model id to check | |||
* @return true: subscribed, false: not | |||
*/ | |||
MSVP_PROF_API bool ProfIsModelSubscribed(uint32_t modelId); | |||
/** | |||
* @name ProfModelUnSubscribe | |||
* @brief unsubscribe a model id | |||
* @param modelId [IN] model id to unsubscribe | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfModelUnSubscribe(uint32_t modelId); | |||
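A sketch of subscribing one model with the config struct above; creating the pipe whose fd is handed over is elided, and the include name is an assumption:

    // Sketch only: subscribe to per-op data for one loaded model.
    #include <cstdint>
    #include "prof_acl_api.h"   // assumed include name

    int32_t SubscribeModel(uint32_t modelId, uint32_t devId, void *pipeFd) {
        Msprofiler::Api::ProfSubscribeConfig cfg;
        cfg.timeInfo = true;                   // subscribe op time
        cfg.aicoreMetrics = PROF_AICORE_NONE;  // no ai core metrics
        cfg.fd = pipeFd;                       // pipe fd for data delivery
        if (Msprofiler::Api::ProfIsModelSubscribed(modelId)) {
            return PROF_ERROR_REPEAT_SUBSCRIBE;
        }
        return Msprofiler::Api::ProfModelSubscribe(modelId, devId, &cfg);
    }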
/** | |||
* @name ProfGetOpDescSize | |||
* @brief get profiling data struct size | |||
* @param opDescSize [OUT] bytes of profiling subscribe data struct | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfGetOpDescSize(uint32_t *opDescSize); | |||
/** | |||
* @name ProfGetOpNum | |||
* @brief get the number of op records contained in data | |||
* @param data [IN] data read from pipe | |||
* @param len [IN] data length | |||
* @param opNum [OUT] number of op in data | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfGetOpNum(const void *data, uint32_t len, uint32_t *opNum); | |||
/** | |||
* @name ProfGetModelId | |||
* @brief get model id of specific part of data | |||
* @param data [IN] data read from pipe | |||
* @param len [IN] data length | |||
* @param index [IN] index of part(op) | |||
* @return model id | |||
*/ | |||
MSVP_PROF_API uint32_t ProfGetModelId(const void *data, uint32_t len, uint32_t index); | |||
/** | |||
* @name ProfGetOpType | |||
* @brief get op type of specific part of data | |||
* @param data [IN] data read from pipe | |||
* @param len [IN] data length | |||
* @param opType [OUT] op type buffer | |||
* @param opTypeLen [IN] buffer size of param opType | |||
* @param index [IN] index of part(op) | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfGetOpType(const void *data, uint32_t len, char *opType, uint32_t opTypeLen, uint32_t index); | |||
/** | |||
* @name ProfGetOpName | |||
* @brief get op name of specific part of data | |||
* @param data [IN] data read from pipe | |||
* @param len [IN] data length | |||
* @param opName [OUT] op name buffer | |||
* @param opNameLen [IN] buffer size of param opName | |||
* @param index [IN] index of part(op) | |||
* @return ProfErrorCode | |||
*/ | |||
MSVP_PROF_API int32_t ProfGetOpName(const void *data, uint32_t len, char *opName, uint32_t opNameLen, uint32_t index); | |||
/** | |||
* @name ProfGetOpStart | |||
* @brief get op start timestamp of specific part of data | |||
* @param data [IN] data read from pipe | |||
* @param len [IN] data length | |||
* @param index [IN] index of part(op) | |||
* @return op start timestamp (us) | |||
*/ | |||
MSVP_PROF_API uint64_t ProfGetOpStart(const void *data, uint32_t len, uint32_t index); | |||
/** | |||
* @name ProfGetOpEnd | |||
* @brief get op end timestamp of specific part of data | |||
* @param data [IN] data read from pipe | |||
* @param len [IN] data length | |||
* @param index [IN] index of part(op) | |||
* @return op end timestamp (us) | |||
*/ | |||
MSVP_PROF_API uint64_t ProfGetOpEnd(const void *data, uint32_t len, uint32_t index); | |||
/** | |||
* @name ProfGetOpDuration | |||
* @brief get op duration of specific part of data | |||
* @param data [IN] data read from pipe | |||
* @param len [IN] data length | |||
* @param index [IN] index of part(op) | |||
* @return op duration (us) | |||
*/ | |||
MSVP_PROF_API uint64_t ProfGetOpDuration(const void *data, uint32_t len, uint32_t index); | |||
/** | |||
* @name ProfGetOpExecutionTime | |||
* @brief get op execution time of specific part of data | |||
* @param data [IN] data read from pipe | |||
* @param len [IN] data length | |||
* @param index [IN] index of part(op) | |||
* @return op execution time (us) | |||
*/ | |||
MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); | |||
/** | |||
* @name ProfGetOpCubeOps | |||
* @brief get op cube fops of specific part of data | |||
* @param data [IN] data read from pipe | |||
* @param len [IN] data length | |||
* @param index [IN] index of part(op) | |||
* @return op cube fops | |||
*/ | |||
MSVP_PROF_API uint64_t ProfGetOpCubeOps(const void *data, uint32_t len, uint32_t index); | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
/** | |||
* @name ProfGetOpVectorOps | |||
* @brief get op vector fops of specific part of data | |||
* @param data [IN] data read from pipe | |||
* @param len [IN] data length | |||
* @param index [IN] index of part(op) | |||
* @return op vector fops | |||
*/ | |||
MSVP_PROF_API uint64_t ProfGetOpVectorOps(const void *data, uint32_t len, uint32_t index); | |||
} // namespace Api | |||
} // namespace Msprofiler | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // MSPROFILER_API_PROF_ACL_API_H_ |
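A sketch of draining subscribed per-op records with the getters declared above; how the raw buffer is read from the subscription pipe is elided, and the include name is an assumption:

    // Sketch only: walk every op record in a buffer read from the pipe.
    #include <cstdint>
    #include <cstdio>
    #include "prof_acl_api.h"   // assumed include name

    void DumpOps(const void *data, uint32_t len) {
        using namespace Msprofiler::Api;
        uint32_t opNum = 0;
        if (ProfGetOpNum(data, len, &opNum) != PROF_ERROR_NONE) {
            return;
        }
        for (uint32_t i = 0; i < opNum; ++i) {
            char opName[256] = {0};
            (void)ProfGetOpName(data, len, opName, sizeof(opName), i);
            std::printf("op %s: start=%llu us, duration=%llu us\n", opName,
                        static_cast<unsigned long long>(ProfGetOpStart(data, len, i)),
                        static_cast<unsigned long long>(ProfGetOpDuration(data, len, i)));
        }
    }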
@@ -26,6 +26,8 @@ | |||
#define MSVP_PROF_API __attribute__((visibility("default"))) | |||
#endif | |||
#include "prof_callback.h" | |||
/** | |||
* @file prof_reporter.h | |||
* @defgroup reporter the reporter group | |||
@@ -1,78 +1,72 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/** | |||
* @file tune_api.h | |||
* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n | |||
* Description: header file of the mstune tuning APIs | |||
*/ | |||
/** @defgroup mstune mstune tuning APIs */ | |||
#ifndef TUNE_API_H | |||
#define TUNE_API_H | |||
#include <vector> | |||
#include <map> | |||
#include <string> | |||
#include "graph/graph.h" | |||
#include "ge/ge_api.h" | |||
/** | |||
* @ingroup mstune | |||
* | |||
* mstune status | |||
*/ | |||
enum MsTuneStatus { | |||
MSTUNE_SUCCESS, /** tune success */ | |||
MSTUNE_FAILED, /** tune failed */ | |||
}; | |||
// Option key: for train options sets | |||
const std::string MSTUNE_SELF_KEY = "mstune"; | |||
const std::string MSTUNE_GEINIT_KEY = "initialize"; | |||
const std::string MSTUNE_GESESS_KEY = "session"; | |||
/** | |||
* @ingroup mstune | |||
* @par Description: command-line tuning | |||
* | |||
* @attention None | |||
* @param option [IN] tuning options | |||
* @param msg [OUT] message returned when tuning fails | |||
* @retval #MSTUNE_SUCCESS executed successfully | |||
* @retval #MSTUNE_FAILED execution failed | |||
* @par Dependency: | |||
* @li tune_api.cpp: development package to which this interface belongs. | |||
* @li tune_api.h: header file where this interface is declared. | |||
* @see None | |||
* @since | |||
*/ | |||
MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg); | |||
/** | |||
* @ingroup mstune | |||
* @par Description: gradient tuning | |||
* | |||
* @attention None | |||
* @param tuningGraph [IN] graph to tune | |||
* @param dependGraph [IN] graphs the tuning graph depends on | |||
* @param session [IN] GE session handle | |||
* @param option [IN] option sets, containing both tuning options and GE options | |||
* @retval #MSTUNE_SUCCESS executed successfully | |||
* @retval #MSTUNE_FAILED execution failed | |||
* @par Dependency: | |||
* @li tune_api.cpp: development package to which this interface belongs. | |||
* @li tune_api.h: header file where this interface is declared. | |||
* @see None | |||
* @since | |||
*/ | |||
extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph, | |||
ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option); | |||
#endif |
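A hedged usage sketch for the tuning entry points above. Only the MSTUNE_* constants come from this header; the inner option key/value pair is an illustrative placeholder:

    // Sketch only: run gradient tuning on one graph with no dependency graphs.
    #include <map>
    #include <string>
    #include <vector>
    #include "tune_api.h"

    MsTuneStatus TuneGraph(ge::Graph &graph, ge::Session *session) {
        std::vector<ge::Graph> dependGraphs;  // no dependency graphs in this sketch
        std::map<std::string, std::map<std::string, std::string>> options;
        options[MSTUNE_SELF_KEY] = {{"work_path", "/tmp/mstune"}};  // placeholder option
        return MsTrainTuning(graph, dependGraphs, session, options);
    }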