Browse Source

!966 sync code 0116 h

From: @changzherui
Reviewed-by: @liujunzhu,@ljl0711
Signed-off-by: @ljl0711
tags/v1.2.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
687d5f2ab1
97 changed files with 8881 additions and 1108 deletions
  1. +73
    -0
      inc/external/acl/acl.h
  2. +40
    -0
      inc/external/acl/acl_base.h
  3. +1186
    -0
      inc/external/acl/acl_mdl.h
  4. +296
    -0
      inc/external/acl/acl_prof.h
  5. +932
    -0
      inc/external/acl/acl_rt.h
  6. +276
    -0
      inc/external/acl/acl_tdt.h
  7. +13
    -3
      inc/external/acl/error_codes/rt_error_codes.h
  8. +2389
    -0
      inc/external/acl/ops/acl_dvpp.h
  9. +134
    -0
      inc/external/hccl/hccl.h
  10. +101
    -0
      inc/external/hccl/hccl_types.h
  11. +13
    -3
      inc/external/runtime/rt_error_codes.h
  12. +20
    -20
      third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
  13. +1
    -0
      third_party/fwkacllib/inc/cce/aicpu_engine.h
  14. +1
    -0
      third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
  15. +28
    -0
      third_party/fwkacllib/inc/hccl/hcom.h
  16. +2
    -1
      third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
  17. +83
    -83
      third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h
  18. +1
    -1
      third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
  19. +3
    -1
      third_party/fwkacllib/inc/ops/aipp.h
  20. +2
    -1
      third_party/fwkacllib/inc/ops/all_ops.h
  21. +74
    -1
      third_party/fwkacllib/inc/ops/array_ops.h
  22. +1
    -1
      third_party/fwkacllib/inc/ops/audio_ops.h
  23. +1
    -1
      third_party/fwkacllib/inc/ops/batch_ops.h
  24. +1
    -1
      third_party/fwkacllib/inc/ops/bitwise_ops.h
  25. +1
    -1
      third_party/fwkacllib/inc/ops/boosted_trees_ops.h
  26. +1
    -1
      third_party/fwkacllib/inc/ops/candidate_sampling_ops.h
  27. +1
    -1
      third_party/fwkacllib/inc/ops/condtake_ops.h
  28. +1
    -1
      third_party/fwkacllib/inc/ops/control_flow_ops.h
  29. +1
    -1
      third_party/fwkacllib/inc/ops/ctc_ops.h
  30. +4
    -4
      third_party/fwkacllib/inc/ops/data_flow_ops.h
  31. +326
    -7
      third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
  32. +1
    -1
      third_party/fwkacllib/inc/ops/functional_ops.h
  33. +1
    -1
      third_party/fwkacllib/inc/ops/get_data_ops.h
  34. +33
    -14
      third_party/fwkacllib/inc/ops/hcom_ops.h
  35. +1
    -1
      third_party/fwkacllib/inc/ops/hvd_ops.h
  36. +166
    -30
      third_party/fwkacllib/inc/ops/image_ops.h
  37. +1
    -1
      third_party/fwkacllib/inc/ops/internal_ops.h
  38. +1
    -1
      third_party/fwkacllib/inc/ops/linalg_ops.h
  39. +230
    -0
      third_party/fwkacllib/inc/ops/list_ops.h
  40. +1
    -1
      third_party/fwkacllib/inc/ops/logging_ops.h
  41. +1
    -1
      third_party/fwkacllib/inc/ops/lookup_ops.h
  42. +64
    -1
      third_party/fwkacllib/inc/ops/math_ops.h
  43. +50
    -7
      third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
  44. +1
    -1
      third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
  45. +344
    -72
      third_party/fwkacllib/inc/ops/nn_calculation_ops.h
  46. +111
    -2
      third_party/fwkacllib/inc/ops/nn_detect_ops.h
  47. +261
    -6
      third_party/fwkacllib/inc/ops/nn_norm_ops.h
  48. +1
    -1
      third_party/fwkacllib/inc/ops/nn_ops.h
  49. +322
    -9
      third_party/fwkacllib/inc/ops/nn_pooling_ops.h
  50. +1
    -1
      third_party/fwkacllib/inc/ops/nn_training_ops.h
  51. +1
    -1
      third_party/fwkacllib/inc/ops/no_op.h
  52. +203
    -1
      third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
  53. +1
    -1
      third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
  54. +1
    -1
      third_party/fwkacllib/inc/ops/outfeed_ops.h
  55. +47
    -9
      third_party/fwkacllib/inc/ops/pad_ops.h
  56. +1
    -1
      third_party/fwkacllib/inc/ops/parsing_ops.h
  57. +1
    -1
      third_party/fwkacllib/inc/ops/quantize_ops.h
  58. +1
    -1
      third_party/fwkacllib/inc/ops/ragged_array_ops.h
  59. +1
    -1
      third_party/fwkacllib/inc/ops/ragged_conversion_ops.h
  60. +1
    -1
      third_party/fwkacllib/inc/ops/ragged_math_ops.h
  61. +55
    -1
      third_party/fwkacllib/inc/ops/random_ops.h
  62. +41
    -6
      third_party/fwkacllib/inc/ops/reduce_ops.h
  63. +1
    -1
      third_party/fwkacllib/inc/ops/resource_variable_ops.h
  64. +283
    -24
      third_party/fwkacllib/inc/ops/rnn.h
  65. +1
    -1
      third_party/fwkacllib/inc/ops/rpn_ops.h
  66. +1
    -1
      third_party/fwkacllib/inc/ops/save_ops.h
  67. +1
    -1
      third_party/fwkacllib/inc/ops/sdca_ops.h
  68. +183
    -1
      third_party/fwkacllib/inc/ops/selection_ops.h
  69. +1
    -1
      third_party/fwkacllib/inc/ops/set_ops.h
  70. +1
    -1
      third_party/fwkacllib/inc/ops/sparse_ops.h
  71. +1
    -1
      third_party/fwkacllib/inc/ops/spectral_ops.h
  72. +1
    -1
      third_party/fwkacllib/inc/ops/split_combination_ops.h
  73. +1
    -1
      third_party/fwkacllib/inc/ops/state_ops.h
  74. +1
    -1
      third_party/fwkacllib/inc/ops/stateful_random_ops.h
  75. +1
    -1
      third_party/fwkacllib/inc/ops/stateless_random_ops.h
  76. +1
    -1
      third_party/fwkacllib/inc/ops/string_ops.h
  77. +1
    -1
      third_party/fwkacllib/inc/ops/swap_co_ops.h
  78. +1
    -1
      third_party/fwkacllib/inc/ops/target_crop_and_resize.h
  79. +9
    -10
      third_party/fwkacllib/inc/ops/transformation_ops.h
  80. +1
    -1
      third_party/fwkacllib/inc/ops/warp_perspective_ops.h
  81. +27
    -27
      third_party/fwkacllib/inc/runtime/base.h
  82. +73
    -71
      third_party/fwkacllib/inc/runtime/config.h
  83. +18
    -17
      third_party/fwkacllib/inc/runtime/context.h
  84. +32
    -31
      third_party/fwkacllib/inc/runtime/dev.h
  85. +5
    -5
      third_party/fwkacllib/inc/runtime/dvfsprofile.h
  86. +5
    -5
      third_party/fwkacllib/inc/runtime/event.h
  87. +50
    -49
      third_party/fwkacllib/inc/runtime/kernel.h
  88. +66
    -65
      third_party/fwkacllib/inc/runtime/mem.h
  89. +5
    -5
      third_party/fwkacllib/inc/runtime/rt.h
  90. +5
    -5
      third_party/fwkacllib/inc/runtime/rt_model.h
  91. +5
    -5
      third_party/fwkacllib/inc/runtime/stream.h
  92. +6
    -14
      third_party/fwkacllib/inc/tdt/index_transform.h
  93. +1
    -1
      third_party/fwkacllib/inc/tdt/status.h
  94. +1
    -20
      third_party/fwkacllib/inc/tdt/tdt_host_interface.h
  95. +67
    -351
      third_party/fwkacllib/inc/toolchain/prof_acl_api.h
  96. +2
    -0
      third_party/fwkacllib/inc/toolchain/prof_reporter.h
  97. +72
    -78
      third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h

+ 73
- 0
inc/external/acl/acl.h View File

@@ -0,0 +1,73 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_ACL_H_
#define INC_EXTERNAL_ACL_ACL_H_

// Umbrella header: pulls in the runtime, operator and model sub-APIs.
#include "acl_rt.h"
#include "acl_op.h"
#include "acl_mdl.h"

#ifdef __cplusplus
extern "C" {
#endif

// Current ACL interface version is 1.0.0 (see aclrtGetVersion below).
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0

/**
* @ingroup AscendCL
* @brief acl initialize
*
* @par Restriction
* The aclInit interface can be called only once in a process.
* @param configPath [IN] the config path, it can be NULL
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath);

/**
* @ingroup AscendCL
* @brief acl finalize
*
* @par Restriction
* Need to call aclFinalize before the process exits.
* After calling aclFinalize, the services cannot continue to be used normally.
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclFinalize();

/**
* @ingroup AscendCL
* @brief query ACL interface version
*
* @param majorVersion [OUT] ACL interface major version
* @param minorVersion [OUT] ACL interface minor version
* @param patchVersion [OUT] ACL interface patch version
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_H_

+ 40
- 0
inc/external/acl/acl_base.h View File

@@ -158,7 +158,11 @@ typedef enum {
ACL_FORMAT_NC1HWC0 = 3, ACL_FORMAT_NC1HWC0 = 3,
ACL_FORMAT_FRACTAL_Z = 4, ACL_FORMAT_FRACTAL_Z = 4,
ACL_FORMAT_NC1HWC0_C04 = 12, ACL_FORMAT_NC1HWC0_C04 = 12,
ACL_FORMAT_NDHWC = 27,
ACL_FORMAT_FRACTAL_NZ = 29, ACL_FORMAT_FRACTAL_NZ = 29,
ACL_FORMAT_NCDHW = 30,
ACL_FORMAT_NDC1HWC0 = 32,
ACL_FRACTAL_Z_3D = 33
} aclFormat; } aclFormat;


typedef enum { typedef enum {
@@ -223,6 +227,29 @@ ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size);
*/ */
ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer); ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer);


/**
* @ingroup AscendCL
* @brief update new data of aclDataBuffer
*
* @param dataBuffer [OUT] pointer to aclDataBuffer
* @li The old data need to be released by the user, otherwise a memory leak may occur:
* call aclGetDataBufferAddr interface to get old data address,
* call aclrtFree interface to release memory
*
* @param data [IN] pointer to new data
* @li Need to be managed by the user:
* call aclrtMalloc interface to apply for memory,
* call aclrtFree interface to release memory
*
* @param size [IN] size of data in bytes
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr
*/
ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size);

/** /**
* @ingroup AscendCL * @ingroup AscendCL
* @brief get data address from aclDataBuffer * @brief get data address from aclDataBuffer
@@ -547,6 +574,19 @@ ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);
*/ */
ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName); ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName);


/**
* @ingroup AscendCL
* @brief Set const data specified by the tensor description
*
* @param desc [OUT] pointer to the instance of aclTensorDesc
* @param dataBuffer [IN] pointer to the const data buffer
* @param length [IN] the length of the const data buffer
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length);

/** /**
* @ingroup AscendCL * @ingroup AscendCL
* @brief an interface for users to output APP logs * @brief an interface for users to output APP logs


+ 1186
- 0
inc/external/acl/acl_mdl.h
File diff suppressed because it is too large
View File


+ 296
- 0
inc/external/acl/acl_prof.h View File

@@ -0,0 +1,296 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_PROF_H_
#define INC_EXTERNAL_ACL_PROF_H_

#include "acl_base.h"

#ifdef __cplusplus
extern "C" {
#endif

// Bit flags selecting which data types to collect; combine with bitwise OR
// into the dataTypeConfig argument of aclprofCreateConfig.
#define ACL_PROF_ACL_API 0x0001
#define ACL_PROF_TASK_TIME 0x0002
#define ACL_PROF_AICORE_METRICS 0x0004
#define ACL_PROF_AICPU 0x0008

// Maximum buffer lengths for op name/type strings
// (used with aclprofGetOpName / aclprofGetOpType).
#define ACL_PROF_MAX_OP_NAME_LEN 257
#define ACL_PROF_MAX_OP_TYPE_LEN 65

// AI Core metric groups that can be sampled during profiling.
typedef enum {
ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
ACL_AICORE_PIPE_UTILIZATION = 1,
ACL_AICORE_MEMORY_BANDWIDTH = 2,
ACL_AICORE_L0B_AND_WIDTH = 3,
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
ACL_AICORE_NONE = 0xFF
} aclprofAicoreMetrics;

// Opaque handle types; created and destroyed only through the
// aclprofCreate*/aclprofDestroy* factory functions declared below.
typedef struct aclprofConfig aclprofConfig;
typedef struct aclprofStopConfig aclprofStopConfig;
typedef struct aclprofAicoreEvents aclprofAicoreEvents;
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig;

/**
* @ingroup AscendCL
* @brief profiling initialize
*
* @param profilerResultPath [IN] path of profiling result
* @param length [IN] length of profilerResultPath
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofFinalize
*/
ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length);

/**
* @ingroup AscendCL
* @brief profiling finalize
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofInit
*/
ACL_FUNC_VISIBILITY aclError aclprofFinalize();

/**
* @ingroup AscendCL
* @brief Start profiling modules by profilerConfig
*
* @param profilerConfig [IN] config of profiling
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofStop
*/
ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig);

/**
* @ingroup AscendCL
* @brief Create data of type aclprofConfig
*
* @param deviceIdList [IN] list of device id
* @param deviceNums [IN] number of devices
* @param aicoreMetrics [IN] type of aicore metrics
* @param aicoreEvents [IN] pointer to aicore events, only support NULL now
* @param dataTypeConfig [IN] config modules need profiling (OR of ACL_PROF_* flags)
*
* @retval the aclprofConfig pointer
*
* @see aclprofDestroyConfig
*/
ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
aclprofAicoreMetrics aicoreMetrics,
aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);

/**
* @ingroup AscendCL
* @brief Destroy data of type aclprofConfig
*
* @param profilerConfig [IN] config of profiling
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofCreateConfig
*/
ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig);

/**
* @ingroup AscendCL
* @brief stop profiling modules by stopProfilingConfig
*
* @param profilerConfig [IN] pointer to stop config of profiling
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofStart
*/
ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig);

/**
* @ingroup AscendCL
* @brief subscribe profiling data of model
*
* @param modelId [IN] the model id subscribed
* @param profSubscribeConfig [IN] pointer to config of model subscribe
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofModelUnSubscribe
*/
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig);

/**
* @ingroup AscendCL
* @brief unsubscribe profiling data of model
*
* @param modelId [IN] the model id unsubscribed
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofModelSubscribe
*/
ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId);

/**
* @ingroup AscendCL
* @brief create subscribe config
*
* @param timeInfoSwitch [IN] switch controlling whether to get time info from the model
* @param aicoreMetrics [IN] aicore metrics
* @param fd [IN] pointer to write pipe
*
* @retval the aclprofSubscribeConfig pointer
*
* @see aclprofDestroySubscribeConfig
*/
ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch,
aclprofAicoreMetrics aicoreMetrics, void *fd);

/**
* @ingroup AscendCL
* @brief destroy subscribe config
*
* @param profSubscribeConfig [IN] subscribe config
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofCreateSubscribeConfig
*/
ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig);

/**
* @ingroup AscendCL
* @brief get size of a single op description entry in subscription data
*
* @param opDescSize [OUT] size of op desc
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize);

/**
* @ingroup AscendCL
* @brief get op number from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param opNumber [OUT] op number of subscription data
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber);

/**
* @ingroup AscendCL
* @brief get op type from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
* @param opType [OUT] obtained op type string
* @param opTypeLen [IN] obtained length of op type string
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType,
size_t opTypeLen);

/**
* @ingroup AscendCL
* @brief get op name from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
* @param opName [OUT] obtained op name string
* @param opNameLen [IN] obtained length of op name string
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName,
size_t opNameLen);

/**
* @ingroup AscendCL
* @brief get start time of specified op from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
*
* @retval start time(us) of specified op with timestamp
* @retval 0 for failed
*/
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index);

/**
* @ingroup AscendCL
* @brief get end time of specified op from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
*
* @retval end time(us) of specified op with timestamp
* @retval 0 for failed
*/
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index);

/**
* @ingroup AscendCL
* @brief get execution time of specified op from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
*
* @retval execution time(us) of specified op with timestamp
* @retval 0 for failed
*/
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index);

/**
* @ingroup AscendCL
* @brief get model id from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
*
* @retval model id of subscription data
* @retval 0 for failed
*/
ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_PROF_H_

+ 932
- 0
inc/external/acl/acl_rt.h View File

@@ -0,0 +1,932 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_ACL_RT_H_
#define INC_EXTERNAL_ACL_ACL_RT_H_

#include <stdint.h>
#include <stddef.h>
#include "acl_base.h"

#ifdef __cplusplus
extern "C" {
#endif

// Where the application is running relative to the compute device
// (queried via aclrtGetRunMode).
typedef enum aclrtRunMode {
ACL_DEVICE,
ACL_HOST,
} aclrtRunMode;

// Task-scheduler id used by aclrtSetTsDevice.
typedef enum aclrtTsId {
ACL_TS_ID_AICORE = 0,
ACL_TS_ID_AIVECTOR = 1,
ACL_TS_ID_RESERVED = 2,
} aclrtTsId;

// Completion status of an event.
typedef enum aclrtEventStatus {
ACL_EVENT_STATUS_COMPLETE = 0,
ACL_EVENT_STATUS_NOT_READY = 1,
ACL_EVENT_STATUS_RESERVED = 2,
} aclrtEventStatus;

// Blocking behaviour for callbacks launched via aclrtLaunchCallback.
typedef enum aclrtCallbackBlockType {
ACL_CALLBACK_NO_BLOCK,
ACL_CALLBACK_BLOCK,
} aclrtCallbackBlockType;

// Direction of a memory copy between host and device memory.
typedef enum aclrtMemcpyKind {
ACL_MEMCPY_HOST_TO_HOST,
ACL_MEMCPY_HOST_TO_DEVICE,
ACL_MEMCPY_DEVICE_TO_HOST,
ACL_MEMCPY_DEVICE_TO_DEVICE,
} aclrtMemcpyKind;

// Page-size / placement policy for device memory allocation.
typedef enum aclrtMemMallocPolicy {
ACL_MEM_MALLOC_HUGE_FIRST,
ACL_MEM_MALLOC_HUGE_ONLY,
ACL_MEM_MALLOC_NORMAL_ONLY,
ACL_MEM_MALLOC_HUGE_FIRST_P2P,
ACL_MEM_MALLOC_HUGE_ONLY_P2P,
ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
} aclrtMemMallocPolicy;

// Memory attribute selectors (DDR vs HBM, huge vs normal pages, P2P).
typedef enum aclrtMemAttr {
ACL_DDR_MEM,
ACL_HBM_MEM,
ACL_DDR_MEM_HUGE,
ACL_DDR_MEM_NORMAL,
ACL_HBM_MEM_HUGE,
ACL_HBM_MEM_NORMAL,
ACL_DDR_MEM_P2P_HUGE,
ACL_DDR_MEM_P2P_NORMAL,
ACL_HBM_MEM_P2P_HUGE,
ACL_HBM_MEM_P2P_NORMAL,
} aclrtMemAttr;

// Integer-valued group attributes queryable for a scheduling group.
typedef enum aclrtGroupAttr {
ACL_GROUP_AICORE_INT,
ACL_GROUP_AIV_INT,
ACL_GROUP_AIC_INT,
ACL_GROUP_SDMANUM_INT,
ACL_GROUP_ASQNUM_INT
} aclrtGroupInfo is declared below. */
typedef struct tagRtGroupInfo aclrtGroupInfo;

typedef struct rtExceptionInfo aclrtExceptionInfo;

// User callback invoked from the report thread (see aclrtLaunchCallback).
typedef void (*aclrtCallback)(void *userData);

// Callback invoked with exception details (see aclrtSetExceptionInfoCallback).
typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo);

/**
* @ingroup AscendCL
* @brief Set a callback function to handle exception information
*
* @param callback [IN] callback function to handle exception information
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback);

/**
* @ingroup AscendCL
* @brief Get task id from exception information
*
* @param info [IN] pointer of exception information
*
* @retval The task id from exception information
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief Get stream id from exception information
*
* @param info [IN] pointer of exception information
*
* @retval The stream id from exception information
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief Get thread id from exception information
*
* @param info [IN] pointer of exception information
*
* @retval The thread id of the failed task
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief Get device id from exception information
*
* @param info [IN] pointer of exception information
*
* @retval The device id of the failed task
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief Specify the thread that handles the callback functions on the Stream
*
* @param threadId [IN] thread ID
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Add a callback function to be executed on the host
* to the task queue of the Stream
*
* @param fn [IN] Specify the callback function to be added.
* The function prototype of the callback function is:
* typedef void (*aclrtCallback)(void *userData);
* @param userData [IN] User data to be passed to the callback function
* @param blockType [IN] callback block type
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief After waiting for a specified time, trigger callback processing
*
* @par Function
* The thread processing callback specified by
* the aclrtSubscribeReport interface
*
* @param timeout [IN] timeout value
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSubscribeReport
*/
ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout);

/**
* @ingroup AscendCL
* @brief Cancel thread registration,
* the callback function on the specified Stream
* is no longer processed by the specified thread
*
* @param threadId [IN] thread ID
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create context and associates it with the calling thread
*
* @par Function
* The following use cases are supported:
* @li If you don't call the aclrtCreateContext interface
* to explicitly create the context,
* the system will use the default context, which is implicitly created
* when the aclrtSetDevice interface is called.
* @li If multiple contexts are created in a process
* (there is no limit on the number of contexts),
* the current thread can only use one of them at the same time.
* It is recommended to explicitly specify the context of the current thread
* through the aclrtSetCurrentContext interface to increase
* the maintainability of the program.
*
* @param context [OUT] point to the created context
* @param deviceId [IN] device to create context on
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSetDevice | aclrtSetCurrentContext
*/
ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId);

/**
* @ingroup AscendCL
* @brief destroy context instance
*
* @par Function
* Can only destroy context created through aclrtCreateContext interface
*
* @param context [IN] the context to destroy
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateContext
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context);

/**
* @ingroup AscendCL
* @brief set the context of the thread
*
* @par Function
* The following scenarios are supported:
* @li If the aclrtCreateContext interface is called in a thread to explicitly
* create a Context (for example: ctx1), the thread's Context can be specified
* without calling the aclrtSetCurrentContext interface.
* The system uses ctx1 as the context of thread1 by default.
* @li If the aclrtCreateContext interface is not explicitly called,
* the system uses the default context as the context of the thread.
* At this time, the aclrtDestroyContext interface cannot be used to release
* the default context.
* @li If the aclrtSetCurrentContext interface is called multiple times to
* set the thread's Context, the last one prevails.
*
* @par Restriction
* @li If the device corresponding to the context set for the thread
* has been reset, you cannot set the context as the context of the thread,
* otherwise a business exception will result.
* @li It is recommended to use the context created in a thread.
* If the aclrtCreateContext interface is called in thread A to create a context,
* and the context is used in thread B,
* the user must guarantee the execution order of tasks in the same stream
* under the same context in two threads.
*
* @param context [IN] the current context of the thread
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateContext | aclrtDestroyContext
*/
ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context);

/**
* @ingroup AscendCL
* @brief get the context of the thread
*
* @par Function
* If the user calls the aclrtSetCurrentContext interface
* multiple times to set the context of the current thread,
* then the last set context is obtained
*
* @param context [OUT] the current context of the thread
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSetCurrentContext
*/
ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context);

/**
* @ingroup AscendCL
* @brief Specify the device to use for the operation;
* implicitly creates the default context and the default stream
*
* @par Function
* The following use cases are supported:
* @li Device can be specified in the process or thread.
* If you call the aclrtSetDevice interface multiple
* times to specify the same device,
* you only need to call the aclrtResetDevice interface to reset the device.
* @li The same device can be specified for operation
* in different processes or threads.
* @li Device is specified in a process,
* and multiple threads in the process can share this device to explicitly
* create a Context (aclrtCreateContext interface).
* @li In multi-device scenarios, you can switch to other devices
* through the aclrtSetDevice interface in the process.
*
* @param deviceId [IN] the device id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtResetDevice | aclrtCreateContext
*/
ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId);

/**
* @ingroup AscendCL
* @brief Reset the current operating Device and free resources on the device,
* including the default context, the default stream,
* and all streams created under the default context,
* and synchronizes the interface.
* If the task under the default context or stream has not been completed,
* the system will wait for the task to complete before releasing it.
*
* @par Restriction
* @li The Context, Stream, and Event that are explicitly created
* on the device to be reset. Before resetting,
* it is recommended to follow the following interface calling sequence,
* otherwise business abnormalities may be caused.
* @li Interface calling sequence:
* call aclrtDestroyEvent interface to release Event or
* call aclrtDestroyStream interface to release explicitly created Stream->
* call aclrtDestroyContext to release explicitly created Context->
* call aclrtResetDevice interface
*
* @param deviceId [IN] the device id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId);

/**
* @ingroup AscendCL
* @brief get target device of current thread
*
* @param deviceId [OUT] the device id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId);

/**
* @ingroup AscendCL
* @brief get target side
*
* @param runMode [OUT] the run mode
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode);

/**
* @ingroup AscendCL
* @brief Wait for compute device to finish
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void);

/**
* @ingroup AscendCL
* @brief Set Scheduling TS
*
* @param tsId [IN] the ts id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId);

/**
* @ingroup AscendCL
* @brief get total device number.
*
* @param count [OUT] the device number
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count);

/**
* @ingroup AscendCL
* @brief create event instance
*
* @param event [OUT] created event
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event);

/**
* @ingroup AscendCL
* @brief destroy event instance
*
* @par Function
 * Only events created through the aclrtCreateEvent interface can be
 * destroyed; this is a synchronous interface. When destroying an event,
 * the user must ensure that the tasks involved in the aclrtSynchronizeEvent
 * interface or the aclrtStreamWaitEvent interface are completed before
 * the event is destroyed.
*
* @param event [IN] event to destroy
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event);

/**
* @ingroup AscendCL
* @brief Record an Event in the Stream
*
* @param event [IN] event to record
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Reset an event
*
* @par Function
* Users need to make sure to wait for the tasks in the Stream
* to complete before resetting the Event
*
* @param event [IN] event to reset
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Queries an event's status
*
* @param event [IN] event to query
* @param status [OUT] event status
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status);

/**
* @ingroup AscendCL
* @brief Block Host Running, wait event to be complete
*
* @param event [IN] event to wait
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event);

/**
* @ingroup AscendCL
* @brief computes the elapsed time between events.
*
* @param ms [OUT] time between start and end in ms
* @param start [IN] starting event
* @param end [IN] ending event
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end);

/**
* @ingroup AscendCL
* @brief alloc memory on device
*
* @par Function
* alloc for size linear memory on device
* and return a pointer to allocated memory by *devPtr
*
* @par Restriction
* @li The memory requested by the aclrtMalloc interface needs to be released
* through the aclrtFree interface.
* @li Before calling the media data processing interface,
* if you need to apply memory on the device to store input or output data,
* you need to call acldvppMalloc to apply for memory.
*
* @param devPtr [OUT] pointer to pointer to allocated memory on device
* @param size [IN] alloc memory size
* @param policy [IN] memory alloc policy
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtFree | acldvppMalloc | aclrtMallocCached
*/
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
* @brief allocate memory on device with cache
*
* @par Function
* alloc for size linear memory on device
* and return a pointer to allocated memory by *devPtr
*
* @par Restriction
* @li The memory requested by the aclrtMallocCached interface needs to be released
* through the aclrtFree interface.
*
* @param devPtr [OUT] pointer to pointer to allocated memory on device
* @param size [IN] alloc memory size
* @param policy [IN] memory alloc policy
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtFree | aclrtMalloc
*/
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
* @brief flush cache data to ddr
*
* @param devPtr [IN] the pointer that flush data to ddr
* @param size [IN] flush size
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size);

/**
* @ingroup AscendCL
* @brief invalidate cache data
*
* @param devPtr [IN] pointer to invalidate cache data
* @param size [IN] invalidate size
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size);

/**
* @ingroup AscendCL
* @brief free device memory
*
* @par Function
* can only free memory allocated through the aclrtMalloc interface
*
* @param devPtr [IN] Pointer to memory to be freed
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtMalloc
*/
ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr);

/**
* @ingroup AscendCL
* @brief alloc memory on host
*
* @par Restriction
* @li The requested memory cannot be used in the Device
* and needs to be explicitly copied to the Device.
* @li The memory requested by the aclrtMallocHost interface
* needs to be released through the aclrtFreeHost interface.
*
* @param hostPtr [OUT] pointer to pointer to allocated memory on the host
* @param size [IN] alloc memory size
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtFreeHost
*/
ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size);

/**
* @ingroup AscendCL
* @brief free host memory
*
* @par Function
* can only free memory allocated through the aclrtMallocHost interface
*
* @param hostPtr [IN] free memory pointer
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtMallocHost
*/
ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr);

/**
* @ingroup AscendCL
* @brief synchronous memory replication between host and device
*
* @param dst [IN] destination address pointer
* @param destMax [IN] Max length of the destination address memory
* @param src [IN] source address pointer
* @param count [IN] the length of byte to copy
* @param kind [IN] memcpy type
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count,
aclrtMemcpyKind kind);

/**
* @ingroup AscendCL
* @brief Initialize memory and set contents of memory to specified value
*
* @par Function
* The memory to be initialized is on the Host or device side,
* and the system determines whether
* it is host or device according to the address
*
* @param devPtr [IN] Starting address of memory
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] Set value
* @param count [IN] The length of memory
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count);

/**
* @ingroup AscendCL
* @brief Asynchronous memory replication between Host and Device
*
* @par Function
* After calling this interface,
* be sure to call the aclrtSynchronizeStream interface to ensure that
* the task of memory replication has been completed
*
* @par Restriction
* @li For on-chip Device-to-Device memory copy,
* both the source and destination addresses must be 64-byte aligned
*
* @param dst [IN] destination address pointer
* @param destMax [IN] Max length of destination address memory
* @param src [IN] source address pointer
* @param count [IN] the number of byte to copy
* @param kind [IN] memcpy type
* @param stream [IN] asynchronized task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count,
aclrtMemcpyKind kind, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Asynchronous initialize memory
* and set contents of memory to specified value async
*
* @par Function
* The memory to be initialized is on the Host or device side,
* and the system determines whether
* it is host or device according to the address
*
* @param devPtr [IN] destination address pointer
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] set value
* @param count [IN] the number of byte to set
* @param stream [IN] asynchronized task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create stream instance
*
* @param stream [OUT] the created stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream);

/**
* @ingroup AscendCL
* @brief destroy stream instance
*
* @par Function
* Can only destroy streams created through the aclrtCreateStream interface
*
* @par Restriction
* Before calling the aclrtDestroyStream interface to destroy
* the specified Stream, you need to call the aclrtSynchronizeStream interface
* to ensure that the tasks in the Stream have been completed.
*
* @param stream [IN] the stream to destroy
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateStream | aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream);

/**
* @ingroup AscendCL
* @brief block the host until all tasks
* in the specified stream have completed
*
* @param stream [IN] the stream to wait
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Blocks the operation of the specified Stream until
* the specified Event is completed.
* Support for multiple streams waiting for the same event.
*
 * @param stream [IN] the wait stream. If using the default Stream, set NULL
* @param event [IN] the event to wait
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event);

/**
* @ingroup AscendCL
* @brief set group
*
* @par Function
* set the task to the corresponding group
*
* @param groupId [IN] group id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail
*/
ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId);

/**
* @ingroup AscendCL
* @brief get the number of group
*
* @par Function
 * Get the number of groups. If the number of groups is zero,
 * it means that group is not supported or group is not created.
*
* @param count [OUT] the number of group
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
*/
ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count);

/**
* @ingroup AscendCL
* @brief create group information
*
* @retval null for failed.
* @retval OtherValues success.
*
* @see aclrtDestroyGroupInfo
*/
ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo();

/**
* @ingroup AscendCL
* @brief destroy group information
*
* @param groupInfo [IN] pointer to group information
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateGroupInfo
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo);

/**
* @ingroup AscendCL
* @brief get all group information
*
* @param groupInfo [OUT] pointer to group information
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtGetGroupCount
*/
ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo);

/**
* @ingroup AscendCL
* @brief get detail information of group
*
* @param groupInfo [IN] pointer to group information
* @param groupId [IN] group index value
* @param attr [IN] group attribute
* @param attrValue [OUT] pointer to attribute value
* @param valueLen [IN] length of attribute value
* @param paramRetSize [OUT] pointer to real length of attribute value
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo
*/
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupId,
aclrtGroupAttr attr, void *attrValue, size_t valueLen,
size_t *paramRetSize);

/**
* @ingroup AscendCL
* @brief checking whether current device and peer device support the p2p feature
*
* @param canAccessPeer [OUT] pointer to save the checking result
* @param deviceId [IN] current device id
* @param peerDeviceId [IN] peer device id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess
*/
ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId);

/**
* @ingroup AscendCL
* @brief enable the peer device to support the p2p feature
*
* @param peerDeviceId [IN] the peer device id
* @param flags [IN] reserved field, now it must be zero
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess
*/
ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags);

/**
* @ingroup AscendCL
* @brief disable the peer device to support the p2p function
*
* @param peerDeviceId [IN] the peer device id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess
*/
ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId);

/**
* @ingroup AscendCL
* @brief Obtain the free memory and total memory of specified attribute.
 * the specified memory includes normal memory and huge memory.
*
* @param attr [IN] the memory attribute of specified device
* @param free [OUT] the free memory of specified device
* @param total [OUT] the total memory of specified device.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_RT_H_

+ 276
- 0
inc/external/acl/acl_tdt.h View File

@@ -0,0 +1,276 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_ACL_TDT_H_
#define INC_EXTERNAL_ACL_ACL_TDT_H_

#include "acl/acl_base.h"

#ifdef __cplusplus
extern "C" {
#endif

// Type of payload carried by a data item sent over a TDT channel.
enum acltdtTensorType {
  ACL_TENSOR_DATA_UNDEFINED = -1,  // payload type is undefined/unknown
  ACL_TENSOR_DATA_TENSOR,          // payload is a tensor
  ACL_TENSOR_DATA_END_OF_SEQUENCE, // marks the end of a data sequence
  ACL_TENSOR_DATA_ABNORMAL         // abnormal data; presumably signals an error condition -- TODO confirm
};

// Opaque handle types; their definitions are internal to the ACL implementation.
typedef struct acltdtDataItem acltdtDataItem;            // one tensor payload (data address, size, dims, data type)
typedef struct acltdtDataset acltdtDataset;              // an ordered, indexable collection of data items
typedef struct acltdtChannelHandle acltdtChannelHandle;  // a channel bound to a device id and a name

/**
* @ingroup AscendCL
* @brief Get tensor type from item
*
* @param dataItem [IN] pointer to the data item
*
* @retval Tensor type.
* @retval ACL_DT_UNDEFINED if dataItem is null
*/
ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get data type from item
*
* @param dataItem [IN] pointer to the data item
*
* @retval Data type.
* @retval ACL_DT_UNDEFINED if dataItem is null
*/
ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get data address from item
*
* @param dataItem [IN] pointer to data item
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get data size from item
*
* @param dataItem [IN] pointer to data item
*
* @retval 0 for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get dim's number from item
*
* @param dataItem [IN] pointer to data item
*
* @retval 0 for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get dims from item
*
* @param dataItem [IN] the struct of data item
 * @param dims [IN|OUT] pointer to the dims of dataItem
* @param dimNum [IN] the size of the dims
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum);

/**
* @ingroup AscendCL
* @brief Create the struct of data item
*
* @param tdtType [IN] Tdt tensor type
* @param dims [IN] pointer of tdtDataItem's dims
* @param dimNum [IN] Dim number
* @param dataType [IN] Data type
* @param data [IN] Data pointer
* @param size [IN] Data size
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtDestroyDataItem
*/
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum,
aclDataType dataType, void *data, size_t size);

/**
* @ingroup AscendCL
* @brief Destroy the struct of data item
*
* @param dataItem [IN] pointer to the data item
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateDataItem
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Create the tdt dataset
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtDestroyDataset
*/
ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset();

/**
* @ingroup AscendCL
* @brief Destroy the tdt dataset
*
* @param dataset [IN] pointer to the dataset
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateDataset
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset);

/**
* @ingroup AscendCL
* @brief Get the data item
*
* @param dataset [IN] pointer to the dataset
* @param index [IN] index of the dataset
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtAddDataItem
*/
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index);

/**
* @ingroup AscendCL
 * @brief Add the data item to the dataset
*
* @param dataset [OUT] pointer to the dataset
* @param dataItem [IN] pointer to the data item
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtGetDataItem
*/
ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get the size of dataset
*
* @param dataset [IN] pointer to the dataset
*
* @retval 0 for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset);

/**
* @ingroup AscendCL
* @brief Stop the channel
*
* @param handle [IN] pointer to the channel handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateChannel | acltdtDestroyChannel
*/
ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle);

/**
* @ingroup AscendCL
* @brief Create the channel
*
* @param deviceId [IN] the device id
* @param name [IN] the channel's name
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtStopChannel | acltdtDestroyChannel
*/
ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name);

/**
* @ingroup AscendCL
* @brief Destroy the channel
*
* @param handle [IN] pointer to the channel handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateChannel | acltdtStopChannel
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle);

/**
* @ingroup AscendCL
* @brief Send tensor to device
*
* @param handle [IN] pointer to the channel handle
* @param dataset [IN] pointer to the dataset
* @param timeout [IN] to be reserved, now it must be -1
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtReceiveTensor
*/
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset,
int32_t timeout);

/**
* @ingroup AscendCL
* @brief Receive tensor from device
*
* @param handle [IN] pointer to the channel handle
* @param dataset [OUT] pointer to the dataset
* @param timeout [IN] to be reserved, now it must be -1
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtSendTensor
*/
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset,
int32_t timeout);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_TDT_H_

+ 13
- 3
inc/external/acl/error_codes/rt_error_codes.h View File

@@ -42,12 +42,22 @@ static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group n
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type


static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPROT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource


static const int32_t ACL_ERROR_RT_INTERNEL_ERROR = 507000; // runtime internel error
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
@@ -82,7 +92,7 @@ static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error


static const int32_t ACL_ERROR_RT_DRV_INTERNEL_ERROR = 507899; // drv internel error
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error


#ifdef __cplusplus #ifdef __cplusplus
} }


+ 2389
- 0
inc/external/acl/ops/acl_dvpp.h
File diff suppressed because it is too large
View File


+ 134
- 0
inc/external/hccl/hccl.h View File

@@ -0,0 +1,134 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @file hccl.h
* @brief HCCL API
*/

#ifndef HCCL_H_
#define HCCL_H_

#include <hccl/hccl_types.h>
#include <acl/acl.h>

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
* @brief Initialize HCCL.
*
* @param clusterInfo A string identifying the cluster info file path, include file name.
 * @param rank An integer identifying the identity of the rank.
* @param comm A pointer identifying the initialized communication resource.
* @return HcclResult
* @see HcclCommDestroy()
*/
extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm);

/**
* @brief Get hccl root info.
*
* @param rootInfo A pointer identifying the hccl root info.
* @return HcclResult
*/
extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo);

/**
* @brief Initialize HCCL with root info.
*
 * @param nRanks An integer identifying the rank size of the cluster.
 * @param rootInfo A struct identifying the hccl root info.
 * @param rank An integer identifying the identity of the rank.
* @param comm A pointer identifying the initialized communication resource.
* @return HcclResult
* @see HcclCommDestroy()
*/
extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm);

/**
* @brief AllReduce operator.
*
* @param sendBuf A pointer identifying the input data address of the operator.
* @param recvBuf A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the output data.
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16,
* float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
HcclComm comm, aclrtStream stream);

/**
* @brief Broadcast operator.
*
* @param buf A pointer identifying the data address of the operator.
* @param count An integer(u64) identifying the number of the data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
 * @param root An integer(u32) identifying the root rank in the operator.
* @param comm A pointer identifying the communication resource based on
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
aclrtStream stream);

/**
* @brief ReduceScatter operator.
*
* @param sendBuf A pointer identifying the input data address of the operator.
* @param recvBuf A pointer identifying the output data address of the operator.
* @param recvCount An integer(u64) identifying the number of the output data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
HcclReduceOp op, HcclComm comm, aclrtStream stream);

/**
* @brief AllGather operator.
*
* @param sendBuf A pointer identifying the input data address of the operator.
* @param recvBuf A pointer identifying the output data address of the operator.
* @param sendCount An integer(u64) identifying the number of the input data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm,
aclrtStream stream);

/**
* @brief Destroy HCCL comm
*
 * @param comm A pointer identifying the communication resource to be destroyed
* @return HcclResult
* @see HcclCommInitClusterInfo()
*/
extern HcclResult HcclCommDestroy(HcclComm comm);

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_H_

+ 101
- 0
inc/external/hccl/hccl_types.h View File

@@ -0,0 +1,101 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @file hccl_types.h
* @brief HCCL data type definition
*
*/

#ifndef HCCL_TYPES_H_
#define HCCL_TYPES_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
 * @brief HCCL functions return value definition
 *
 * @note These codes are part of the public HCCL API contract; existing
 * values must not be renumbered. New codes are appended before
 * HCCL_E_RESERVED.
 */
typedef enum {
HCCL_SUCCESS = 0, /**< success */
HCCL_E_PARA = 1, /**< parameter error */
HCCL_E_PTR = 2, /**< empty pointer */
HCCL_E_MEMORY = 3, /**< memory error */
HCCL_E_INTERNAL = 4, /**< internal error */
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
HCCL_E_UNAVAIL = 7, /**< resource unavailable */
HCCL_E_SYSCALL = 8, /**< call system interface error */
HCCL_E_TIMEOUT = 9, /**< timeout */
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
HCCL_E_RUNTIME = 15, /**< call runtime api fail */
HCCL_E_DRV = 16, /**< call driver api fail */
HCCL_E_PROFILING = 17, /**< call profiling api fail */
HCCL_E_CCE = 18, /**< call cce api fail */
HCCL_E_NETWORK = 19, /**< call network api fail */
HCCL_E_RESERVED /**< reserved; also marks one-past-last valid code */
} HcclResult;

/**
 * @brief handle to HCCL communicator
 *
 * Opaque pointer; created and destroyed exclusively through the HCCL API
 * (see HcclCommDestroy). Callers must not dereference it.
 */
typedef void *HcclComm;

/**
 * @brief HCCL Reduction operation
 */
typedef enum {
HCCL_REDUCE_SUM = 0, /**< sum */
HCCL_REDUCE_PROD = 1, /**< prod */
HCCL_REDUCE_MAX = 2, /**< max */
HCCL_REDUCE_MIN = 3, /**< min */
HCCL_REDUCE_RESERVED /**< reserved; one-past-last valid op */
} HcclReduceOp;

/**
 * @brief HCCL data type
 *
 * @note Values are wire/ABI stable; append new types before
 * HCCL_DATA_TYPE_RESERVED only.
 */
typedef enum {
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
HCCL_DATA_TYPE_RESERVED /**< reserved; one-past-last valid type */
} HcclDataType;

const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length

/**
 * @brief HCCL root info
 *
 * Fixed-size opaque blob (HCCL_ROOT_INFO_BYTES bytes) exchanged between
 * ranks to bootstrap a communicator. Contents are internal to HCCL and
 * must be treated as raw bytes by callers.
 */
typedef struct HcclRootInfoDef {
char internal[HCCL_ROOT_INFO_BYTES];
} HcclRootInfo;

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_TYPES_H_

+ 13
- 3
inc/external/runtime/rt_error_codes.h View File

@@ -42,12 +42,22 @@ static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group n
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type


static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPROT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource


static const int32_t ACL_ERROR_RT_INTERNEL_ERROR = 507000; // runtime internel error
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
@@ -82,7 +92,7 @@ static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error


static const int32_t ACL_ERROR_RT_DRV_INTERNEL_ERROR = 507899; // drv internel error
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error


#ifdef __cplusplus #ifdef __cplusplus
} }


+ 20
- 20
third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h View File

@@ -18,43 +18,43 @@
#define AICPU_OP_TYPE_LIST_H_ #define AICPU_OP_TYPE_LIST_H_
enum OpKernelType { enum OpKernelType {
TF_KERNEL,
CPU_KERNEL
TF_KERNEL,
CPU_KERNEL
}; };
enum ReturnCode { enum ReturnCode {
OP_TYPE_NOT_SUPPORT,
FORMAT_NOT_SUPPORT,
DTYPE_NOT_SUPPORT
OP_TYPE_NOT_SUPPORT,
FORMAT_NOT_SUPPORT,
DTYPE_NOT_SUPPORT
}; };
#pragma pack(push, 1) #pragma pack(push, 1)
//One byte alignment //One byte alignment
struct SysOpInfo { struct SysOpInfo {
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
}; };
struct OpParamInfo { struct OpParamInfo {
uint64_t num;
uint64_t dtypeList;
uint64_t formatList;
uint64_t num;
uint64_t dtypeList;
uint64_t formatList;
}; };
struct SysOpCheckInfo { struct SysOpCheckInfo {
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
}; };
struct SysOpCheckResp { struct SysOpCheckResp {
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
}; };
#pragma pack(pop) #pragma pack(pop)
#endif // AICPU_OP_TYPE_LIST_H_ #endif // AICPU_OP_TYPE_LIST_H_

+ 1
- 0
third_party/fwkacllib/inc/cce/aicpu_engine.h View File

@@ -31,6 +31,7 @@ typedef enum {
AE_STATUS_KERNEL_API_INNER_ERROR = 5, AE_STATUS_KERNEL_API_INNER_ERROR = 5,
AE_STATUS_END_OF_SEQUENCE = 6, AE_STATUS_END_OF_SEQUENCE = 6,
AE_STATUS_DUMP_FAILED = 7, AE_STATUS_DUMP_FAILED = 7,
AE_STATUS_TASK_WAIT = 101,
AE_STATUS_RESERVED AE_STATUS_RESERVED
} aeStatus_t; } aeStatus_t;




+ 1
- 0
third_party/fwkacllib/inc/cce/fwk_adpt_struct.h View File

@@ -60,6 +60,7 @@ enum FWKTaskExtInfoType {
FWK_ADPT_EXT_UPDATE_ADDR, FWK_ADPT_EXT_UPDATE_ADDR,
FWK_ADPT_EXT_OP_NAME, FWK_ADPT_EXT_OP_NAME,
FWK_ADPT_EXT_SESSION_INFO, FWK_ADPT_EXT_SESSION_INFO,
FWK_ADPT_EXT_BITMAP,
FWK_ADPT_EXT_INVALID FWK_ADPT_EXT_INVALID
}; };




+ 28
- 0
third_party/fwkacllib/inc/hccl/hcom.h View File

@@ -110,6 +110,34 @@ HcclResult HcomDestroyGroup(const char *group);
/** /**
* @brief Set the gradient split strategy with in the group, according to gradient index. * @brief Set the gradient split strategy with in the group, according to gradient index.
* *
* @param group A string identifying the group name.
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param IdxList A list identifying the index of end gradient in each segment.
* @return HcclResult
*/
extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList);

/**
* @brief Set the gradient split strategy with in the group, according to gradient data size.
*
* @param group A string identifying the group name.
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param sizeList A list identifying the percent of each segment.
* @return HcclResult
*/
extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList);

/**
* @brief Initialize hcom executor.
*
* @param void
* @return HcclResult
*/
HcclResult HcomExecInitialize();

/**
* @brief Finalize hcom executor.
*
* @param void * @param void
* @return HcclResult * @return HcclResult
*/ */


+ 2
- 1
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h View File

@@ -50,7 +50,7 @@ typedef int (*mmFilter)(const mmDirent *entry);
typedef int (*mmFilter2)(const mmDirent2 *entry); typedef int (*mmFilter2)(const mmDirent2 *entry);
typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); typedef int (*mmSort)(const mmDirent **a, const mmDirent **b);
typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b); typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b);
typedef size_t mmSize_t;
typedef size_t mmSize_t; //lint !e410 !e1051
typedef off_t mmOfft_t; typedef off_t mmOfft_t;
typedef pid_t mmPid_t; typedef pid_t mmPid_t;
typedef long MM_LONG; typedef long MM_LONG;
@@ -283,6 +283,7 @@ typedef struct {
#define M_W_OK W_OK #define M_W_OK W_OK
#define M_R_OK R_OK #define M_R_OK R_OK



#define MM_DT_DIR DT_DIR #define MM_DT_DIR DT_DIR
#define MM_DT_REG DT_REG #define MM_DT_REG DT_REG




+ 83
- 83
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h View File

@@ -1,83 +1,83 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MMPA_TYPEDEF_WIN_H
#define MMPA_TYPEDEF_WIN_H
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif // __cpluscplus
#endif // __cpluscplus
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#define EN_OK 0
#define EN_ERR 1
#define EN_ERROR (-1)
#define EN_INVALID_PARAM (-2)
#define EN_TIMEOUT (-3)
#define HANDLE_INVALID_VALUE (-1)
#define INVALID_SOCKET_HANDLE INVALID_SOCKET
#define MMPA_MEM_MAX_LEN (0x7fffffff)
#define MMPA_PROCESS_ERROR (0x7fffffff)
#define MMPA_ONE_THOUSAND 1000
#define MMPA_COMPUTER_BEGIN_YEAR 1900
#define SUMMER_TIME_OR_NOT (-1)
#define MMPA_ZERO 0
#define MMPA_VALUE_ONE 1
#define MMPA_SOCKET_MAIN_EDITION 2
#define MMPA_SOCKET_SECOND_EDITION 0
#define MMPA_PIPE_BUF_SIZE 1024
#define MMPA_MAX_SCANDIR_COUNT 1024
#define MAX_IOVEC_SIZE 32
#define MMPA_PIPE_COUNT 2
#define MMPA_THREADNAME_SIZE 16
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1)
#define MMPA_MIN_OS_VERSION_SIZE 64
#define MMPA_MAX_NI 19
#define MMPA_MIDDLE_NI 5
#define MMPA_LOW_NI (-5)
#define MMPA_MIN_NI (-20)
#define MMPA_MAX_FILE 128
#define MMPA_MAX_THREAD_PIO 99
#define MMPA_MIDDLE_THREAD_PIO 66
#define MMPA_LOW_THREAD_PIO 33
#define MMPA_MIN_THREAD_PIO 1
#define MMPA_THREAD_SCHED_RR 0
#define MMPA_THREAD_SCHED_FIFO 0
#define MMPA_THREAD_SCHED_OTHER 0
#define MMPA_THREAD_MIN_STACK_SIZE 0
#define MM_MUTEX_INITIALIZER NULL
#ifdef __cplusplus
#if __cplusplus
}
#endif // __cpluscplus
#endif // __cpluscplus
#endif // _MMPA_TYPEDEF_WIN_H_
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MMPA_TYPEDEF_WIN_H
#define MMPA_TYPEDEF_WIN_H
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif // __cpluscplus
#endif // __cpluscplus
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#define EN_OK 0
#define EN_ERR 1
#define EN_ERROR (-1)
#define EN_INVALID_PARAM (-2)
#define EN_TIMEOUT (-3)
#define HANDLE_INVALID_VALUE (-1)
#define INVALID_SOCKET_HANDLE INVALID_SOCKET
#define MMPA_MEM_MAX_LEN (0x7fffffff)
#define MMPA_PROCESS_ERROR (0x7fffffff)
#define MMPA_ONE_THOUSAND 1000
#define MMPA_COMPUTER_BEGIN_YEAR 1900
#define SUMMER_TIME_OR_NOT (-1)
#define MMPA_ZERO 0
#define MMPA_VALUE_ONE 1
#define MMPA_SOCKET_MAIN_EDITION 2
#define MMPA_SOCKET_SECOND_EDITION 0
#define MMPA_PIPE_BUF_SIZE 1024
#define MMPA_MAX_SCANDIR_COUNT 1024
#define MAX_IOVEC_SIZE 32
#define MMPA_PIPE_COUNT 2
#define MMPA_THREADNAME_SIZE 16
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1)
#define MMPA_MIN_OS_VERSION_SIZE 64
#define MMPA_MAX_NI 19
#define MMPA_MIDDLE_NI 5
#define MMPA_LOW_NI (-5)
#define MMPA_MIN_NI (-20)
#define MMPA_MAX_FILE 128
#define MMPA_MAX_THREAD_PIO 99
#define MMPA_MIDDLE_THREAD_PIO 66
#define MMPA_LOW_THREAD_PIO 33
#define MMPA_MIN_THREAD_PIO 1
#define MMPA_THREAD_SCHED_RR 0
#define MMPA_THREAD_SCHED_FIFO 0
#define MMPA_THREAD_SCHED_OTHER 0
#define MMPA_THREAD_MIN_STACK_SIZE 0
#define MM_MUTEX_INITIALIZER NULL
#ifdef __cplusplus
#if __cplusplus
}
#endif // __cpluscplus
#endif // __cpluscplus
#endif // _MMPA_TYPEDEF_WIN_H_

+ 1
- 1
third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h View File

@@ -1,4 +1,4 @@
/**
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd * Copyright 2019-2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");


+ 3
- 1
third_party/fwkacllib/inc/ops/aipp.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -65,6 +65,8 @@ in aipp config file, framework will auto add one input node to graph at last. \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with the TensorFlow operator AippData. *Compatible with the TensorFlow operator AippData.
*@par Restrictions:
*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly.
*/ */
REG_OP(AippData) REG_OP(AippData)
.INPUT(data, TensorType::ALL()) .INPUT(data, TensorType::ALL())


+ 2
- 1
third_party/fwkacllib/inc/ops/all_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -39,6 +39,7 @@
#include "image_ops.h" #include "image_ops.h"
#include "internal_ops.h" #include "internal_ops.h"
#include "linalg_ops.h" #include "linalg_ops.h"
#include "list_ops.h"
#include "logging_ops.h" #include "logging_ops.h"
#include "lookup_ops.h" #include "lookup_ops.h"
#include "math_ops.h" #include "math_ops.h"


+ 74
- 1
third_party/fwkacllib/inc/ops/array_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -1153,6 +1153,79 @@ REG_OP(EditDistance)
.OUTPUT(output, TensorType({DT_FLOAT})) .OUTPUT(output, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(EditDistance) .OP_END_FACTORY_REG(EditDistance)


/**
* @brief sort_v2: sorts the input tensor along a single axis without
* returning indices.

* @par Inputs:
* @li x: An ND tensor. Must be one of the following types: float16,
* float32, double (per the registered TensorType below).

* @par Attributes:

* @li axis: An optional int. The dimension to sort along. This value defaults to -1.
* @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False.

* @par Outputs:
* @li y: An ND tensor of the same type as "x".

* @attention Constraints:
* @li Axis should select the last dim.
* @li When the sorting data is less than 150K, it is recommended to use this tbe ops,
and the descending performance is better than the ascending.
* @li The upper limit of data on Ascend910 is 2000K.
*/
REG_OP(SortV2)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.ATTR(axis, Int, -1)
.ATTR(descending, Bool, false)
.OP_END_FACTORY_REG(SortV2)

/**
* @brief Expand the input tensor to a compatible shape. \n

* @par Inputs:
* Two inputs, including:
* @li x: A Tensor. Must be one of the following types:
* float16, float32, int32, int8, uint8. \n
* @li shape: A Tensor of type int16, int32 or int64 that specifies the
* shape the input tensor is expanded to. \n

* @par Outputs:
* @li y: A Tensor. Has the same type as "x", and the shape specified by the "shape" input \n

* @par Third-party framework compatibility
* Compatible with the ONNX operator Expand.
*/

REG_OP(Expand)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.OP_END_FACTORY_REG(Expand)

/**
* @brief Expand the input tensor to a compatible shape (static variant of
* Expand: the target shape is a compile-time attribute, not an input). \n

* @par Inputs:
* One input, including:
* @li x: A Tensor. Must be one of the following types:
* float16, float32, int32, int8, uint8. \n

* @par Attributes:
* @li shape: A required listInt to specify the shape that the input tensor expanded to. \n


* @par Outputs:
* @li y: A Tensor. Has the same type as "x", and the shape specified by the "shape" attribute \n

* @par Third-party framework compatibility
* Compatible with the ONNX operator Expand.
*/

REG_OP(ExpandD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.REQUIRED_ATTR(shape, ListInt)
.OP_END_FACTORY_REG(ExpandD)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/audio_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/batch_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/bitwise_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/boosted_trees_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/candidate_sampling_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/condtake_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/control_flow_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/ctc_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 4
- 4
third_party/fwkacllib/inc/ops/data_flow_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -908,7 +908,7 @@ REG_OP(TensorArray)
.OUTPUT(handle, TensorType({DT_RESOURCE})) .OUTPUT(handle, TensorType({DT_RESOURCE}))
.OUTPUT(flow, TensorType({DT_FLOAT})) .OUTPUT(flow, TensorType({DT_FLOAT}))
.REQUIRED_ATTR(dtype, Type) .REQUIRED_ATTR(dtype, Type)
.ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE)
.ATTR(element_shape, ListInt, ge::UNKNOWN_RANK)
.ATTR(dynamic_size, Bool, false) .ATTR(dynamic_size, Bool, false)
.ATTR(clear_after_read, Bool, true) .ATTR(clear_after_read, Bool, true)
.ATTR(identical_element_shapes, Bool, false) .ATTR(identical_element_shapes, Bool, false)
@@ -963,7 +963,7 @@ REG_OP(TensorArrayConcat)
DT_QUINT8, DT_QINT32})) DT_QUINT8, DT_QINT32}))
.OUTPUT(lengths, TensorType({DT_INT64})) .OUTPUT(lengths, TensorType({DT_INT64}))
.REQUIRED_ATTR(dtype, Type) .REQUIRED_ATTR(dtype, Type)
.ATTR(element_shape_except0, ListInt, ge::UNKNOWN_SHAPE)
.ATTR(element_shape_except0, ListInt, ge::UNKNOWN_RANK)
.OP_END_FACTORY_REG(TensorArrayConcat) .OP_END_FACTORY_REG(TensorArrayConcat)


/** /**
@@ -999,7 +999,7 @@ REG_OP(TensorArrayGather)
DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8,
DT_QUINT8, DT_QINT32})) DT_QUINT8, DT_QINT32}))
.REQUIRED_ATTR(dtype, Type) .REQUIRED_ATTR(dtype, Type)
.ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE)
.ATTR(element_shape, ListInt, ge::UNKNOWN_RANK)
.OP_END_FACTORY_REG(TensorArrayGather) .OP_END_FACTORY_REG(TensorArrayGather)


/** /**


+ 326
- 7
third_party/fwkacllib/inc/ops/elewise_calculation_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -122,7 +122,8 @@ REG_OP(MinimumGrad)
*@par Inputs: *@par Inputs:
*One input: *One input:
*x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8,
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
For float32 type, the actual calculation on the chip is based on float16. \n


*@par Attributes: *@par Attributes:
*dst_type: An required attribute of type int32, specifying the dst data type. \n *dst_type: An required attribute of type int32, specifying the dst data type. \n
@@ -611,6 +612,15 @@ REG_OP(Log1p)


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as "x1". *y: A Tensor. Has the same type as "x1".

*@attention Constraints:
*@li x2: The input data does not support 0
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*requirement of double thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent
*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8

*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with the TensorFlow operator Mod. *Compatible with the TensorFlow operator Mod.
*/ */
@@ -2042,6 +2052,15 @@ REG_OP(FloorDiv)
* *
*@par Outputs: *@par Outputs:
*y: Result remainder. *y: Result remainder.

*@attention Constraints:
*@li x2: The input data does not support 0
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*requirement of double thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent
*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8

*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with the TensorFlow operator FloorMod. * Compatible with the TensorFlow operator FloorMod.
*/ */
@@ -2168,6 +2187,14 @@ REG_OP(Tan)
*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as "x1". \n *y: A Tensor. Has the same type as "x1". \n


*@attention Constraints:
*@li x2: The input data does not support 0
*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the
*requirement of double thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent
*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8

*@par Third-party framework compatibility *@par Third-party framework compatibility
*@li Compatible with the TensorFlow operator TruncateMod. *@li Compatible with the TensorFlow operator TruncateMod.
*/ */
@@ -2829,9 +2856,9 @@ REG_OP(AdamApplyOneAssign)
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */
REG_OP(LambApplyOptimizerAssign) REG_OP(LambApplyOptimizerAssign)
.INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2842,6 +2869,8 @@ REG_OP(LambApplyOptimizerAssign)
.INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(LambApplyOptimizerAssign) .OP_END_FACTORY_REG(LambApplyOptimizerAssign)


/** /**
@@ -2873,7 +2902,8 @@ REG_OP(LambApplyWeightAssign)
.INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(LambApplyWeightAssign) .OP_END_FACTORY_REG(LambApplyWeightAssign)


/** /**
@@ -3329,8 +3359,297 @@ REG_OP(TensorRedirect)
.OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8,
DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32}))
.OP_END_FACTORY_REG(TensorRedirect) .OP_END_FACTORY_REG(TensorRedirect)
} // namespace ge


/**
* @brief Computes the element-wise division of tensor "x1" by tensor "x2",
* multiplies the result by the scalar tensor "value" and adds it to tensor
* "input_data" (i.e. y = input_data + value * x1 / x2, matching torch.addcdiv).

* @par Inputs:
* Four inputs, including:
* @li input_data: A mutable input Tensor. Must be one of the following types:
* float16, float32.
* @li x1: A mutable input Tensor of the same type as "input_data". The numerator tensor.
* @li x2: A mutable input Tensor of the same type as "input_data". The denominator tensor.
* @li value: A mutable input Tensor holding the scalar multiplier. Must be one
* of the following types: float16, float32, int32. \n

* @par Outputs:
* @li y: A mutable Tensor. Has the same type as "input_data". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Addcdiv.
*/
REG_OP(Addcdiv)
    .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32 }))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OP_END_FACTORY_REG(Addcdiv)

/**
* @brief Computes the element-wise multiplication of tensor "x1" by tensor "x2",
* multiplies the result by the scalar tensor "value" and adds it to tensor
* "input_data" (i.e. y = input_data + value * x1 * x2, matching torch.addcmul).

* @par Inputs:
* Four inputs, including:
* @li input_data: A mutable input Tensor. Must be one of the following types:
* float16, float32, int8, int32, uint8.
* @li x1: A mutable input Tensor of the same type as "input_data".
* @li x2: A mutable input Tensor of the same type as "input_data".
* @li value: A tensor which includes only one element, of the same type as
* "input_data". \n

* @par Outputs:
* @li y: A mutable output Tensor. Has the same type as "input_data". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Addcmul.
*/
REG_OP(Addcmul)
    .INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
    .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
    .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
    .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
    .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
    .OP_END_FACTORY_REG(Addcmul)

/**
* @brief Computes the result of x2 * alpha + x1.

* @par Inputs:
* @li x1: An ND tensor of type float16, float32, int32.
* @li x2: An ND tensor of type float16, float32, int32.
* @li alpha: A scalar tensor of type float16, float32. \n

* @par Outputs:
* @li y: An ND tensor with the same shape and type as "x1". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Axpy.
*/
REG_OP(AxpyV2)
    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OP_END_FACTORY_REG(AxpyV2)

/**
* @brief Computes the element-wise result of x1 + x2.

* @par Inputs:
* @li x1: An ND tensor of type float16, float, int32.
* @li x2: An ND tensor of type float16, float, int32. \n

* @par Outputs:
* @li y: An ND tensor with the same type as "x1". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Add.
*/
REG_OP(PtAdd)
    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OP_END_FACTORY_REG(PtAdd)

/**
* @brief Computes the result of x1 * x2.

* @par Inputs:
* @li x1: An ND tensor of type float16, float32, int32.
* @li x2: An ND tensor of type float16, float32, int32.
* NOTE(review): given the "muls" compatibility below, "x2" is presumably a
* scalar (single-element) tensor — confirm against the kernel implementation. \n

* @par Outputs:
* @li y: Same shape and type as the largest ND tensor among x1 and x2. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator muls.
*/
REG_OP(PtMuls)
    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OP_END_FACTORY_REG(PtMuls)

/**
* @brief Computes the element-wise result of x1 - x2.

* @par Inputs:
* @li x1: An ND tensor of type float16, float, int32.
* @li x2: An ND tensor of type float16, float, int32. \n

* @par Outputs:
* @li y: An ND tensor with the same type as "x1". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Sub.
*/
REG_OP(PtSub)
    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OP_END_FACTORY_REG(PtSub)

/**
* @brief Adds partial channel slices of two tensors in format NC1HWC0.

* @par Inputs:
* @li x1: A Tensor in 5HD (NC1HWC0), and must be one of the following types:
* float16, float32. \n
* @li x2: A Tensor of the same type as "x1", and the same shape as "x1",
* except for the C1 value. \n

* @par Attributes:
* @li x1_c1_offset: A required int. Offset value of C1 in "x1" at which the
* slice to add starts. \n
* @li x2_c1_offset: A required int. Offset value of C1 in "x2" at which the
* slice to add starts. \n
* @li c1_len: A required int. C1 length of "y". The value must be less than
* the difference between C1 and the offset in "x1" and "x2". \n

* @par Outputs:
* @li y: A Tensor of the same type as "x1", and the same shape as "x1",
* except for the C1 value. Records the result of the addition. \n
*/
REG_OP(StrideAdd)
    .INPUT(x1, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(x2, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .REQUIRED_ATTR(x1_c1_offset, Int)
    .REQUIRED_ATTR(x2_c1_offset, Int)
    .REQUIRED_ATTR(c1_len, Int)
    .OP_END_FACTORY_REG(StrideAdd)

/**
* @brief Compares whether two tensors are totally equal, producing a single
* bool value as output.

* @par Inputs:
* Two inputs, including:
* @li input_x: A Tensor. The first tensor to compare. \n
* @li input_y: A Tensor. The second tensor to compare. \n

* @par Outputs:
* @li output_z: A Tensor of bool type; the comparison result of the two inputs. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch equal operator. \n
*/
REG_OP(TensorEqual)
    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
    .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
    .OUTPUT(output_z, TensorType({DT_BOOL}))
    .OP_END_FACTORY_REG(TensorEqual)

/**
* @brief Element-wise max of each of the input tensors (with Numpy-style broadcasting support).
* All inputs and outputs must have the same data type. This operator supports multidirectional
* (i.e., Numpy-style) broadcasting.
*
* @par inputs
* one input including:
* @li x: dynamic input. A Tensor. Must be one of the following types: float32, float16, double, int32, int64
*
* @par output
* one output including:
* @li y: A Tensor of the same type as x
*
*/
REG_OP(MaxN)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
    .OP_END_FACTORY_REG(MaxN)

/**
* @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support).
* All inputs and outputs must have the same data type. This operator supports multidirectional
* (i.e., Numpy-style) broadcasting.
*
* @par inputs
* one input including:
* @li x: dynamic input. A Tensor. Must be one of the following types: float32, float16, double, int32, int64
*
* @par output
* one output including:
* @li y: A Tensor of the same type as x
*
*/
REG_OP(MinN)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64,
                                  DT_INT32, DT_INT64}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64,
                           DT_INT32, DT_INT64}))
    .OP_END_FACTORY_REG(MinN)

/**
* @brief Calculates x * mask * value.
*
* @par Inputs:
* @li x: A tensor of type float16 or float32, specifying the input to the data layer.
* @li mask: A tensor of type int8, float16 or float32, with the same shape as x. \n
*
* @par Attributes:
* value: A required float scaling factor. \n
*
* @par Outputs:
* y: The output tensor of type float16 or float32, with the same type and shape as x.
*
* NOTE(review): the type lists below use DT_FLOAT32, while the rest of this file
* uses DT_FLOAT for float32 — confirm DT_FLOAT32 is a valid DataType enum here.
*/
REG_OP(MaskedScale)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32}))
    .INPUT(mask, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32}))
    .REQUIRED_ATTR(value, Float)
    .OP_END_FACTORY_REG(MaskedScale)

/**
* @brief Calculates the linear-interpolation (lerp) function. \n

* @par Inputs:
* Three inputs, including:
* @li start: A tensor. The starting points. Must be one of the following types:
* float16, float32. \n
* @li end: A tensor. The ending points. Must be one of the following types:
* float16, float32. \n
* @li weight: A tensor. The interpolation weights. Must be one of the following
* types: float16, float32. \n

* @par Outputs:
* y: A Tensor with the same type and shape as "start". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Lerp. \n
*/
REG_OP(Lerp)
    .INPUT(start, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(end, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OP_END_FACTORY_REG(Lerp)

/**
*@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0
*otherwise. The input does not need to explicitly be a 2D vector. The "axis" attribute indicates the dimension along
*which Hardmax will be performed. The output tensor has the same shape and contains the Hardmax values of the
*corresponding input.
*
*@par inputs
*one input including:
*@li x: input A Tensor. Must be one of the following types: float32, float16
*
*@par Attributes:
*@li axis: An optional int attribute (defaults to -1, the last dimension) that decides which dimension
*the hardmax is computed along
*
*@par output:
*one output including:
*@li y: A Tensor of the same type as x
*
*/
REG_OP(HardMax)
    .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT }))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(axis, Int, -1)
    .OP_END_FACTORY_REG(HardMax)
} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/functional_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/get_data_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 33
- 14
third_party/fwkacllib/inc/ops/hcom_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -45,8 +45,6 @@ REG_OP(HcomAllGather)
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.REQUIRED_ATTR(rank_size, Int) .REQUIRED_ATTR(rank_size, Int)
.REQUIRED_ATTR(group, String) .REQUIRED_ATTR(group, String)
.ATTR(alpha, Float, 1.0)
.ATTR(beta, Float, 0.0)
.OP_END_FACTORY_REG(HcomAllGather) .OP_END_FACTORY_REG(HcomAllGather)


/** /**
@@ -77,8 +75,6 @@ REG_OP(HcomAllReduce)
.REQUIRED_ATTR(group, String) .REQUIRED_ATTR(group, String)
.ATTR(fusion, Int, 1) .ATTR(fusion, Int, 1)
.ATTR(fusion_id, Int, -1) .ATTR(fusion_id, Int, -1)
.ATTR(alpha, Float, 1.0)
.ATTR(beta, Float, 0.0)
.OP_END_FACTORY_REG(HcomAllReduce) .OP_END_FACTORY_REG(HcomAllReduce)


/** /**
@@ -91,7 +87,7 @@ REG_OP(HcomAllReduce)
input of this rank will be broadcast to other ranks. input of this rank will be broadcast to other ranks.
* @li fusion: A required integer identifying if the op need to fusion,the * @li fusion: A required integer identifying if the op need to fusion,the
default value is none fusion default value is none fusion
* @li fusion: A required integer identifying the fusion id if para fusion
* @li fusion_id: A required integer identifying the fusion id if para fusion
is set. is set.
* @li group: A required string identifying the group name of ranks * @li group: A required string identifying the group name of ranks
participating in the op. participating in the op.
@@ -109,10 +105,39 @@ REG_OP(HcomBroadcast)
.REQUIRED_ATTR(group, String) .REQUIRED_ATTR(group, String)
.ATTR(fusion, Int, 0) .ATTR(fusion, Int, 0)
.ATTR(fusion_id, Int, -1) .ATTR(fusion_id, Int, -1)
.ATTR(alpha, Float, 1.0)
.ATTR(beta, Float, 0.0)
.OP_END_FACTORY_REG(HcomBroadcast) .OP_END_FACTORY_REG(HcomBroadcast)


/**
 * @brief Performs a reduction across all ranks; the reduction result is
 available only on the root rank.
 * @par Inputs:
 * x: A tensor. Must be one of the following types: int8, int16, int32, float16,
 float32.
 * @par Attributes:
 * @li root_rank: A required integer identifying the root rank in the op;
 the reduction result will be on this root rank.
 * @li reduction: A required string identifying the reduction operation to
 perform. The supported operations are: "sum", "max", "min", "prod".
 * @li group: A required string identifying the group name of ranks
 participating in the op.
 * @li fusion: An optional integer identifying the fusion flag of the op.
 0 (default): no fusion; 1: fusion; 2: fusion of the ops by fusion id.
 * @li fusion_id: An optional integer identifying the fusion id of the op.
 * The HcomReduce ops with the same fusion id will be fused.
 * @par Outputs:
 * y: A Tensor. Has the same type as "x".
 * @attention Constraints:
 *"group" is limited to 128 characters. Use "hccl_world_group"
 as the name of a world group.
 */
REG_OP(HcomReduce)
    .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
    .REQUIRED_ATTR(root_rank, Int)
    .REQUIRED_ATTR(reduction, String)
    .REQUIRED_ATTR(group, String)
    .ATTR(fusion, Int, 0)
    .ATTR(fusion_id, Int, -1)
    .OP_END_FACTORY_REG(HcomReduce)
/** /**
* @brief Performs reduction across all input tensors, scattering in equal * @brief Performs reduction across all input tensors, scattering in equal
blocks among ranks, each rank getting a chunk of data based on its rank blocks among ranks, each rank getting a chunk of data based on its rank
@@ -139,8 +164,6 @@ REG_OP(HcomReduceScatter)
.REQUIRED_ATTR(reduction, String) .REQUIRED_ATTR(reduction, String)
.REQUIRED_ATTR(group, String) .REQUIRED_ATTR(group, String)
.REQUIRED_ATTR(rank_size, Int) .REQUIRED_ATTR(rank_size, Int)
.ATTR(alpha, Float, 1.0)
.ATTR(beta, Float, 0.0)
.OP_END_FACTORY_REG(HcomReduceScatter) .OP_END_FACTORY_REG(HcomReduceScatter)


/** /**
@@ -167,8 +190,6 @@ REG_OP(HcomSend)
.REQUIRED_ATTR(group, String) .REQUIRED_ATTR(group, String)
.REQUIRED_ATTR(sr_tag, Int) .REQUIRED_ATTR(sr_tag, Int)
.REQUIRED_ATTR(dest_rank, Int) .REQUIRED_ATTR(dest_rank, Int)
.ATTR(alpha, Float, 1.0)
.ATTR(beta, Float, 0.0)
.OP_END_FACTORY_REG(HcomSend) .OP_END_FACTORY_REG(HcomSend)


/** /**
@@ -202,8 +223,6 @@ REG_OP(HcomReceive)
.REQUIRED_ATTR(src_rank, Int) .REQUIRED_ATTR(src_rank, Int)
.REQUIRED_ATTR(shape, ListInt) .REQUIRED_ATTR(shape, ListInt)
.REQUIRED_ATTR(dtype, Type) .REQUIRED_ATTR(dtype, Type)
.ATTR(alpha, Float, 1.0)
.ATTR(beta, Float, 0.0)
.OP_END_FACTORY_REG(HcomReceive) .OP_END_FACTORY_REG(HcomReceive)


/** /**


+ 1
- 1
third_party/fwkacllib/inc/ops/hvd_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 166
- 30
third_party/fwkacllib/inc/ops/image_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -31,11 +31,12 @@ namespace ge {
*@par Inputs: *@par Inputs:
*Input images is a tensor of at least 3 dimensions. The last dimension is *Input images is a tensor of at least 3 dimensions. The last dimension is
interpretted as channels, and must be three. Inputs include: interpretted as channels, and must be three. Inputs include:
*@li images:A Tensor of type float. Images to adjust. At least 3-D.
*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format
must be NHWC.
*@li delta:A Tensor of type float. A float delta to add to the hue . \n *@li delta:A Tensor of type float. A float delta to add to the hue . \n


*@par Outputs: *@par Outputs:
*y:A Tensor of type float . \n
*y:A Tensor of type float. The format must be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input images is a tensor of at least 3 dimensions. The last dimension is *Input images is a tensor of at least 3 dimensions. The last dimension is
@@ -57,11 +58,12 @@ REG_OP(AdjustHue)
*@par Inputs: *@par Inputs:
*Input images is a tensor of at least 3 dimensions. The last dimension is *Input images is a tensor of at least 3 dimensions. The last dimension is
interpretted as channels, and must be three. Inputs include: interpretted as channels, and must be three. Inputs include:
*@li images:A Tensor of type float. Images to adjust. At least 3-D.
*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format
must be NHWC.
*@li scale:A Tensor of type float. A float scale to add to the saturation . \n *@li scale:A Tensor of type float. A float scale to add to the saturation . \n


*@par Outputs: *@par Outputs:
*y:A Tensor of type float . \n
*y:A Tensor of type float. The format must be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input images is a tensor of at least 3 dimensions. The last dimension is *Input images is a tensor of at least 3 dimensions. The last dimension is
@@ -83,11 +85,12 @@ REG_OP(AdjustSaturation)
*@par Inputs: *@par Inputs:
*Input images is a tensor of at least 3 dimensions. The last 3 dimensions are *Input images is a tensor of at least 3 dimensions. The last 3 dimensions are
interpreted as '[height, width, channels]'. Inputs include: interpreted as '[height, width, channels]'. Inputs include:
*@li images:A Tensor of type float. Images to adjust. At least 3-D.
*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format
must be NHWC.
*@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n *@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n


*@par Outputs: *@par Outputs:
*y:A Tensor of type float . \n
*y:A Tensor of type float. The format must be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input images is a tensor of at least 3 dimensions. The last dimension is *Input images is a tensor of at least 3 dimensions. The last dimension is
@@ -112,7 +115,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n
*Input images must be a 4-D tensor. Inputs include: *Input images must be a 4-D tensor. Inputs include:
*@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, *@li images:A Tensor. Must be one of the following types:uint8, uint16, int8,
int16, int32, int64, float16, float, double. A 4-D tensor of shape int16, int32, int64, float16, float, double. A 4-D tensor of shape
[batch, image_height, image_width, depth].
[batch, image_height, image_width, depth]. The format must be NHWC.
*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4].
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with
int32 values in [0, batch). int32 values in [0, batch).
@@ -127,7 +130,7 @@ extrapolation, when applicable.
NearestNeighbor . \n NearestNeighbor . \n


*@par Outputs: *@par Outputs:
*y:A Tensor of type float . \n
*y:A Tensor of type float. The format must be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input images must be a 4-D tensor . \n *Input images must be a 4-D tensor . \n
@@ -193,7 +196,9 @@ boxes tensor . \n
*@par Inputs: *@par Inputs:
*Input images and grads must be a 4-D tensor. Inputs include: *Input images and grads must be a 4-D tensor. Inputs include:
*@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth].
The format must be NHWC.
*@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. *@li images: A 4-D tensor of shape [batch, image_height, image_width, depth].
The format must be NHWC.
Both image_height and image_width need to be positive. Both image_height and image_width need to be positive.
*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor
specifies the coordinates of a box in the box_ind[i] image and is specified in specifies the coordinates of a box in the box_ind[i] image and is specified in
@@ -233,6 +238,7 @@ images tensor . \n
*@par Inputs: *@par Inputs:
*Input grads must be a 4-D tensor. Inputs include: *Input grads must be a 4-D tensor. Inputs include:
*@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth].
The format must be NHWC.
*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor
specifies the coordinates of a box in the box_ind[i] image and is specified specifies the coordinates of a box in the box_ind[i] image and is specified
in normalized coordinates [y1, x1, y2, x2]. in normalized coordinates [y1, x1, y2, x2].
@@ -248,7 +254,8 @@ method: A string specifying the interpolation method. Only 'bilinear' is
supported for now . \n supported for now . \n


*@par Outputs: *@par Outputs:
*y:A 4-D tensor of shape [batch, image_height, image_width, depth] . \n
*y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format
must be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input grads must be a 4-D tensor . \n *Input grads must be a 4-D tensor . \n
@@ -273,6 +280,7 @@ REG_OP(CropAndResizeGradImage)
*@par Inputs: *@par Inputs:
*Input x must be a 4-D tensor. Inputs include: *Input x must be a 4-D tensor. Inputs include:
*@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. *@li x: A 4-D float tensor of shape [batch_size, height, width, channels].
The format must be NHWC.
*@li size: A 1-D tensor of 2 elements containing the size of the glimpses to *@li size: A 1-D tensor of 2 elements containing the size of the glimpses to
extract. The glimpse height must be specified first, following by the glimpse extract. The glimpse height must be specified first, following by the glimpse
width. width.
@@ -293,7 +301,7 @@ uniform_noise . \n


*@par Outputs: *@par Outputs:
*y:A tensor representing the glimpses [batch_size, glimpse_height, *y:A tensor representing the glimpses [batch_size, glimpse_height,
glimpse_width, channels] . \n
glimpse_width, channels]. The format must be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input x must be a 4-D tensor . \n *Input x must be a 4-D tensor . \n
@@ -340,7 +348,8 @@ REG_OP(HSVToRGB)


*@par Inputs: *@par Inputs:
*Input images must be a 4-D tensor. Inputs include: *Input images must be a 4-D tensor. Inputs include:
*@li images: 4-D with shape [batch, height, width, channels].
*@li images: 4-D with shape [batch, height, width, channels]. The format must
be NHWC.
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new
size for the images. size for the images.
*@li min: A Tensor of type float. *@li min: A Tensor of type float.
@@ -354,6 +363,7 @@ the values at the corner pixels. Defaults to false.


*@par Outputs: *@par Outputs:
*@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. *@li resized_images: 4-D with shape [batch, new_height, new_width, channels].
The format must be NHWC.
*@li y_min: A Tensor of type float. *@li y_min: A Tensor of type float.
*@li y_max: A Tensor of type float . \n *@li y_max: A Tensor of type float . \n


@@ -381,7 +391,8 @@ REG_OP(QuantizedResizeBilinear)


*@par Inputs: *@par Inputs:
*Input images must be a 4-D tensor. Inputs include: *Input images must be a 4-D tensor. Inputs include:
*@li images: 4-D with shape [batch, height, width, channels].
*@li images: 4-D with shape [batch, height, width, channels]. The format must
be NHWC.
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width.
The new size for the images . \n The new size for the images . \n


@@ -391,7 +402,8 @@ output tensors are aligned, preserving the values at the corner pixels.
Defaults to false . \n Defaults to false . \n


*@par Outputs: *@par Outputs:
*y: 4-D with shape [batch, new_height, new_width, channels] . \n
*y: 4-D with shape [batch, new_height, new_width, channels]. The format must
be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input images can be of different types but output images are always float . \n *Input images can be of different types but output images are always float . \n
@@ -414,10 +426,10 @@ REG_OP(ResizeArea)
*@par Inputs: *@par Inputs:
*Input grads must be a 4-D tensor. Inputs include: *Input grads must be a 4-D tensor. Inputs include:
*@li grads: A Tensor of type float. 4-D with shape [batch, height, width, *@li grads: A Tensor of type float. 4-D with shape [batch, height, width,
channels].
channels]. The format must be NHWC.
*@li original_image: A Tensor. Must be one of the following types: float, *@li original_image: A Tensor. Must be one of the following types: float,
double. 4-D with shape [batch, orig_height, orig_width, channels], The image double. 4-D with shape [batch, orig_height, orig_width, channels], The image
tensor that was resized . \n
tensor that was resized. The format must be NHWC. \n


*@par Attributes: *@par Attributes:
*@li align_corners: An optional bool. Defaults to False. If true, the centers *@li align_corners: An optional bool. Defaults to False. If true, the centers
@@ -426,10 +438,10 @@ false.
*@li half_pixel_centers: An optional bool. Defaults to False . \n *@li half_pixel_centers: An optional bool. Defaults to False . \n


*@par Outputs: *@par Outputs:
*y: A Tensor. Has the same type as original_image . \n
*y: A Tensor. Has the same type as original_image. The format must be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input images can be of different types but output images are always float . \n
*Input images can be of different types but output images are always float .


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with tensorflow ResizeBicubicGrad operator. *Compatible with tensorflow ResizeBicubicGrad operator.
@@ -448,7 +460,8 @@ REG_OP(ResizeBicubicGrad)


*@par Inputs: *@par Inputs:
*Input images must be a 4-D tensor. Inputs include: *Input images must be a 4-D tensor. Inputs include:
*@li images: 4-D with shape [batch, height, width, channels].
*@li images: 4-D with shape [batch, height, width, channels]. The format
must be NHWC.
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new
size for the images . \n size for the images . \n


@@ -459,10 +472,11 @@ Defaults to false.
*@li half_pixel_centers: An optional bool. Defaults to False . \n *@li half_pixel_centers: An optional bool. Defaults to False . \n


*@par Outputs: *@par Outputs:
*y: 4-D with shape [batch, new_height, new_width, channels] . \n
*y: 4-D with shape [batch, new_height, new_width, channels]. The format
must be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input images can be of different types but output images are always float . \n
*Input images can be of different types but output images are always float .


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with tensorflow ResizeBicubic operator. *Compatible with tensorflow ResizeBicubic operator.
@@ -483,7 +497,7 @@ REG_OP(ResizeBicubic)
*@par Inputs: *@par Inputs:
*Input grads must be a 4-D tensor. Inputs include: *Input grads must be a 4-D tensor. Inputs include:
*@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, *@li grads: A Tensor. Must be one of the following types: uint8, int8, int32,
float16, float, double. 4-D with shape [batch, height, width, channels].
float16, float, double. Must set the format, supported format list ["NCHW, NHWC"]
*@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. *@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width.
The original input size . \n The original input size . \n


@@ -550,9 +564,8 @@ REG_OP(ResizeNearestNeighborV2GradD)


*@par Inputs: *@par Inputs:
*Input grads must be a 4-D tensor. Inputs include: *Input grads must be a 4-D tensor. Inputs include:
*@li grads: A Tensor of type float32. 4-D with shape [batch, height, width,
channels].
*@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width,
*@li grads: A Tensor of type float32. Must set the format, supported format list ["NCHW, NHWC"]
*@li original_image: A Tensor. 4-D shape. Must set the format, supported format list ["NCHW, NHWC"]
channels], The image tensor that was resized . \n channels], The image tensor that was resized . \n


*@par Attributes: *@par Attributes:
@@ -583,7 +596,7 @@ REG_OP(ResizeBilinearV2Grad)


*@par Inputs: *@par Inputs:
*Input images must be a 4-D tensor. Inputs include: *Input images must be a 4-D tensor. Inputs include:
*@li x: 4-D with shape [batch, height, width, channels].
*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"]
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new
size for the images . \n size for the images . \n


@@ -697,7 +710,7 @@ REG_OP(SampleDistortedBoundingBoxExt2)


*@par Inputs: *@par Inputs:
*Input x must be a 4-D tensor. Inputs include: *Input x must be a 4-D tensor. Inputs include:
*@li x: 4-D with shape [batch, height, width, channels].
*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"].
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width.
The new size for the images . \n The new size for the images . \n


@@ -729,12 +742,12 @@ REG_OP(ResizeNearestNeighborV2)
*@par Inputs: *@par Inputs:
*Input images must be a 4-D tensor. Inputs include: *Input images must be a 4-D tensor. Inputs include:
*@li images: A Tensor. Must be one of the following types: float. 4-D with *@li images: A Tensor. Must be one of the following types: float. 4-D with
shape [batch, height, width, depth]. A batch of images.
shape [batch, height, width, depth]. A batch of images. The format must be NHWC.
*@li boxes: A Tensor of type float32. 3-D with shape [batch, *@li boxes: A Tensor of type float32. 3-D with shape [batch,
num_bounding_boxes, 4] containing bounding boxes . \n num_bounding_boxes, 4] containing bounding boxes . \n


*@par Outputs: *@par Outputs:
*A Tensor. Has the same type as images . \n
*A Tensor. Has the same type as images. The format must be NHWC. \n


*@attention Constraints: *@attention Constraints:
*Input images must be a 4-D tensor . \n *Input images must be a 4-D tensor . \n
@@ -1342,6 +1355,129 @@ REG_OP(SpatialTransformerD)
.ATTR(use_default_theta, ListBool, {}) .ATTR(use_default_theta, ListBool, {})
.OP_END_FACTORY_REG(SpatialTransformerD) .OP_END_FACTORY_REG(SpatialTransformerD)


} // namespace ge
/**
* @brief Resize the input tensor. \n
currently, only support resize image tensor using nearest neighbor and linear interpolation
(a "cubic" mode attribute also exists below — confirm kernel support before relying on it).

* @par Inputs:
* Input x must be a 4-D tensor. Inputs include: \n
* @li x: A Tensor. Must be one of the following types: uint8, int8, int16, \n
int32, int64, float16, float, double. 4-D with shape [batch, height, width, channels] \n
or shape [batch, channels, height, width].
* @li roi: A 1-D float Tensor. Only takes effect when attr coordinate_transformation_mode \n
is "tf_crop_and_resize".
* @li scales: A 1-D float Tensor, the scale array along each dimension. Only one of \n
'scales' and 'sizes' can be specified.
* @li sizes: A 1-D int64 Tensor, the size of the output tensor. Only one of \n
'scales' and 'sizes' can be specified. If 'sizes' is specified, then set 'scales' \n
to empty data (zero shape) in this operator's input list.

* @par Attributes:
* @li coordinate_transformation_mode: String. Defaults to "half_pixel". How to transform \n
the coordinate in the resized tensor to the coordinate in the original tensor. \n
Other options: pytorch_half_pixel, align_corners, asymmetric, tf_half_pixel_for_nn, \n
tf_crop_and_resize.
* @li cubic_coeff_a: Float. Defaults to -0.75. Only used in cubic interpolation. \n
Other option: -0.5.
* @li exclude_outside: Int. Defaults to 0. If set to 1, the weight of sampling \n
locations outside the tensor will be set to 0 and the weight will be renormalized \n
so that their sum is 1.0.
* @li extrapolation_value: Float. Defaults to 0.0f. When coordinate_transformation_mode \n
is "tf_crop_and_resize" and x_original is outside the range [0, length_original - 1], \n
this value is used as the corresponding output value.
* @li mode: String. Defaults to "nearest". Three interpolation modes: nearest (default), \n
linear and cubic.
* @li nearest_mode: String. Defaults to "round_prefer_floor". Four modes: round_prefer_floor, \n
round_prefer_ceil, floor, ceil. Only used by nearest interpolation.

* @par Outputs:
* y: A Tensor. Has the same type as x.

* @attention Constraints: \n
* Input x must be a 4-D tensor.

* @par Third-party framework compatibility
* Compatible with the ONNX Resize operator (the attribute set matches ONNX Resize;
* the original comment cited TensorFlow ResizeNearestNeighborV2 — confirm).
*/

REG_OP(Resize)
.INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(scales, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(sizes, TensorType({DT_INT64}))
.OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.ATTR(coordinate_transformation_mode, String, "half_pixel")
.ATTR(cubic_coeff_a, Float, -0.75)
.ATTR(exclude_outside, Int, 0)
.ATTR(extrapolation_value, Float, 0)
.ATTR(mode, String, "nearest")
.ATTR(nearest_mode, String, "round_prefer_floor")
.OP_END_FACTORY_REG(Resize)

/**
*@brief Decodes a JPEG-encoded image string into a uint8 image tensor. \n

*@par Inputs:
*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n


*@par Attributes:
*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image.
*@li ratio: An optional int. Defaults to 1. Downscaling ratio.
*@li fancy_upscaling: An optional bool. Defaults to True. If true, use a slower but nicer upscaling of the chroma planes.
*@li try_recover_truncated: An optional bool. Defaults to False. If true, try to recover an image from truncated input.
*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted.
*@li dct_method: An optional string. Defaults to "". A hint about the algorithm used for decompression. \n

*@par Outputs:
*image: A Tensor dtype of uint8.
*/
REG_OP(DecodeJpeg)
.INPUT(contents, TensorType({DT_STRING}))
.OUTPUT(image, TensorType({DT_UINT8}))
.ATTR(channels, Int, 0)
.ATTR(ratio, Int, 1)
.ATTR(fancy_upscaling, Bool, true)
.ATTR(try_recover_truncated, Bool, false)
.ATTR(acceptable_fraction, Float, 1.0)
.ATTR(dct_method, String, "")
.OP_END_FACTORY_REG(DecodeJpeg)

/**
*@brief Image warping using per-pixel flow vectors. \n

*@par Inputs:
*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n

*@par Outputs:
*y: Returns 4-D with the same shape and dtype as `image`. \n
*/
REG_OP(DenseImageWarp)
.INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(DenseImageWarp)

/**
*@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n

*@par Inputs:
*@li grad: Gradients with respect to the DenseImageWarp output.
*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n

*@par Outputs:
*grad_image: Returns 4-D with the same shape and dtype as `image`.
*grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n
*/
REG_OP(DenseImageWarpGrad)
.INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(DenseImageWarpGrad)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/internal_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/linalg_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 230
- 0
third_party/fwkacllib/inc/ops/list_ops.h View File

@@ -0,0 +1,230 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file list_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_

#include <algorithm>
#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {

/**
*@brief Creates and returns an empty tensor list. \n

*@par Inputs:
*@li element_shape: A shape compatible with that of elements in the list.
*@li max_num_elements: The maximum number of elements the list may hold. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li handle: An empty tensor list . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow EmptyTensorList operator.
*/
REG_OP(EmptyTensorList)
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
.INPUT(max_num_elements, TensorType({DT_INT32}))
.OUTPUT(handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(EmptyTensorList)

/**
*@brief Returns a list which has the passed-in `Tensor` as last element
and the other elements of the given list in `input_handle`. \n

*@par Inputs:
*@li input_handle: The old list.
*@li tensor: The tensor to put on the list. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li output_handle: A list with the elements of the old list followed by tensor. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListPushBack operator.
*/
REG_OP(TensorListPushBack)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListPushBack)

/**
*@brief The last element of the input list as well as a
list with all but that element. \n

*@par Inputs:
*@li input_handle: The input list.
*@li element_shape: A shape compatible with that of elements in the list. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li output_handle: A list containing all but the last element of the input list.
*@li tensor: The withdrawn last element of the list. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListPopBack operator.
*/
REG_OP(TensorListPopBack)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(element_shape, TensorType({DT_INT32}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListPopBack)

/**
*@brief The number of tensors in the input tensor list. \n

*@par Inputs:
*@li input_handle: The input list. \n

*@par Outputs:
*@li length: The number of tensors in the list. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListLength operator.
*/
REG_OP(TensorListLength)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.OUTPUT(length, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(TensorListLength)

/**
*@brief The shape of elements in the input tensor list. \n

*@par Inputs:
*@li input_handle: The input list. \n

*@par Attributes:
*@li shape_type: The dtype used to encode the shape (int32 or int64). \n

*@par Outputs:
*@li element_shape: A shape compatible with that of elements in the list. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListElementShape operator.
*/
REG_OP(TensorListElementShape)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.OUTPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
.ATTR(shape_type, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListElementShape)

/**
*@brief List of the given size with empty elements. \n

*@par Inputs:
*@li element_shape: A shape compatible with that of elements in the list.
*@li num_elements: The number of elements to reserve. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list.
*@li shape_type: The dtype used to encode the shape (int32 or int64). \n

*@par Outputs:
*@li handle: An output tensor list . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListReserve operator.
*/
REG_OP(TensorListReserve)
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
.INPUT(num_elements, TensorType({DT_INT32}))
.OUTPUT(handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.ATTR(shape_type, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListReserve)

/**
*@brief Returns the element of the input tensor list at the given index. \n

*@par Inputs:
*@li input_handle: The input list.
*@li index: The position of the element to fetch.
*@li element_shape: A shape compatible with that of elements in the list. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li item: The tensor stored at the given index position . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListGetItem operator.
*/
REG_OP(TensorListGetItem)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(index, TensorType({DT_INT32}))
.INPUT(element_shape, TensorType({DT_INT32}))
.OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListGetItem)

/**
*@brief Sets the index-th position of the list to contain the given tensor. \n

*@par Inputs:
*@li input_handle: The input list.
*@li index: The position in the list to which the tensor will be assigned.
*@li item: The element to be assigned to that position. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. \n

*@par Outputs:
*@li output_handle: An output tensor list . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListSetItem operator.
*/
REG_OP(TensorListSetItem)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(index, TensorType({DT_INT32}))
.INPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListSetItem)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/logging_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/lookup_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 64
- 1
third_party/fwkacllib/inc/ops/math_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -365,6 +365,27 @@ REG_OP(GetNext)
.ATTR(channel_name, String, "") .ATTR(channel_name, String, "")
.OP_END_FACTORY_REG(GetNext) .OP_END_FACTORY_REG(GetNext)


/**
*@brief Get dynamic dims after GetNext. \n

*@par Inputs:
*input: A nested structure of Tensor objects, from GetNext's output. \n

*@par Attributes:
*@li shape_info: GE shape_info for each input; -1 means an unknown dim.
*@li N: Number of inputs. \n

*@par Outputs:
*dims: GE unknown dims, a vector of int32 or int64. \n
*/

REG_OP(GetDynamicDims)
.DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(dims, TensorType({DT_INT32, DT_INT64}))
.REQUIRED_ATTR(shape_info, ListInt)
.REQUIRED_ATTR(N, Int)
.OP_END_FACTORY_REG(GetDynamicDims)

/** /**
*@brief End of sequence . \n *@brief End of sequence . \n


@@ -710,6 +731,9 @@ REG_OP(IFMR)


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with mindspore *Compatible with mindspore

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */


REG_OP(WtsARQ) REG_OP(WtsARQ)
@@ -741,6 +765,9 @@ REG_OP(WtsARQ)


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with mindspore *Compatible with mindspore

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */


REG_OP(ActsULQ) REG_OP(ActsULQ)
@@ -768,6 +795,9 @@ REG_OP(ActsULQ)


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with mindspore *Compatible with mindspore

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */


REG_OP(ActsULQInputGrad) REG_OP(ActsULQInputGrad)
@@ -790,6 +820,9 @@ REG_OP(ActsULQInputGrad)


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with mindspore *Compatible with mindspore

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */


REG_OP(ActULQClampMaxGrad) REG_OP(ActULQClampMaxGrad)
@@ -812,6 +845,9 @@ REG_OP(ActULQClampMaxGrad)


*@par Third-party framework compatibility *@par Third-party framework compatibility
*Compatible with mindspore *Compatible with mindspore

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */


REG_OP(ActULQClampMinGrad) REG_OP(ActULQClampMinGrad)
@@ -821,6 +857,33 @@ REG_OP(ActULQClampMinGrad)
.OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(ActULQClampMinGrad) .OP_END_FACTORY_REG(ActULQClampMinGrad)


/**
* @brief Computes Lp norm.

* @par Inputs:
* @li x: An ND tensor of type float16, float32. \n
*
* @par Attributes:
* @li p: Int. The order of the norm. Defaults to 2. (NOTE(review): the original
* comment mentioned "inf"/"-inf", but the attribute is an Int — confirm how
* infinity norms are encoded.)
* @li axes: ListInt. {} means all axes will be computed.
* @li keepdim: Bool. Defaults to false.
* @li epsilon: Float. Defaults to 1e-12. \n

* @par Outputs:
* @li y: An ND tensor of type float16, float32. The shape of y depends
* on axes and keepdim. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator LpNorm.
*/
REG_OP(LpNorm)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(p, Int, 2)
.ATTR(axes, ListInt, {})
.ATTR(keepdim, Bool, false)
.ATTR(epsilon, Float, 1e-12)
.OP_END_FACTORY_REG(LpNorm)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_

+ 50
- 7
third_party/fwkacllib/inc/ops/matrix_calculation_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -38,8 +38,8 @@ namespace ge {
* float32, int32. Has format [ND, NHWC] . \n * float32, int32. Has format [ND, NHWC] . \n


*@par Attributes: *@par Attributes:
*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n


*@par Outputs: *@par Outputs:
*y: The result matrix Tensor. 2D. Must be one of the following types: float16, *y: The result matrix Tensor. 2D. Must be one of the following types: float16,
@@ -70,8 +70,8 @@ REG_OP(MatMul)
* float32, int32. Has format [ND, NHWC] . \n * float32, int32. Has format [ND, NHWC] . \n


*@par Attributes: *@par Attributes:
*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n


*@par Outputs: *@par Outputs:
*y: The result matrix Tensor. 2D. Must be one of the following types: float16, *y: The result matrix Tensor. 2D. Must be one of the following types: float16,
@@ -156,8 +156,8 @@ REG_OP(GEMM)
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n


*@par Attributes: *@par Attributes:
*@li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M].
*@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n
*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M].
*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n


*@par Outputs: *@par Outputs:
*y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16,
@@ -175,6 +175,41 @@ REG_OP(BatchMatMul)
.ATTR(adj_x2, Bool, false) .ATTR(adj_x2, Bool, false)
.OP_END_FACTORY_REG(BatchMatMul) .OP_END_FACTORY_REG(BatchMatMul)



/**
* @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n

* @par Inputs:
* Three inputs, including:
* @li x1: A matrix Tensor. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ].
* @li x2: A matrix Tensor. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n
* @li bias: An optional matrix Tensor. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n

* @par Attributes:
* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M].
* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n

* @par Outputs:
* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16,
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator BatchMatmul.
*/

REG_OP(BatchMatMulV2)
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.ATTR(adj_x1, Bool, false)
.ATTR(adj_x2, Bool, false)
.OP_END_FACTORY_REG(BatchMatMulV2)


/** /**
*@brief Computes half the L2 norm of a tensor without the sqrt . \n *@brief Computes half the L2 norm of a tensor without the sqrt . \n


@@ -979,6 +1014,14 @@ REG_OP(MatrixDiagV2)
.OUTPUT(output, TensorType::BasicType()) .OUTPUT(output, TensorType::BasicType())
.OP_END_FACTORY_REG(MatrixDiagV2) .OP_END_FACTORY_REG(MatrixDiagV2)


/**
*@brief Accumulates "updates" into "var" at the positions selected by "indices"
* along dimension "axis". (NOTE(review): presumably matches PyTorch
* Tensor.index_add_ semantics — confirm against the kernel implementation.) \n

*@par Inputs:
*@li var: A Tensor. Must be one of the following types: int32, int8, uint8, float32, float16.
*@li indices: A Tensor of type int32.
*@li updates: A Tensor. Has the same type as "var". \n

*@par Attributes:
*axis: An optional int. Defaults to 0. The dimension along which to index. \n

*@par Outputs:
*var_out: A Tensor. Has the same type as "var". \n
*/
// NOTE(review): DT_FLOAT32 is unusual in this file — sibling ops use DT_FLOAT; confirm it is a valid alias.
REG_OP(IndexAdd)
.INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(updates, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
.OUTPUT(var_out, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
.ATTR(axis, Int, 0)
.OP_END_FACTORY_REG(IndexAdd)

} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 344
- 72
third_party/fwkacllib/inc/ops/nn_calculation_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -365,6 +365,25 @@ REG_OP(BiasAddGrad)
* 4-D with shape [batch, out_height, out_width, out_channels] * 4-D with shape [batch, out_height, out_width, out_channels]
* or [batch, out_channels, out_height, out_width]. * or [batch, out_channels, out_height, out_width].
* Gradients with respect to the output of the convolution. * Gradients with respect to the output of the convolution.
*\n
*\n
* The following are the supported data types and data formats:
*@verbatim
| Tensor | out_bckprop | filter | y
------------|-------------|---------|--------
| Data Type | float16 | float16 | float16
| |-------------|---------|--------
| | float32 | float32 | float32
| |-------------|---------|--------
| | float64 | float64 | float64
------------|-------------|---------|--------
| Format | NCHW | NCHW | NCHW
| | NHWC | HWCN | NHWC
@endverbatim
* For float32 and float64 type, the actual calculation on the chip is based on
* float16.
*\n
*
*@par Attributes: *@par Attributes:
* Five attributes: * Five attributes:
* @li strides: A tuple/list of 4 integers. The stride of the sliding window * @li strides: A tuple/list of 4 integers. The stride of the sliding window
@@ -377,8 +396,52 @@ REG_OP(BiasAddGrad)
* channels. * channels.
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to
* "NHWC". Specify the data format of the input and output data. * "NHWC". Specify the data format of the input and output data.
*\n
*\n
* The following value range restrictions must be met:
*@verbatim
| Name | Field | Scope
-------------------|----------|--------------
| input_size | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Filter | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| out_backprop | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| y(fmap) | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Stride | H | [1, 63]
| | W | [1, 63]
-------------------|----------|--------------
| Padding | Top | [0, 255]
| | Bottom | [0, 255]
| | Left | [0, 255]
| | Right | [0, 255]
-------------------|----------|--------------
| Dilation | H | [1, 255]
| | W | [1, 255]

@endverbatim
* In Ascend910, the H and W dimensions of fmap or out_backprop do not
* support the value 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
*\n
*
*@par Outputs: *@par Outputs:
* y: A Tensor. Has the same type as filter,and has same format as input_size. * y: A Tensor. Has the same type as filter,and has same format as input_size.
*\n
* out_backprop_height = (fmap_height + pad_top + pad_bottom -
* (dilation_h * (filter_height - 1) + 1))
* / stride_h + 1
*\n
* out_backprop_width = (fmap_width + pad_left + pad_right -
* (dilation_w * (filter_width - 1) + 1))
* / stride_w + 1
*\n
*
*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with Tensorflow's conv2d_backprop_input * Compatible with Tensorflow's conv2d_backprop_input
*/ */
@@ -454,6 +517,21 @@ REG_OP(Conv2DBackpropInputD)
* @li bias: An optional tensor. Must have the same type as "y". * @li bias: An optional tensor. Must have the same type as "y".
* @li offset_w: An optional 1D tensor for quantized deconvolution. * @li offset_w: An optional 1D tensor for quantized deconvolution.
* Type is int8. Reserved.\n * Type is int8. Reserved.\n
*\n
*\n
* The following are the supported data types and data formats:
*@verbatim
| Tensor | x | filter | bias | y
------------|---------|---------|---------|--------
| Data Type | float16 | float16 | float16 | float16
| |---------|---------|---------|--------
| | int8 | int8 | int32 | int32
------------|---------|---------|---------|--------
| Format | NCHW | NCHW | ND | NCHW
@endverbatim
* For int8, a dequant or requant operator must be followed.
*\n
*
*@par Attributes: *@par Attributes:
* Six attributes: * Six attributes:
* @li strides: A tuple or list of 2 integers. The stride of the sliding window * @li strides: A tuple or list of 2 integers. The stride of the sliding window
@@ -468,8 +546,51 @@ REG_OP(Conv2DBackpropInputD)
Specify the data format of the input and output data. Specify the data format of the input and output data.
* @li offset_x: An optional integer for quantized deconvolution. * @li offset_x: An optional integer for quantized deconvolution.
* Defaults to "0". * Defaults to "0".
*\n
*\n
* The following value range restrictions must be met:
*@verbatim
| Name | Field | Scope
-------------------|----------|--------------
| x (out_backprop) | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Filter | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| y (fmap) | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Stride | H | [1, 63]
| | W | [1, 63]
-------------------|----------|--------------
| Padding | Top | [0, 255]
| | Bottom | [0, 255]
| | Left | [0, 255]
| | Right | [0, 255]
-------------------|----------|--------------
| Dilation | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| Offset_x | | [-128, 127]

@endverbatim
* In Ascend910, the H and W dimensions of fmap or out_backprop do not
* support the value 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
*\n
*
*@par Outputs: *@par Outputs:
* y: A Tensor. 4D tensor with shape [batch, channels, height, width]. * y: A Tensor. 4D tensor with shape [batch, channels, height, width].
*\n
* out_backprop_height = (fmap_height + pad_top + pad_bottom -
* (dilation_h * (filter_height - 1) + 1))
* / stride_h + 1
*\n
* out_backprop_width = (fmap_width + pad_left + pad_right -
* (dilation_w * (filter_width - 1) + 1))
* / stride_w + 1
*\n
*
* When type of x is float16, the type of y must be float16. * When type of x is float16, the type of y must be float16.
* When type of x is int8, the type of y must be int32. * When type of x is int8, the type of y must be int32.
*/ */
@@ -502,6 +623,25 @@ REG_OP(Deconvolution)
* [batch, out_height, out_width, out_channels] or [batch, out_channels, * [batch, out_height, out_width, out_channels] or [batch, out_channels,
* out_height, out_width]. Gradients with respect to the output of the * out_height, out_width]. Gradients with respect to the output of the
* convolution. * convolution.
*\n
*\n
* The following are the supported data types and data formats:
*@verbatim
| Tensor | x | out_backprop | y
------------|---------|--------------|---------
| Data Type | float16 | float16 | float16
| |---------|--------------|---------
| | float32 | float32 | float32
| |---------|--------------|---------
| | float64 | float64 | float64
|-----------|---------|--------------|---------
| Format | NCHW | NCHW | NCHW
| | NHWC | NHWC | HWCN
@endverbatim
 * For float32 and float64 types of x and out_backprop, the actual calculation
 * on the chip is based on float16.
*\n
*
*@par Attributes: *@par Attributes:
* Five attributes: * Five attributes:
* @li strides: A tuple/list of 4 integers. The stride of the sliding window * @li strides: A tuple/list of 4 integers. The stride of the sliding window
@@ -514,8 +654,52 @@ REG_OP(Deconvolution)
* channels. * channels.
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to
* "NHWC". Specify the data format of the input and output data. * "NHWC". Specify the data format of the input and output data.
*\n
*\n
* The following value range restrictions must be met:
*@verbatim
| Name | Field | Scope
-------------------|----------|--------------
| x(fmap) | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Filter Size | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| out_backprop | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| y | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Stride | H | [1, 63]
| | W | [1, 63]
-------------------|----------|--------------
| Padding | Top | [0, 255]
| | Bottom | [0, 255]
| | Left | [0, 255]
| | Right | [0, 255]
-------------------|----------|--------------
| Dilation | H | [1, 255]
| | W | [1, 255]

@endverbatim
 * In Ascend910, the H and W dimensions of out_backprop do not support
 * the value 1 when
 * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
*\n
*
*@par Outputs: *@par Outputs:
* y: A Tensor. Has the same type as x, has the same format as filter_size. * y: A Tensor. Has the same type as x, has the same format as filter_size.
*\n
* out_backprop_height = (in_height + pad_top + pad_bottom -
* (dilation_h * (filter_height - 1) + 1))
* / stride_h + 1
*\n
* out_backprop_width = (in_width + pad_left + pad_right -
* (dilation_w * (filter_width - 1) + 1))
* / stride_w + 1
*\n
*
*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with Tensorflow's conv2d_backprop_filter * Compatible with Tensorflow's conv2d_backprop_filter
*/ */
@@ -617,8 +801,7 @@ REG_OP(Conv2DBackpropFilterD)
* (top, bottom, left, right) side of the input. * (top, bottom, left, right) side of the input.
*@li dilations: Optional. A list of 4 integers. The dilation factor for each *@li dilations: Optional. A list of 4 integers. The dilation factor for each
* dimension of input. The dimension order is determined by the data format of * dimension of input. The dimension order is determined by the data format of
* "x". The N and C dimensions must be set to 1. The H and W dimensions must be
* set to 1 for int8 type. Defaults to [1, 1, 1, 1].
* "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1].
*@li groups: Optional. An integer of type int32. The number of blocked *@li groups: Optional. An integer of type int32. The number of blocked
* connections from input channels to output channels. In_channels and * connections from input channels to output channels. In_channels and
* out_channels must both be divisible by "groups". Defaults to 1. * out_channels must both be divisible by "groups". Defaults to 1.
@@ -652,6 +835,8 @@ REG_OP(Conv2DBackpropFilterD)
| Offset_x | | [-128, 127] | Offset_x | | [-128, 127]


@endverbatim @endverbatim
* The W dimension of the input image supports cases exceeding 4096, but it may
* cause compilation errors.
*\n *\n
* *
*@par Outputs: *@par Outputs:
@@ -666,21 +851,6 @@ REG_OP(Conv2DBackpropFilterD)
* out_width = (in_width + pad_left + pad_right - * out_width = (in_width + pad_left + pad_right -
* (dilation_w * (filter_width - 1) + 1)) * (dilation_w * (filter_width - 1) + 1))
* / stride_w + 1 * / stride_w + 1
*
*@attention Constraints:
*@li The following restrictions on the output must be met:
*@verbatim
| Output | Restrictions
----------|--------------------------------
| H == 1 | H * W(input) == H * W(filter)
| W == 1 |
----------|--------------------------------
| H != 1 | W(input) == W(filter)
| W == 1 | Only for Ascend310 Hi3796V300CS
@endverbatim
* "H * W (input)" indicates the image size after padding and "H * W (filter)"
* indicates the filter size after dilation."W(input)" and W(filter) indicate
* the same rule on the W dimension.
*\n *\n
* *
*@par Quantization supported or not *@par Quantization supported or not
@@ -778,7 +948,7 @@ REG_OP(Conv2DCompress)
* With the format "HWCN" , the data is stored in the order of: [filter_height, * With the format "HWCN" , the data is stored in the order of: [filter_height,
* filter_width, in_channels / groups, out_channels]. * filter_width, in_channels / groups, out_channels].
*@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format
* "NHWC", the data is stored in the order of: [batch, in_height, in_width,
* "NHWC", the data is stored in the order of: [batch, out_height, out_width,
* deformable_groups * filter_height * filter_width * 3]. * deformable_groups * filter_height * filter_width * 3].
*@li bias: An optional 1D tensor of additive biases to the filter outputs. *@li bias: An optional 1D tensor of additive biases to the filter outputs.
* The data is stored in the order of: [out_channels]. * The data is stored in the order of: [out_channels].
@@ -822,25 +992,12 @@ REG_OP(Conv2DCompress)
*@verbatim *@verbatim
| Name | Field | Scope | Name | Field | Scope
--------------------|--------|---------------------------- --------------------|--------|----------------------------
| Input Image Size | H | [1, 100000]
| | W | [1, 4096]
| Input Image Size | H | [1, 100000 / filter_height]
| | W | [1, 4096 / filter_width]
--------------------|--------|---------------------------- --------------------|--------|----------------------------
| Filter Size | H | [1, 255]
| | W | [1, 255]
--------------------|--------|----------------------------
| Stride | H | [1, 63]
| Filter Size | H | [1, 63]
| | W | [1, 63] | | W | [1, 63]
--------------------|--------|----------------------------
| Padding | Top | [0, 255]
| | Bottom | [0, 255]
| | Left | [0, 255]
| | Right | [0, 255]
------------ -------|--------|----------------------------
| Dilation | H | [1, 255]
| | W | [1, 255]
@endverbatim @endverbatim
* "W(input)" indicate the image width after padding and W(filter) indicates the
* filter width after dilation.
*\n *\n
* *
*@par Outputs: *@par Outputs:
@@ -855,21 +1012,7 @@ REG_OP(Conv2DCompress)
* out_width = (in_width + pad_left + pad_right - * out_width = (in_width + pad_left + pad_right -
* (dilation_w * (filter_width - 1) + 1)) * (dilation_w * (filter_width - 1) + 1))
* / stride_w + 1 * / stride_w + 1
*
*@attention Constraints:
*@li The following restrictions on the output must be met:
*@verbatim
| Output | Restrictions
----------|--------------------------------
| H == 1 | H * W(input) == H * W(filter)
| W == 1 |
----------|--------------------------------
| H != 1 | W(input) == W(filter)
| W == 1 | Only for Ascend310 Hi3796V300CS
@endverbatim
* "H * W(input)" indicates the image size after padding and "H * W(filter)"
* indicates the filter size after dilation. "W(input)" and W(filter) indicate
* the same rule on the W dimension.
*\n
* *
*@par Quantization supported or not *@par Quantization supported or not
*@li No *@li No
@@ -920,8 +1063,8 @@ REG_OP(DeformableConv2D)
* @li data_format: An optional string from: "NDHWC", "NCDHW". * @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data. * Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A list of 5 integers. Specifies the dilation factor for each * @li dilations: A list of 5 integers. Specifies the dilation factor for each
* dimension of "x", now only support [1,1,1,1,1]
* The N and C dimensions must be 1. Has the same format as "x".
* dimension of "x".
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li offset_x: An optional int. Input offset, used for quantized inference. * @li offset_x: An optional int. Input offset, used for quantized inference.
* Defaults to 0. Reserved . \n * Defaults to 0. Reserved . \n


@@ -967,8 +1110,8 @@ REG_OP(Conv3D)


*@par Required Attributes: *@par Required Attributes:
* @li strides: A list of 5 integers. Specifies the stride of the sliding window * @li strides: A list of 5 integers. Specifies the stride of the sliding window
* for each dimension of "x".
* The N and C dimensions must be 1. Has the same format as "x".
* for each dimension of "out_backprop".
* The N and C dimensions must be 1. Has the same format as "out_backprop".
* @li pads: A list of 6 integers. * @li pads: A list of 6 integers.
* Supports only padding along the D, H and W dimensions in sequence of head, * Supports only padding along the D, H and W dimensions in sequence of head,
* tail, top, bottom, left and right . \n * tail, top, bottom, left and right . \n
@@ -980,10 +1123,11 @@ REG_OP(Conv3D)
* @li data_format: An optional string from: "NDHWC", "NCDHW". * @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data. * Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A tuple/list of 5 integers, The dilation factor for each * @li dilations: A tuple/list of 5 integers, The dilation factor for each
* dimension of the input, now only support [1,1,1,1,1]
* dimension of the input.
* The N, C and D dimensions must be 1. Has the same format as "out_backprop".


*@par Outputs: *@par Outputs:
* y: A Tensor. Has the same type as filter,and has same format as input_size
* y: A Tensor. Has the same type as filter,and has same format as "input_size"


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with Tensorflow's conv3d_backprop_input * Compatible with Tensorflow's conv3d_backprop_input
@@ -1011,8 +1155,8 @@ REG_OP(Conv3DBackpropInput)


*@par Required Attributes: *@par Required Attributes:
* @li strides: A list of 5 integers. Specifies the stride of the sliding window * @li strides: A list of 5 integers. Specifies the stride of the sliding window
* for each dimension of "x".
* The N and C dimensions must be 1. Has the same format as "x".
* for each dimension of "out_backprop".
* The N and C dimensions must be 1. Has the same format as "out_backprop".
* @li pads: A list of 6 integers. Supports only padding along the D, H and W * @li pads: A list of 6 integers. Supports only padding along the D, H and W
* dimensions in sequence of head, tail, top, bottom, left and right. * dimensions in sequence of head, tail, top, bottom, left and right.
* @li input_size: A tuple/list of type int32, int64. An integer vector * @li input_size: A tuple/list of type int32, int64. An integer vector
@@ -1027,9 +1171,10 @@ REG_OP(Conv3DBackpropInput)
* @li data_format: An optional string from: "NDHWC", "NCDHW". * @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data. * Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A tuple/list of 5 integers, The dilation factor for each * @li dilations: A tuple/list of 5 integers, The dilation factor for each
* dimension of input, now only support [1,1,1,1,1]
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "out_backprop".
*@par Outputs: *@par Outputs:
* y: A Tensor. Has the same type and data format as out_backprop.
* y: A Tensor. Has the same type and data format as "out_backprop".
*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with Tensorflow's conv3d_backprop_input * Compatible with Tensorflow's conv3d_backprop_input


@@ -1072,9 +1217,7 @@ REG_OP(Conv3DBackpropInputD)
* @li c_t: A optinal Tensor dtype of float16, float32. The cell state at time t . \n * @li c_t: A optinal Tensor dtype of float16, float32. The cell state at time t . \n


*@par Third-party framework compatibility: *@par Third-party framework compatibility:
* Compatible with the Pytorch operator adds.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
* Compatible with the Caffe operator LSTM.
*/ */
REG_OP(LSTM) REG_OP(LSTM)
.INPUT(x, TensorType({DT_FLOAT16})) .INPUT(x, TensorType({DT_FLOAT16}))
@@ -1121,14 +1264,15 @@ REG_OP(LSTM)
*@par Attributes: *@par Attributes:
* Three attributes: * Three attributes:
* @li dilations: A tuple/list of 5 integers, The dilation factor for each * @li dilations: A tuple/list of 5 integers, The dilation factor for each
* dimension of input, now only support [1,1,1,1,1].
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output * @li groups: Number of blocked connections from input channels to output
* channels. Reserved. * channels. Reserved.
* @li data_format: An optional string from: "NDHWC", "NCDHW". * @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data. * Defaults to "NDHWC". Specify the data format of the input and output data.


*@par Outputs: *@par Outputs:
* y: A Tensor that has the same type as x
* y: A Tensor that has the same type as "x"
* and the format is NDHWC, NCDHW or DHWCN. * and the format is NDHWC, NCDHW or DHWCN.
*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with Tensorflow's conv3d_backprop_filter * Compatible with Tensorflow's conv3d_backprop_filter
@@ -1172,7 +1316,8 @@ REG_OP(Conv3DBackpropFilter)
*@par Attributes: *@par Attributes:
* Three attributes: * Three attributes:
* @li dilations: A tuple/list of 5 integers, The dilation factor for each * @li dilations: A tuple/list of 5 integers, The dilation factor for each
* dimension of input, now only support [1,1,1,1,1].
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output * @li groups: Number of blocked connections from input channels to output
* channels. Reserved. * channels. Reserved.
* @li data_format: An optional string from: "NDHWC", "NCDHW". * @li data_format: An optional string from: "NDHWC", "NCDHW".
@@ -1226,13 +1371,14 @@ REG_OP(Conv3DBackpropFilterD)
* @li groups: Number of blocked connections from input channels to output * @li groups: Number of blocked connections from input channels to output
* channels. Reserved. * channels. Reserved.
* @li dilations: A tuple/list of 5 integers, * @li dilations: A tuple/list of 5 integers,
* The dilation factor for each dimension of input, now only support [1,1,1,1,1]
* The dilation factor for each dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li data_format: An optional string from: "NDHWC", "NCDHW". * @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data. * Defaults to "NDHWC". Specify the data format of the input and output data.
* @li output_padding: The size will be added in the output shape. * @li output_padding: The size will be added in the output shape.
* @li offset_x: Input offset_x value. Reserved. * @li offset_x: Input offset_x value. Reserved.
*@par Outputs: *@par Outputs:
* y: A Tensor. Has the same type and format as x.
* y: A Tensor. Has the same type and format as "x".
*/ */
REG_OP(Conv3DTranspose) REG_OP(Conv3DTranspose)
.INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(input_size, TensorType({DT_INT32, DT_INT64}))
@@ -1273,7 +1419,8 @@ REG_OP(Conv3DTranspose)
*@par Attributes: *@par Attributes:
* Five attributes: * Five attributes:
* @li dilations: A tuple/list of 5 integers, The dilation factor for each * @li dilations: A tuple/list of 5 integers, The dilation factor for each
* dimension of input, now only support [1,1,1,1,1]
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output * @li groups: Number of blocked connections from input channels to output
* channels. Reserved. * channels. Reserved.
* @li data_format: An optional string from: "NDHWC", "NCDHW". * @li data_format: An optional string from: "NDHWC", "NCDHW".
@@ -1281,7 +1428,7 @@ REG_OP(Conv3DTranspose)
* @li output_padding: The size will be added in the output shape. * @li output_padding: The size will be added in the output shape.
* @li offset_x: Input offset_x value. Reserved. * @li offset_x: Input offset_x value. Reserved.
*@par Outputs: *@par Outputs:
* y: A Tensor. Has the same type and format as x.
* y: A Tensor. Has the same type and format as "x".
*@par Restrictions: *@par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead.
*/ */
@@ -1316,6 +1463,22 @@ REG_OP(Conv3DTransposeD)
* or [out_channels, in_channel, filter_height, filter_width]. * or [out_channels, in_channel, filter_height, filter_width].
* @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND".
* @li offset_w: An optional 1D tensor for quantized inference. Reserved. * @li offset_w: An optional 1D tensor for quantized inference. Reserved.
*\n
*\n
* The following are the supported data types and data formats:
*@verbatim
| Tensor | x | filter | bias | y
------------|---------|---------|---------|--------
| Data Type | float16 | float16 | float16 | float16
| |---------|---------|---------|--------
| | int8 | int8 | int32 | int32
------------|---------|---------|---------|--------
| Format | NCHW | NCHW | ND | NCHW
| | NHWC | HWCN | | NHWC
@endverbatim
* For int8, a dequant or requant operator must be followed.
*\n
*
*@par Required Attributes: *@par Required Attributes:
* @li strides: A required tuple/list of 4 integers. The stride of the sliding * @li strides: A required tuple/list of 4 integers. The stride of the sliding
* window for H/W dimension. The index of H/W is same as data_format. * window for H/W dimension. The index of H/W is same as data_format.
@@ -1334,9 +1497,55 @@ REG_OP(Conv3DTransposeD)
* to [0, 0, 0, 0]. * to [0, 0, 0, 0].
* @li offset_x: An optional int. Input offset, used for quantized inference. * @li offset_x: An optional int. Input offset, used for quantized inference.
* Defaults to "0". * Defaults to "0".
*\n
*\n
* The following value range restrictions must be met:
*@verbatim
| Name | Field | Scope
-------------------|----------|--------------
| input_size | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| x (out_backprop) | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| filter | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| y (fmap) | H | [1, 4096]
| | W | [1, 4096]
-------------------|----------|--------------
| Stride | H | [1, 63]
| | W | [1, 63]
-------------------|----------|--------------
| Padding | Top | [0, 255]
| | Bottom | [0, 255]
| | Left | [0, 255]
| | Right | [0, 255]
-------------------|----------|--------------
| Dilation | H | [1, 255]
| | W | [1, 255]
-------------------|----------|--------------
| Offset_x | | [-128, 127]

@endverbatim
 * In Ascend910, the H and W dimensions of fmap or out_backprop do not support
 * the value 1 when
 * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
*\n
*
*@par Outputs: *@par Outputs:
* y: A Tensor. A Tensor of type float16 or int32, and has same format as * y: A Tensor. A Tensor of type float16 or int32, and has same format as
* input_size. * input_size.
*\n
* out_backprop_height = (fmap_height + pad_top + pad_bottom -
* (dilation_h * (filter_height - 1) + 1))
* / stride_h + 1
*\n
* out_backprop_width = (fmap_width + pad_left + pad_right -
* (dilation_w * (filter_width - 1) + 1))
* / stride_w + 1
*\n
*
*/ */
REG_OP(Conv2DTranspose) REG_OP(Conv2DTranspose)
.INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(input_size, TensorType({DT_INT32, DT_INT64}))
@@ -1405,13 +1614,13 @@ REG_OP(Conv2DTransposeD)
/** /**
*@brief Computes the deformed convolution output with the expected input *@brief Computes the deformed convolution output with the expected input
*@par Inputs: *@par Inputs:
* Four inputs:
* Two inputs:
* @li x: A Tensor of type float16,float32 * @li x: A Tensor of type float16,float32
* @li offsets: A Tensor of type float16,float32.Deformation offset parameter. * @li offsets: A Tensor of type float16,float32.Deformation offset parameter.
*@par Required Attributes: *@par Required Attributes:
* @li strides: A tuple/list of 4 integers.The stride of the sliding window for * @li strides: A tuple/list of 4 integers.The stride of the sliding window for
* height and width for H/W dimension. * height and width for H/W dimension.
* @li pads: A tuple/list of 4 integers.Padding added to each dimension
* @li pads: A tuple/list of 4 integers.Padding added to H/W dimension
* of the input. * of the input.
* @li ksize: A tuple/list of 2 integers.kernel size. * @li ksize: A tuple/list of 2 integers.kernel size.
*@par Attributes: *@par Attributes:
@@ -1420,6 +1629,7 @@ REG_OP(Conv2DTransposeD)
* of input. Defaults to [1, 1, 1, 1] * of input. Defaults to [1, 1, 1, 1]
* @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x.
* @li deformable_groups: Specify the c-axis grouping number of input x. * @li deformable_groups: Specify the c-axis grouping number of input x.
* @li modulated: Specify version of DeformableConv2D, true means v2, false means v1
*@par Outputs: *@par Outputs:
* y: A Tensor. A Tensor of type float16, float32. * y: A Tensor. A Tensor of type float16, float32.
*/ */
@@ -1433,7 +1643,69 @@ REG_OP(DeformableOffsets)
.ATTR(dilations, ListInt, {1, 1, 1, 1}) .ATTR(dilations, ListInt, {1, 1, 1, 1})
.ATTR(data_format, String, "NCHW") .ATTR(data_format, String, "NCHW")
.ATTR(deformable_groups, Int, 1) .ATTR(deformable_groups, Int, 1)
.ATTR(modulated, Bool, true)
.OP_END_FACTORY_REG(DeformableOffsets) .OP_END_FACTORY_REG(DeformableOffsets)


/**
*@brief Computes the gradients of DeformableOffsets with respect to the input
* feature map "x" and the deformation parameter "offsets".
*@par Inputs:
* Three inputs:
* @li grad: A Tensor of type float16, float32. Gradients with respect to the
* DeformableOffsets output.
* @li x: A Tensor of type float16, float32. The forward input feature map.
* @li offsets: A Tensor of type float16, float32. Deformation offset parameter.
*@par Required Attributes:
* @li strides: A tuple/list of 4 integers. The stride of the sliding window for
* height and width for H/W dimension.
* @li pads: A tuple/list of 4 integers. Padding added to the H/W dimension
* of the input.
* @li ksize: A tuple/list of 2 integers. Kernel size.
*@par Attributes:
* Four attributes:
* @li dilations: A tuple/list of 4 integers. The dilation factor for each
* dimension of the input. Defaults to [1, 1, 1, 1].
* @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW".
* Specify the data format of the input x.
* @li deformable_groups: Specify the c-axis grouping number of input x.
* Defaults to 1.
* @li modulated: Specify the version of DeformableConv2D; true means v2,
* false means v1. Defaults to true.
*@par Outputs:
* grad_x: A Tensor of type float16, float32. Gradients with respect to input x.
* grad_offsets: A Tensor of type float16, float32. Gradients with respect to
* input offsets.
*/
REG_OP(DeformableOffsetsGrad)
    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(grad_offsets, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .REQUIRED_ATTR(ksize, ListInt)
    .ATTR(dilations, ListInt, {1, 1, 1, 1})
    .ATTR(data_format, String, "NCHW")
    .ATTR(deformable_groups, Int, 1)
    .ATTR(modulated, Bool, true)
    .OP_END_FACTORY_REG(DeformableOffsetsGrad)

/**
*@brief Computes the deformed dilation output with the expected input.
*@par Inputs:
* One input:
* @li x: A Tensor of type int8, float16, float32.
*@par Required Attributes:
* @li dilations: A tuple/list of integers. The dilation factor for each
* dimension of the input.
*@par Attributes:
* Two attributes:
* @li padding_value: An optional float. The value used to fill the blanks
* introduced by dilation. Defaults to 0.0.
* @li pads: An optional tuple/list of integers. Padding added to the input.
* Defaults to [].
*@par Outputs:
* y: A Tensor. A Tensor of type int8, float16, float32.
*/
REG_OP(Dilation)
    .INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(dilations, ListInt)
    .ATTR(pads, ListInt, {})
    .ATTR(padding_value, Float, 0.0)
    .OP_END_FACTORY_REG(Dilation)

} // namespace ge } // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_

+ 111
- 2
third_party/fwkacllib/inc/ops/nn_detect_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -1383,6 +1383,7 @@ REG_OP(DecodeWheelsTarget)


*@attention Constraints: *@attention Constraints:
* Only computation of float16 data is supported. * Only computation of float16 data is supported.
 * Note: when (class num per image) * max_size_per_class is too large,
 * compilation will fail with an insufficient-memory error.
*/ */
REG_OP(BatchMultiClassNonMaxSuppression) REG_OP(BatchMultiClassNonMaxSuppression)
.INPUT(boxes, TensorType({DT_FLOAT16})) .INPUT(boxes, TensorType({DT_FLOAT16}))
@@ -1485,7 +1486,10 @@ REG_OP(DecodeBboxV2)
* *
*@par Outputs: *@par Outputs:
* @li y1: A Tensor. Must have the same type as x. * @li y1: A Tensor. Must have the same type as x.
* @li y2: A Tensor. Indices of y1 in x.Dtype must be int32.
* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32.
*
*@attention Constraints:
* The upper limit of data on the direction axis is 7040.
*/ */
REG_OP(Sort) REG_OP(Sort)
.INPUT(x, TensorType({ DT_FLOAT16 })) .INPUT(x, TensorType({ DT_FLOAT16 }))
@@ -1495,6 +1499,111 @@ REG_OP(Sort)
.ATTR(descending, Bool, false) .ATTR(descending, Bool, false)
.OP_END_FACTORY_REG(Sort) .OP_END_FACTORY_REG(Sort)


/**
*@brief Computes the overlap between "bboxes" and "gtboxes" according to
* "mode" (presumably the PyTorch-style IoU variant, per the op name —
* confirm against the kernel implementation).
*@par Inputs:
* Two inputs:
* @li bboxes: A Tensor of type float16, float32. Bounding boxes.
* @li gtboxes: A Tensor of type float16, float32. Boxes to compare against
* (name suggests ground-truth boxes; verify with callers).
*@par Attributes:
* mode: An optional string specifying the overlap computation mode.
* Defaults to "iou".
*@par Outputs:
* overlap: A Tensor of type float16, float32. The computed overlap values.
*/
REG_OP(PtIou)
    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(mode, String, "iou")
    .OP_END_FACTORY_REG(PtIou)

/**
*@brief Greedily selects a subset of bounding boxes in descending order of
score . \n

*@par Inputs:
*Input boxes and scores must be float16 or float32 type. Inputs include:
*@li boxes: An input tensor with shape [num_batches,spatial_dimension,4].
The single box data format is indicated by center_point_box.
*@li scores: An input tensor with shape [num_batches,num_classes,spatial_dimension].
*@li max_output_size: A scalar integer tensor representing the maximum number
of boxes to be selected by non max suppression.
*@li iou_threshold: A 0-D float tensor representing the threshold for deciding
whether boxes overlap too much with respect to IOU.
*@li score_threshold: A 0-D float tensor representing the threshold for
deciding when to remove boxes based on score . \n

*@par Attributes:
*@li center_point_box: Integer indicating the format of the box data.
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
of box corners and the coordinates can be provided as normalized
(i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models.
1 - the box data is supplied as [x_center, y_center, width, height].
Mostly used for Pytorch models.
*@li max_boxes_size: An optional integer. Defaults to 0.
NOTE(review): appears to bound the number of output boxes — confirm against
the kernel implementation. \n

*@par Outputs:
*@li selected_indices: An integer tensor of shape [M] representing the
selected indices from the boxes tensor, where M <= max_output_size. \n

*@attention Constraints:
*Input boxes and scores must be float16 or float32 type . \n

*@par Third-party framework compatibility
*Compatible with onnx NonMaxSuppression operator.
*/

REG_OP(NonMaxSuppressionV6)
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32}))
    .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT}))
    .OUTPUT(selected_indices, TensorType({DT_INT32}))
    .ATTR(center_point_box, Int, 0)
    .ATTR(max_boxes_size, Int, 0)
    .OP_END_FACTORY_REG(NonMaxSuppressionV6)

/**
*@brief Greedily selects a subset of bounding boxes in descending order of
score . \n

*@par Inputs:
*Input boxes and scores must be float16 or float32 type. Inputs include:
*@li boxes: An input tensor with shape [num_batches,spatial_dimension,4].
The single box data format is indicated by center_point_box.
*@li scores: An input tensor with shape [num_batches,num_classes,spatial_dimension].
*@li max_output_size: A scalar integer tensor representing the maximum number
of boxes to be selected by non max suppression.
*@li iou_threshold: A 0-D float tensor representing the threshold for deciding
whether boxes overlap too much with respect to IOU.
*@li score_threshold: A 0-D float tensor representing the threshold for
deciding when to remove boxes based on score . \n
*@li index_id: An optional float16 input tensor with shape
[num_batches,num_classes,spatial_dimension,3],
the last dim representing (batch_id,class_id,index_id) . \n

*@par Attributes:
*@li center_point_box: Integer indicating the format of the box data.
The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2]
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair
of box corners and the coordinates can be provided as normalized
(i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models.
1 - the box data is supplied as [x_center, y_center, width, height].
Mostly used for Pytorch models.
*@li max_boxes_size: An optional integer. Defaults to 0.
NOTE(review): appears to bound the number of output boxes — confirm against
the kernel implementation. \n

*@par Outputs:
*@li selected_indices: An integer tensor of shape [M] representing the
selected indices from the boxes tensor, where M <= max_output_size. \n

*@attention Constraints:
*Input boxes and scores must be float16 or float32 type . \n

*@par Third-party framework compatibility
*Compatible with onnx NonMaxSuppression operator.
*/


REG_OP(NonMaxSuppressionV7)
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32}))
    .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16}))
    .OUTPUT(selected_indices, TensorType({DT_INT32}))
    .ATTR(center_point_box, Int, 0)
    .ATTR(max_boxes_size, Int, 0)
    .OP_END_FACTORY_REG(NonMaxSuppressionV7)

} // namespace ge } // namespace ge


#endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_

+ 261
- 6
third_party/fwkacllib/inc/ops/nn_norm_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -160,20 +160,20 @@ REG_OP(SigmoidCrossEntropyWithLogits)
.OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits)


/** /**
*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n
*@brief Computes the sigmoid cross entropy loss of "predict" and "target".


*@par Inputs: *@par Inputs:
* four inputs, including: * four inputs, including:
*@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value.
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n
*@li weight: An multi-dimensional Tensor, specifying the weight value. \n
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value.
*@li weight: An multi-dimensional Tensor, specifying the weight value.
*@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n *@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n


*@par Attributes: *@par Attributes:
*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean" . \n
*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean". \n


*@par Outputs: *@par Outputs:
*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n
*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict". \n


*@par Third-party framework compatibility *@par Third-party framework compatibility
* Compatible with PyTorch operator BCEWithLogitsLoss. * Compatible with PyTorch operator BCEWithLogitsLoss.
@@ -978,6 +978,261 @@ REG_OP(InHost)
.OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) .OUTPUT(variance_sqrt, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.00001) .ATTR(epsilon, Float, 0.00001)
.OP_END_FACTORY_REG(InHost) .OP_END_FACTORY_REG(InHost)

/**
* @brief Performs instance normalization on "x". \n

* @par Inputs:
* Three inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, format is NC1HWC0.
* @li gamma: A Tensor. Must be one of the following types: float16, float32, format is ND.
* @li beta: A Tensor. Must be one of the following types: float16, float32, format is ND.

* @par Attributes:
* @li data_format: A required attribute of type String, the data format of "x".
* @li epsilon: A required attribute of type Float; presumably the small
* numerical-stability constant added to the variance — confirm. \n

* @par Outputs:
* @li y: A Tensor. Has the same type as "x", format is NC1HWC0.
* @li mean: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0].
* @li variance: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n

* @par Third-party framework compatibility
* Can be used by onnx InstanceNormalization
*/
REG_OP(InstanceNorm)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(data_format, String)
    .REQUIRED_ATTR(epsilon, Float)
    .OP_END_FACTORY_REG(InstanceNorm)

/**
* @brief Computes the gradient of the KL-divergence loss. \n

* @par Inputs:
* Three inputs, including:
* @li grad: A Tensor of type float16 or float32; the incoming gradient.
* @li input: A Tensor of type float16 or float32.
* @li target: A Tensor of type float16 or float32. \n

* @par Attributes:
* @li reduction: An optional string. Defaults to "mean".
* @li log_target: An optional bool. Defaults to "false"; presumably indicates
* that "target" is supplied in log space — confirm against the caller. \n

* @par Outputs:
* @li y: A Tensor of type float16 or float32. \n

* @par Third-party framework compatibility
* NOTE(review): presumably the backward of the PyTorch operator kl_div — confirm.
*/
REG_OP(KlDivLossGrad)
    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(reduction, String, "mean")
    .ATTR(log_target, Bool, false)
    .OP_END_FACTORY_REG(KlDivLossGrad)

/**
* @brief Computes l1_loss_grad or l1_loss_backward. \n

* @par Inputs:
* Three inputs, including:
* @li grads: A Tensor. Must be one of the following types: float16, float32.
* Required.
* @li predict: A Tensor. Has the same type as "grads". Required.
* @li label: A Tensor. Has the same type as "grads". Required. \n

* @par Attributes:
* @li reduction: An optional attribute of type String. Defaults to "mean". \n

* @par Outputs:
* @li y: A Tensor. Has the same type as "grads". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator L1LossGrad.
*/
REG_OP(L1LossGrad)
    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(reduction, String, "mean")
    .OP_END_FACTORY_REG(L1LossGrad)

/**
* @brief Computes the Lp loss, p = 1, 2, 3, ...

* @par Inputs:
* @li predict: An ND tensor of type float16, float32.
* @li label: An ND tensor of type float16, float32. \n

* @par Attributes:
* @li p: A required int attribute that decides which loss to compute;
* currently only p = 1 (l1_loss) is supported.
* @li reduction: An optional string. Defaults to "mean". \n

* @par Outputs:
* @li y: An ND tensor with the same shape and type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator LpLoss.
*/
REG_OP(LpLoss)
    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(p, Int)
    .ATTR(reduction, String, "mean")
    .OP_END_FACTORY_REG(LpLoss)

/**
* @brief Computes gradients of the MSE loss.

* @par Inputs:
* @li predict: An ND tensor of type float16, float32.
* @li label: An ND tensor of type float16, float32.
* @li dout: An ND tensor of type float16, float32; the incoming gradient. \n

* @par Attributes:
* @li reduction: An optional string. Defaults to "mean". \n

* @par Outputs:
* @li y: An ND tensor with the same shape and type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator MseLossGrad.
*/
REG_OP(MseLossGrad)
    .INPUT(predict, TensorType({DT_FLOAT32, DT_FLOAT16}))
    .INPUT(label, TensorType({DT_FLOAT32, DT_FLOAT16}))
    .INPUT(dout, TensorType({DT_FLOAT32, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT32, DT_FLOAT16}))
    .ATTR(reduction, String, "mean")
    .OP_END_FACTORY_REG(MseLossGrad)

/**
* @brief Computes the MSE loss.
* @par Inputs:
* two inputs, including:
* @li predict: An ND Tensor of dtype float16 or float32.
* @li label: An ND Tensor of dtype float16 or float32.\n
*
* @par Attributes:
* @li reduction: An optional str, one of "sum", "none", "mean". Defaults to "mean".\n
*
* @par Outputs:
* @li y: when reduction is "sum" or "mean", y is a scalar; when reduction is
* "none", y has the same type and shape as "predict".\n
*/
REG_OP(MseLoss)
    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(reduction, String, "mean")
    .OP_END_FACTORY_REG(MseLoss)

/**
* @brief Calculates the gradients (backward pass) of the function "smooth_l1_loss_v2". \n

* @par Inputs:
* Three Inputs, including:
* @li predict: A Tensor. Must be one of the following types:
* float16, float32.
* @li label: A Tensor. Has the same type as "predict".
* @li dout: A Tensor. Has the same type as "predict"; the incoming gradient. \n

* @par Attributes:
* Two Attributes, including:
* @li sigma: An optional float. Defaults to 1.0. \n

* @li reduction: An optional string. Defaults to "mean",
* Must be one of the following: "none", "mean", "sum". \n

* @par Outputs:
* @li gradient: A Tensor. Has the same type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SmoothL1LossBackward.
*/
REG_OP(SmoothL1LossGradV2)
    .INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(gradient, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(sigma, Float, 1.0)
    .ATTR(reduction, String, "mean")
    .OP_END_FACTORY_REG(SmoothL1LossGradV2)

/**
* @brief Creates a criterion that uses a squared term if the absolute
* element-wise error falls below beta and an L1 term otherwise. It is
* less sensitive to outliers than the MSELoss and in some cases prevents
* exploding gradients.

* @par Inputs:
* @li predict: A multi-dimensional Tensor of type float16 or float32,
* specifying the predictive value. \n
* @li label: A multi-dimensional Tensor of type float16 or float32,
* specifying the target value. \n

* @par Attributes:
* @li sigma: An optional float. Specifies the threshold of loss. Defaults
* to "1.0". \n
* @li reduction: An optional str. Specifies the reduction to apply to
* the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied,
* 'mean': the sum of the output will be divided by the number of elements in
* the output,'sum': the output will be summed. Default: 'mean'. \n

* @par Outputs:
* @li loss: Indicates the loss between the predictive value and target value.
* Has the same dimensions as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator smooth_l1_loss. \n
*/
REG_OP(SmoothL1LossV2)
    .INPUT(predict, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(label, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .OUTPUT(loss, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .ATTR(sigma, Float, 1.0)
    .ATTR(reduction, String, "mean")
    .OP_END_FACTORY_REG(SmoothL1LossV2)

/**
* @brief Computes Centralization. result = x - mean(x, axes)

* @par Inputs:
* @li x: An ND tensor of type float16, float32.
* @par Attributes:
* @li axes: An optional list of ints; the dimensions to reduce.
* Each value must be in the range [-rank(x), rank(x)). Defaults to {-1}.
* @par Outputs:
* @li y: A Tensor. Has the same type as "x". \n

* @par Third-party framework compatibility
* custom operator \n
*/
REG_OP(Centralization)
    .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .ATTR(axes, ListInt, {-1})
    .OP_END_FACTORY_REG(Centralization)

/**
* @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2.

* @par Inputs:
* @li predict: An ND tensor of type float16, float32.
* @li target: An ND tensor of type float16, float32.
* @li dout: An ND tensor of type float16, float32; the incoming gradient.
* @li weight: An optional ND tensor of type float16, float32.
* @li pos_weight: An optional ND tensor of type float16, float32. \n

* @par Attributes:
* @li reduction: An optional string. Defaults to "mean". \n

* @par Outputs:
* @li gradient: An ND tensor with the same shape and type as "predict". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad.
*/
REG_OP(SigmoidCrossEntropyWithLogitsGradV2)
    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(reduction, String, "mean")
    .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2)
} // namespace ge


#endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/nn_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 322
- 9
third_party/fwkacllib/inc/ops/nn_pooling_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -182,6 +182,125 @@ REG_OP(AvgPool3D)
.ATTR(data_format, String, "NDHWC") .ATTR(data_format, String, "NDHWC")
.OP_END_FACTORY_REG(AvgPool3D) .OP_END_FACTORY_REG(AvgPool3D)



/**
*@brief Performs average pooling on the input (compile-time variant of AvgPool3D).

*@par Inputs:
*@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double.
*@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout.
*@li multiplier: An optional tensor of float16, float32, double.

*@par Attributes:
*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor.
*@li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor.
*@li pads: List of ints, implicit zero paddings on both sides of the input.
*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
*@li count_include_pad: When true, will include the zero-padding in the averaging calculation.
*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used.
*@li data_format: A string, format of input data. Defaults to "NDHWC". \n

*@par Outputs:
*y: The average pooled output tensor. \n

*@attention Constraints:
*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool3D.
*/
REG_OP(AvgPool3DD)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
    .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
    .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
    .REQUIRED_ATTR(ksize, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(ceil_mode, Bool, false)
    .ATTR(count_include_pad, Bool, true)
    .ATTR(divisor_override, Int, 0)
    .ATTR(data_format, String, "NDHWC")
    .OP_END_FACTORY_REG(AvgPool3DD)

/**
* @brief Computes gradients of the AvgPool3D function.

* @par Inputs:
* @li orig_input_shape: A 1-D tensor of type int32 giving the shape of the
* original (forward) input, ordered according to "data_format".
* @li grads: An NDHWC tensor of type float16, float32, or double; the
* incoming gradients.

* @par Attributes:
* @li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor.
* @li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor.
* @li pads: List of ints, implicit zero paddings on both sides of the input.
* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
* @li count_include_pad: When true, will include the zero-padding in the averaging calculation.
* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used.
* @li data_format: A string, format of input data. Defaults to "NDHWC".

* @par Outputs:
* @li output: A mutable tensor with the shape described by "orig_input_shape"
* and the same element type as "grads".

* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator AvgPool3DGrad.
*/

REG_OP(AvgPool3DGrad)
    // Fixed: the dtype lists of "orig_input_shape" and "grads" were swapped.
    // A shape input must be int32 (as in TF AvgPool3DGrad) and the incoming
    // gradients must be floating point.
    .INPUT(orig_input_shape, TensorType({DT_INT32}))
    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
    .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
    .REQUIRED_ATTR(ksize, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(ceil_mode, Bool, false)
    .ATTR(count_include_pad, Bool, true)
    .ATTR(divisor_override, Int, 0)
    .ATTR(data_format, String, "NDHWC")
    .OP_END_FACTORY_REG(AvgPool3DGrad)

/**
* @brief Computes gradients of the average-pooling function (compile-time variant).

* @par Inputs:
* @li grads: An NDHWC tensor of type float16; the incoming gradients.
* @li filter: An optional tensor of type float16, fractal_z_3d layout.
* @li multiplier: An optional tensor of float16.

* @par Attributes:
* @li orig_input_shape: A required list of 5 ints; the shape of the original
* (forward) input tensor.
* @li ksize: List of ints that has length 3. The size of the window for each dimension of the input tensor.
* @li strides: List of ints that has length 3. The stride of the sliding window for each dimension of the input tensor.
* @li pads: List of ints, implicit zero paddings on both sides of the input.
* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
* @li count_include_pad: When true, will include the zero-padding in the averaging calculation.
* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used.
* @li data_format: A string, format of input data. Defaults to "NDHWC". \n

* @par Outputs:
* @li output: The gradient with respect to the forward input. \n

* @attention Constraints:
* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool3DGradD.
*/
REG_OP(AvgPool3DGradD)
    .INPUT(grads, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16}))
    .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
    .REQUIRED_ATTR(orig_input_shape, ListInt)
    .REQUIRED_ATTR(ksize, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(ceil_mode, Bool, false)
    .ATTR(count_include_pad, Bool, true)
    .ATTR(divisor_override, Int, 0)
    .ATTR(data_format, String, "NDHWC")
    .OP_END_FACTORY_REG(AvgPool3DGradD)

/** /**
*@brief Performs max_pool_ext2 on the input . \n *@brief Performs max_pool_ext2 on the input . \n


@@ -308,6 +427,31 @@ REG_OP(MaxPool3D)
.ATTR(data_format, String, "NDHWC") .ATTR(data_format, String, "NDHWC")
.OP_END_FACTORY_REG(MaxPool3D) .OP_END_FACTORY_REG(MaxPool3D)


/**
*@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n
* The output is of size H x W, for any input size.

* @par Inputs:
* One input, including:
* @li x: A Tensor. Must be one of the following data types:
* float16, float32, float64. \n

* @par Attributes:
* @li output_size: A required list of 2 ints
* specifying the size (H,W) of the output tensor. \n

* @par Outputs:
* @li y: A Tensor. Has the same data type as "x".
* @li argmax: A Tensor of an index-number type; the positions of the
* maxima within the pooled regions. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator AdaptiveMaxPool2d.
*/
REG_OP(AdaptiveMaxPool2d)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
    .OUTPUT(argmax, TensorType::IndexNumberType())
    .REQUIRED_ATTR(output_size, ListInt)
    .OP_END_FACTORY_REG(AdaptiveMaxPool2d)


/** /**
* @brief Computes second-order gradients of the maxpooling3d function . \n * @brief Computes second-order gradients of the maxpooling3d function . \n
@@ -477,8 +621,9 @@ REG_OP(MaxPoolV2)


*@par Inputs: *@par Inputs:
* One input: * One input:
*x: An NC1HWC0 Tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64 . \n
*x: An 4D Tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64.
* Must set the format, supported format list ["NCHW, NHWC"]. \n


*@par Attributes: *@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, *@li ksize: A required list of int8, int16, int32, or int64 values,
@@ -517,10 +662,12 @@ REG_OP(MaxPoolWithArgmax)


*@par Inputs: *@par Inputs:
* Three inputs, including: * Three inputs, including:
*@li x: An NC1HWC0 tensor. Supported type: float, double, int32,
*@li x: An 4d tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64. * uint8, int16, int8, int64, uint16, half, uint32, uint64.
*@li grad: An NC1HWC0 tensor. Supported type: float, double, int32,
* Must set the format, supported format list ["NCHW, NHWC"]
*@li grad: An 4d tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64. * uint8, int16, int8, int64, uint16, half, uint32, uint64.
* Must set the format, supported format list ["NCHW, NHWC"]
*@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n *@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n


*@par Attributes: *@par Attributes:
@@ -1107,7 +1254,7 @@ REG_OP(AvgPool1DD)


*@par Inputs: *@par Inputs:
* One input: * One input:
*x: An NC1HWC0 Tensor of type float16.
*x: An 4d Tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"].
*@par Attributes: *@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
* each dimension of the input tensor. No default value. * each dimension of the input tensor. No default value.
@@ -1148,9 +1295,9 @@ REG_OP(MaxPoolWithArgmaxV2)


*@par Inputs: *@par Inputs:
* Three inputs, including: * Three inputs, including:
*@li x: An NC1HWC0 tensor of type float16.
*@li grad: An NC1HWC0 tensor of type float16.
*@li argmx: An NC1HWC0 tensor of type uint16 or int64 . \n
*@li x: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"]
*@li grad: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"]
*@li argmx: An 4d tensor of type uint16 or int64. Must set the format, supported format list ["NCHW, NHWC"] \n


*@par Attributes: *@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
@@ -1291,5 +1438,171 @@ REG_OP(MaxPoolV3Grad)
.ATTR(global_pooling, Bool, false) .ATTR(global_pooling, Bool, false)
.ATTR(ceil_mode, Bool, false) .ATTR(ceil_mode, Bool, false)
.OP_END_FACTORY_REG(MaxPoolV3Grad) .OP_END_FACTORY_REG(MaxPoolV3Grad)

/**
*@brief Performs dilation2d on the input. \n

*@par Inputs:
*@li x: A tensor of shape is 4d, format is support NHWC.
*@li filter: A tensor of shape is 3d, the type is same with x,
and the c dimension is same with x. \n

*@par Attributes:
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1.
*@li padding_mode: An optional string. Defaults to "SAME". Supports SAME and
VALID (the "ceil_mode" note below also mentions CALCULATED — confirm the full
supported set with the implementation).
*@li pads: An optional list of 4 ints.
*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n

*@par Outputs:
*y: The output tensor. Has the same type and format as input "x". \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Dilation2D.
*/
REG_OP(Dilation2D)
    .INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .INPUT(filter,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .OUTPUT(y,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(rates, ListInt)
    .ATTR(padding_mode, String, "SAME")
    .ATTR(pads, ListInt, {0,0,0,0})
    .ATTR(ceil_mode, Bool, false)
    .ATTR(data_format, String, "NHWC")
    .OP_END_FACTORY_REG(Dilation2D)

/**
* @brief Applies a 2D adaptive average pooling over
* an input signal composed of several input planes. \n

* @par Inputs:
* One input, including:
* @li x: A Tensor. Must be one of the following data types:
* float16, float32. \n

* @par Attributes:
* @li output_size: A required list of 2 ints
* specifying the size (H,W) of the output tensor. \n

* @par Outputs:
* @li y: A Tensor. Has the same data type as "x". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator AdaptiveAvgPool2d.
*/
REG_OP(AdaptiveAvgPool2d)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(output_size, ListInt)
    .OP_END_FACTORY_REG(AdaptiveAvgPool2d)

/**
* @brief Compute gradients of the adaptive average (v2) pooling function.

* @par Inputs:
* @li input_grad: A NCHW Tensor. Must be one of the following data types:
* float16, float32.

* @par Attributes:
* @li orig_input_shape: A required tuple or list of type int32; the shape of
* the original (forward) input.

* @par Outputs:
* @li output_grad: A tensor with the shape given by "orig_input_shape" and
* the same type as "input_grad".

* @par Third-party framework compatibility
* Compatible with the Pytorch operator AdaptiveAvgPool2dGrad.
*/
REG_OP(AdaptiveAvgPool2dGrad)
    .INPUT(input_grad, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(output_grad, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(orig_input_shape, ListInt)
    .OP_END_FACTORY_REG(AdaptiveAvgPool2dGrad)

/**
* @brief Performs the backpropagation of MaxPoolWithArgmaxV1.

* @par Inputs:
* Three inputs, including:
* @li x: An NC1HWC0 tensor of type float16.
* @li grad: An NC1HWC0 tensor of type float16.
* @li argmax: An NC1HWC0 tensor of type uint16 (only DT_UINT16 is registered
* below — the int64 mentioned elsewhere is not accepted here). \n

* @par Attributes:
* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
* each dimension of the input tensor. No default value.
* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
* each dimension of the input tensor. No default value.
* @li pads: A required listint.
* @li dtype: An optional int. Defaults to 3.
* @li dilation: An optional list of 4 ints. Defaults to {1, 1, 1, 1}.
* @li ceil_mode: An optional bool. Defaults to false. \n

* @par Outputs:
* y: A Tensor. Has the same type and format as input "x". \n

* @attention Constraints:
* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1
* @li "pads" is listint.
* @li "ceil_mode" defaults to False.
* @li "data_format" defaults to "NC1HWC0". \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1.
*/

REG_OP(MaxPoolGradWithArgmaxV1)
    .INPUT(x, TensorType({DT_FLOAT16}))
    .INPUT(grad, TensorType({DT_FLOAT16}))
    .INPUT(argmax, TensorType({DT_UINT16}))
    .OUTPUT(y, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(ksize, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(dtype, Int, 3)
    .ATTR(dilation, ListInt, {1, 1, 1, 1})
    .ATTR(ceil_mode, Bool, false)
    .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV1)

/**
* @brief Performs max pooling on the input and outputs both max values and indices.

* @par Inputs:
* One input:
* x: An NC1HWC0 Tensor of type float16. \n

* @par Attributes:
* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
* each dimension of the input tensor. No default value.
* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
* each dimension of the input tensor. No default value.
* @li pads: A required list of ints. No default value.
* @li dtype: An optional int. Defaults to 3.
* @li dilation: An optional list of 4 ints. Defaults to {1, 1, 1, 1}.
* @li ceil_mode: An optional bool. Defaults to false. \n

* @par Outputs:
* y: A Tensor. Has the same type and format as input "x".
* argmax: A Tensor. type:uint16, format:NC1HWC0. \n

* @attention Constraints:
* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1,
* strides[2] <= 63, strides[2] >= 1.
* @li "pads" is listint.
* @li "ceil_mode" defaults to False.
* @li "data_format" defaults to "NC1HWC0". \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1.
*/
REG_OP(MaxPoolWithArgmaxV1)
    .INPUT(x, TensorType({DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT16}))
    .OUTPUT(argmax, TensorType({DT_UINT16}))
    .REQUIRED_ATTR(ksize, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(dtype, Int, 3)
    .ATTR(dilation, ListInt, {1, 1, 1, 1})
    .ATTR(ceil_mode, Bool, false)
    .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1)

} // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H

+ 1
- 1
third_party/fwkacllib/inc/ops/nn_training_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/no_op.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 203
- 1
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -640,6 +640,208 @@ REG_OP(Mish)
.OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 })) .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 }))
.OP_END_FACTORY_REG(Mish) .OP_END_FACTORY_REG(Mish)


/**
* @brief pytorch hardtanh_backward operator.
*
* @par Inputs:
* 2 inputs, including:
 * @li result, the output tensor of the forward hardtanh computation,
 * datatype: float16/float32, format: ND/5HD.
 * @li grad, the gradient tensor propagated from the subsequent layer,
 * datatype: float16/float32, format: ND/5HD. \n

* @par Attributes:
* 2 attributes, including:
* @li min_val, minimum value of the linear region range, datatype:float.
* @li max_val, maximum value of the linear region range, datatype:float. \n

* @par Outputs:
* 1 output, including:
* @li y, hardtanh_backward output tensor, datatype and format is same as
* input result. \n

* @attention Constraints:
* This operator only supports dataType: float16/float32, format: ND/5HD. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator HardtanhGrad.
*/
REG_OP(HardtanhGrad)
    // Backward pass of hardtanh: per the op name and PyTorch contract,
    // "grad" passes through where "result" lies inside (min_val, max_val)
    // and is zeroed elsewhere — kernel itself is not visible here.
    .INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */
    .INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Second operand." */
    .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Result, has same element type as two inputs" */
    .ATTR(min_val, Float, -1.0)  // lower bound of the linear region
    .ATTR(max_val, Float, 1.0)   // upper bound of the linear region
    .OP_END_FACTORY_REG(HardtanhGrad)

/**
* @brief Calculates the softplus loss function with attributes of beta and threshold. \n

* @par Inputs:
* One inputs, including:
* @li x: A mutable Tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
* @li beta: An optional float. Defaults to "1.0" \n

* @li threshold: An optional float. Defaults to "20.0" \n

* @par Outputs:
* @li y: A mutable Tensor. Has the same type as "x" \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Softplus.
*/
REG_OP(SoftplusV2)
    // Softplus with configurable beta/threshold (PyTorch-compatible per the
    // header comment). Presumably y = (1/beta)*log(1 + exp(beta*x)) with a
    // linear fallback once beta*x exceeds "threshold" — kernel not visible
    // here; confirm against the PyTorch Softplus contract.
    .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))  // same dtype as "x"
    .ATTR(beta, Float, 1.0)        // scaling factor
    .ATTR(threshold, Float, 20.0)  // cutoff above which the op is linear
    .OP_END_FACTORY_REG(SoftplusV2)

/**
* @brief Calculates the reversed outputs of the function "softplus_v2". \n

* @par Inputs:
* Two inputs, including:
* @li input_gradients: A mutable Tensor. Must be one of the following types:
* float16, float32.
* @li input_features: A mutable Tensor of the same type as "input_gradients" \n

* @par Attributes:
* @li beta: An optional float. Defaults to "1.0" \n

* @li threshold: An optional float. Defaults to "20.0" \n

* @par Outputs:
* @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SoftplusGrad.
*/
REG_OP(SoftplusV2Grad)
    // Backward pass of SoftplusV2 (PyTorch SoftplusGrad-compatible per the
    // header comment); attrs must match the forward op's beta/threshold.
    .INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 }))  // upstream gradients
    .INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 }))   // forward-pass input "x"
    .OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 })) // same dtype as input_gradients
    .ATTR(beta, Float, 1.0)        // scaling factor (mirror of forward op)
    .ATTR(threshold, Float, 20.0)  // linear-region cutoff (mirror of forward op)
    .OP_END_FACTORY_REG(SoftplusV2Grad)

/**
* @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor)
* where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise.
*
* @par inputs
* one input including:
* @li x: input A Tensor. Must be one of the following types: float32, float16
*
* @par output
* one output including:
* @li y:A Tensor of the same type as x
*
*/
REG_OP(ThresholdedRelu)
    // Thresholded ReLU: per the header comment, y = x for x > alpha,
    // y = 0 otherwise, applied elementwise.
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))  // same dtype as "x"
    .ATTR(alpha, Float, 1.0)  // activation threshold
    .OP_END_FACTORY_REG(ThresholdedRelu)

/**
* @brief Calculate the hard shrinkage function. \n

* @par Inputs:
* One inputs, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n

* @par Outputs:
* y: A Tensor with the same dtype and shape of input_x's. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Hardshrink. \n
*/
REG_OP(HardShrink)
    // Hard shrinkage (PyTorch Hardshrink-compatible per the header comment):
    // presumably zero inside [-lambd, lambd], identity outside — confirm.
    // NOTE(review): the header doc names the output "y" but it is registered
    // as "output_y" — doc/code mismatch to reconcile upstream.
    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))  // same dtype/shape as input_x
    .ATTR(lambd, Float, 0.5)  // half-width of the zeroed region
    .OP_END_FACTORY_REG(HardShrink)

/**
* @brief Calculate the hard sigmoid function. \n

* @par Inputs:
* One inputs, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32, int32. \n

* @par Attributes:
* @li alpha: An optional float. Defaults to 0.16666666. \n
* @li beta: An optional float. Defaults to 0.5. \n

* @par Outputs:
* y: A Tensor with the same dtype and shape of input_x's. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Hardsigmoid. \n
*/
REG_OP(HardSigmoid)
    // Hard sigmoid (PyTorch Hardsigmoid-compatible per the header comment);
    // presumably y = clamp(alpha*x + beta, 0, 1) — kernel not visible here.
    // Note: int32 is accepted on input but the output is float-only, so an
    // int32 input implies a dtype promotion.
    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
    .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(alpha, Float, 0.16666666)  // slope (~1/6)
    .ATTR(beta, Float, 0.5)          // offset
    .OP_END_FACTORY_REG(HardSigmoid)

/**
* @brief Calculate the soft shrinkage function. \n

* @par Inputs:
* One inputs, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n

* @par Outputs:
* y: A Tensor with the same dtype and shape of input_x's. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Softshrink. \n
*/
REG_OP(SoftShrink)
    // Soft shrinkage (PyTorch Softshrink-compatible per the header comment);
    // presumably shrinks values toward zero by lambd and zeroes the interval
    // [-lambd, lambd] — confirm against the kernel.
    // NOTE(review): header doc names the output "y"; it is registered as
    // "output_y" — doc/code mismatch.
    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))  // same dtype/shape as input_x
    .ATTR(lambd, Float, 0.5)  // shrinkage parameter
    .OP_END_FACTORY_REG(SoftShrink)

/**
* @brief Calculate the reversed outputs of the function "soft_shrink". \n

* @par Inputs:
* Two inputs, including:
* @li input_grad: A tensor. Must be one of the following types:
* float16, float32. \n
* @li input_x: A tensor of the same dtype as "input_grad". \n

* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n

* @par Outputs:
 * y: A Tensor of the same dtype and shape as "input_grad". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SoftShrinkGrad. \n
*/
REG_OP(SoftShrinkGrad)
    // Backward pass of SoftShrink (PyTorch SoftShrinkGrad-compatible per the
    // header comment); "lambd" must mirror the forward op.
    .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT}))  // upstream gradients
    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))     // forward-pass input
    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))   // same dtype as input_grad
    .ATTR(lambd, Float, 0.5)  // shrinkage parameter (mirror of forward op)
    .OP_END_FACTORY_REG(SoftShrinkGrad)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/outfeed_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 47
- 9
third_party/fwkacllib/inc/ops/pad_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -161,7 +161,7 @@ REG_OP(Pad)
*@brief Pads a tensor . \n *@brief Pads a tensor . \n


*@par Inputs: *@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n
*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n


*@par Attributes: *@par Attributes:
*paddings: An optional "vector<vector<int>>". Defaults to "{}". *paddings: An optional "vector<vector<int>>". Defaults to "{}".
@@ -180,8 +180,8 @@ REG_OP(Pad)
* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
*/ */
REG_OP(PadD) REG_OP(PadD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(paddings, ListListInt) .REQUIRED_ATTR(paddings, ListListInt)
.OP_END_FACTORY_REG(PadD) .OP_END_FACTORY_REG(PadD)


@@ -213,7 +213,7 @@ REG_OP(PadV2)
*@brief Pads a tensor . \n *@brief Pads a tensor . \n


*@par Inputs: *@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n
*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
*constant_values: A Tensor. Must have the same type as input. *constant_values: A Tensor. Must have the same type as input.


*@par Attributes: *@par Attributes:
@@ -227,10 +227,7 @@ REG_OP(PadV2)
*y: A Tensor of the same type as "x" . \n *y: A Tensor of the same type as "x" . \n


*@par Third-party framework compatibility: *@par Third-party framework compatibility:
* Compatible with TensorFlow operator Pad.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
* Compatible with TensorFlow operator PadV2.
*/ */
REG_OP(PadV2D) REG_OP(PadV2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
@@ -403,5 +400,46 @@ REG_OP(EmbeddingRankId)
.ATTR(mode, String, "mod") .ATTR(mode, String, "mod")
.OP_END_FACTORY_REG(EmbeddingRankId) .OP_END_FACTORY_REG(EmbeddingRankId)


/**
* @brief Fill the value to a tensor has the specified shape.

* @par Inputs:
* One inputs, including:
 * @li dims: A Tensor specifying the shape of the output to fill.

* @par Attributes:
* @li value: An optional float value. Defaults to 0.0.

* @par Outputs:
 * @li y: A Tensor with the shape specified by input "dims", filled with the value specified by attr "value".

* @par Third-party framework compatibility
* Compatible with the ONNX operator ConstantOfShape.
*/
REG_OP(FillV2)
    // Produce a tensor whose shape is given by the "dims" input, filled with
    // the scalar attr "value" (ONNX ConstantOfShape analogue per the header
    // comment above).
    .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64}))  // target shape
    // Fixed a duplicated DT_FLOAT in the dtype list: the first entry was
    // clearly intended to be DT_FLOAT16, matching every other dtype list in
    // this file (backward compatible — only adds a declared dtype).
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
    .ATTR(value, Float, 0)  // fill value; defaults to 0.0
    .OP_END_FACTORY_REG(FillV2)

/**
* @brief Fill the value to a tensor has the specified shape.

* @par Attributes:
* @li value: An optional float value. Defaults to 0.0.

 * @li dims: A required ListInt specifying the shape of the output to fill.

* @par Outputs:
 * @li y: A Tensor with the shape specified by attr "dims", filled with the value specified by attr "value".

* @par Third-party framework compatibility
* Compatible with the ONNX operator ConstantOfShape.
*/
REG_OP(FillV2D)
    // Static-shape variant of FillV2: the target shape comes from the
    // required attr "dims" instead of a tensor input (ONNX ConstantOfShape
    // analogue per the header comment above).
    // Fixed a duplicated DT_FLOAT in the dtype list: the first entry was
    // clearly intended to be DT_FLOAT16, matching every other dtype list in
    // this file (backward compatible — only adds a declared dtype).
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64}))
    .ATTR(value, Float, 0)          // fill value; defaults to 0.0
    .REQUIRED_ATTR(dims, ListInt)   // target shape
    .OP_END_FACTORY_REG(FillV2D)
} // namespace ge } // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/parsing_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/quantize_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/ragged_array_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/ragged_conversion_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/ragged_math_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 55
- 1
third_party/fwkacllib/inc/ops/random_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -495,6 +495,60 @@ REG_OP(ShuffleChannel)
DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64}))
.ATTR(group, Int, 1) .ATTR(group, Int, 1)
.OP_END_FACTORY_REG(ShuffleChannel) .OP_END_FACTORY_REG(ShuffleChannel)

/**
 * @brief Generate a tensor of samples from a multinomial
* distribution according to the probabilities of each of
* the possible outcomes.
*
* @par inputs
* one input including:
* @li x:Input tensor with shape [batch_size, class_size],
* where class_size is the number of all possible outcomes.
* Each value along the axis zero represents the unnormalized
* log-probability of each corresponding outcome in a batch.
*
* @par output
* one output including:
* @li y:Output tensor with shape [batch_size, sample_size],
* where sample_size is the number of times to sample.
* Each value along the axis zero represents the outcome of
* the corresponding sample in a batch.
*
*/
REG_OP(MultinomialFuss)
    // Multinomial sampling: per the header comment, "x" is
    // [batch_size, class_size] unnormalized log-probabilities and "y" is
    // [batch_size, sample_size] sampled outcome indices.
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64}))
    .OUTPUT(y, TensorType({DT_INT32, DT_INT64}))
    // Output dtype selector; default 6 — presumably an ONNX-style dtype code
    // (6 = int32 in ONNX TensorProto). NOTE(review): confirm the numbering.
    .ATTR(dtype, Int, 6)
    .ATTR(sample_size, Int, 1)  // number of draws per batch row
    // RNG seed; 0 presumably means non-deterministic seeding — confirm.
    .ATTR(seed, Float, 0)
    .OP_END_FACTORY_REG(MultinomialFuss)

/**
* @brief During training, randomly zeroes some of the elements of the input tensor
* with probability
*
* @par Inputs:
* @li x: A ND Tensor. Must be one of the following data types: Float, Float16
* @li seed: A ND Tensor. Must be one of the following data types: Float
*
* @par Attributes:
* @li p: probability of an element to be zeroed
*
* @par Outputs:
* @li y: A tensor with the same shape and type as "x".
* @li mask: A tensor with the same shape and type as "x".
* @li new_seed: A tensor with the same shape and type as "seed".
*/

REG_OP(DropoutV2)
    // Training-time dropout: per the header comment, zeroes elements of "x"
    // with probability "p".
    .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT }))
    .INPUT(seed, TensorType({ DT_FLOAT }))            // RNG state tensor
    .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) // dropped-out result, same shape/type as "x"
    .OUTPUT(mask, TensorType({ DT_FLOAT }))          // keep/drop mask, same shape as "x"
    // NOTE(review): this output is registered as "seed", shadowing the input
    // of the same name, while the header doc calls it "new_seed" — confirm
    // the intended name upstream; not renamed here to keep the interface.
    .OUTPUT(seed, TensorType({ DT_FLOAT }))
    .REQUIRED_ATTR(p, Float)  // probability of an element being zeroed
    .OP_END_FACTORY_REG(DropoutV2)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_

+ 41
- 6
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -635,8 +635,8 @@ REG_OP(ReduceMin)
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead.
*/ */
REG_OP(ReduceMinD) REG_OP(ReduceMinD)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32}))
.REQUIRED_ATTR(axes, ListInt) .REQUIRED_ATTR(axes, ListInt)
.ATTR(keep_dims, Bool, false) .ATTR(keep_dims, Bool, false)
.OP_END_FACTORY_REG(ReduceMinD) .OP_END_FACTORY_REG(ReduceMinD)
@@ -821,7 +821,7 @@ Defaults to "0.00001" . \n
*batch_ variance: A Tensor of type float32 for the result variance . \n *batch_ variance: A Tensor of type float32 for the result variance . \n


*@attention Constraints: *@attention Constraints:
*For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction.
*For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction.
*/ */
REG_OP(INInferV2) REG_OP(INInferV2)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -882,7 +882,7 @@ REG_OP(INTrainingReduceV2)
*@attention Constraints: *@attention Constraints:
*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training.
* This operator is used in conjunction with INTrainingReduceV2. * This operator is used in conjunction with INTrainingReduceV2.
*@li For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
*/ */
REG_OP(INTrainingUpdateV2) REG_OP(INTrainingUpdateV2)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -965,7 +965,7 @@ for the updated variance.
*@attention Constraints: *@attention Constraints:
*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training.
* This operator is used in conjunction with GNTrainingUpdate. * This operator is used in conjunction with GNTrainingUpdate.
*@li For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
*/ */
REG_OP(GNTrainingUpdate) REG_OP(GNTrainingUpdate)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -982,6 +982,41 @@ REG_OP(GNTrainingUpdate)
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(GNTrainingUpdate) .OP_END_FACTORY_REG(GNTrainingUpdate)


/**
* @brief Calculates the standard deviation and average value of Tensors.

* @par Inputs:
* @li x: A Tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
* Three Attributes, including:
* @li dim: An optional listint, Defaults to "None". \n

* @li unbiased: An optional bool. Defaults to "True".
* If "True", Use Bessel Correction.
* If "False", Do not use Bessel Correction. \n

* @li keepdim: An optional bool. Defaults to "False".
* If "True", Keep the original tensor dimension.
* If "False", Do not keep the original tensor dimension. \n

* @par Outputs:
* Two Outputs, including:
* @li y1: A Tensor. Has the same type as "x".
* @li y2: A Tensor. Has the same type as "x". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator ReduceStd.
*/
REG_OP(ReduceStd)
    // Standard deviation and mean of "x" reduced over "dim" (PyTorch
    // ReduceStd-compatible per the header comment). Which of y1/y2 is the
    // std vs the mean is not visible here — confirm against the kernel.
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16}))  // same dtype as "x"
    .OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16}))  // same dtype as "x"
    // Empty list presumably means reduce over all dimensions — confirm.
    .ATTR(dim, ListInt, {})
    .ATTR(unbiased, Bool, true)   // true: apply Bessel's correction
    .ATTR(keepdim, Bool, false)   // true: keep reduced dims as size 1
    .OP_END_FACTORY_REG(ReduceStd)
} //namespace ge } //namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/resource_variable_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 283
- 24
third_party/fwkacllib/inc/ops/rnn.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -187,16 +187,16 @@ REG_OP(DynamicRNNGrad)
*@brief: DynamicRNN calculation. *@brief: DynamicRNN calculation.
*@par Inputs: *@par Inputs:
*ten inputs: *ten inputs:
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n


*@par Attributes: *@par Attributes:
*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
@@ -221,6 +221,8 @@ REG_OP(DynamicRNNGrad)
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@par Third-party framework compatibility:
* Compatible with the TF operator LSTM.
*/ */
REG_OP(DynamicRNN) REG_OP(DynamicRNN)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -254,6 +256,63 @@ REG_OP(DynamicRNN)
.ATTR(is_training, Bool, true) .ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(DynamicRNN) .OP_END_FACTORY_REG(DynamicRNN)


/**
*@brief: DynamicLSTMV2 calculation.
*@par Inputs:
*ten inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li w_xc_x_static:A optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li h0:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li c0:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:A optional 1D Tensor. Must be one of the following types: uint8. The format must be ND .

*@par Attributes:
*@li num_output:An integer identifying the num projection in the op. Default to 0.
*@li expose_hidden:An bool identifying the expose_hidden in the op. Default to flase.
*@li need_output_last:An bool identifying the time major in the op. Default to true.
*@li forget_bias:An float identifying the forget bias in the op. Default to 0.

*@par Outputs:
*eight outputs:
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@par Third-party framework compatibility:
* Compatible with the Caffe operator LSTM.
*@par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DynamicLSTMV2)
    // Caffe-style LSTM over a full sequence (per the header comment above,
    // which also marks this op EXPERIMENTAL).
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))     // input sequence
    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))     // weight tensor
    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))     // bias tensor
    // Sequence-continuation indicators (Caffe "cont" semantics — presumably
    // 0 at sequence starts; confirm against the kernel).
    .INPUT(cont, TensorType({DT_FLOAT16, DT_FLOAT}))
    // Presumably the static input pre-multiplied by its weight (name
    // suggests w_xc * x_static) — confirm.
    .OPTIONAL_INPUT(w_xc_x_static, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(h0, TensorType({DT_FLOAT16, DT_FLOAT}))   // initial hidden state
    .OPTIONAL_INPUT(c0, TensorType({DT_FLOAT16, DT_FLOAT}))   // initial cell state
    // Peephole weights for the input/forget/output gates.
    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))             // sequence mask
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))       // per-step hidden states
    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))       // per-step cell states
    .OUTPUT(last_output_h, TensorType({DT_FLOAT16, DT_FLOAT}))  // final hidden state
    .OUTPUT(last_output_c, TensorType({DT_FLOAT16, DT_FLOAT}))  // final cell state
    .ATTR(num_output, Int, 0)          // projection size; 0 = no projection
    .ATTR(expose_hidden, Bool, false)
    // NOTE(review): the header doc says this defaults to true, but the
    // registered default is false — confirm which is intended.
    .ATTR(need_output_last, Bool, false)
    .ATTR(forget_bias, Float, 0.0)     // bias added to the forget gate
    .OP_END_FACTORY_REG(DynamicLSTMV2)

/** /**
*@brief: LSTMInputGrad calculation. *@brief: LSTMInputGrad calculation.
*@par Inputs: *@par Inputs:
@@ -475,9 +534,9 @@ REG_OP(BasicRNNCell)
.OP_END_FACTORY_REG(BasicRNNCell) .OP_END_FACTORY_REG(BasicRNNCell)


/** /**
*@brief: DynamicGRU calculation.
*@brief DynamicGRU calculation.
*@par Inputs: *@par Inputs:
*seven inputs: \n
*seven inputs:
*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li w:Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li b:Must be one of the following types: float16, float32. The format must be ND. *@li b:Must be one of the following types: float16, float32. The format must be ND.
@@ -497,7 +556,7 @@ REG_OP(BasicRNNCell)
*@li is_training:An bool identifying is training in the op. Default to true. *@li is_training:An bool identifying is training in the op. Default to true.


*@par Outputs: *@par Outputs:
*five outputs: \n
*five outputs:
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
@@ -531,9 +590,9 @@ REG_OP(DynamicGRU)
.OP_END_FACTORY_REG(DynamicGRU) .OP_END_FACTORY_REG(DynamicGRU)


/** /**
*@brief: DynamicGRUV2 calculation.
*@brief DynamicGRUV2 calculation.
*@par Inputs: *@par Inputs:
*seven inputs: \n
*seven inputs:
*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ.
*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z.
@@ -555,7 +614,7 @@ REG_OP(DynamicGRU)
*@li is_training:An bool identifying is training in the op. Default to true. *@li is_training:An bool identifying is training in the op. Default to true.


*@par Outputs: *@par Outputs:
*six outputs: \n
*six outputs:
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
@@ -592,6 +651,68 @@ REG_OP(DynamicGRUV2)
.ATTR(is_training, Bool, true) .ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(DynamicGRUV2) .OP_END_FACTORY_REG(DynamicGRUV2)



/**
*@brief DynamicGRUV2Hidden calculation. The hidden-state part of the
* DynamicGRUV2 recurrence: the x-by-weight_input product is supplied as a
* pre-computed input (x_weight_input) rather than computed inside the op.
*@par Inputs:
*five inputs:
*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ.
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li bias_hidden:An optional input. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:An optional input. Must be one of the following types: int32. The format must be ND.
*@li init_h:An optional input. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.

*@par Attributes:
*@li direction:A string identifying the direction in the op. Default to "UNIDIRECTIONAL".
Only UNIDIRECTIONAL is currently supported.
*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
*@li keep_prob:A float identifying the keep prob in the op. Default to 1.
*@li cell_clip:A float identifying the cell clip in the op. Default to -1.
*@li num_proj:An integer identifying the num projection in the op. Default to 0.
*@li time_major:A bool identifying the time major in the op. Default to true.
*@li activation:A string identifying the type of activation function in the op. Default to "tanh".
Only tanh is currently supported.
*@li gate_order:A string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.
*@li reset_after:A bool identifying whether to apply reset gate after matrix multiplication. Default to true.
*@li is_training:A bool identifying is training in the op. Default to true.

*@par Outputs:
*six outputs:
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DynamicGRUV2Hidden)
    // NOTE(review): DT_FLOAT32 — sibling ops use DT_FLOAT for float32; confirm
    // both tokens are defined in ge::DataType.
    .INPUT(x_weight_input, TensorType({DT_FLOAT32}))
    .INPUT(weight_hidden, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
    // "new" is safe here: the OP macros token-paste/stringify the name, so the
    // C++ keyword is never used as a bare identifier.
    .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(direction, String, "UNIDIRECTIONAL")
    .ATTR(cell_depth, Int, 1)
    .ATTR(keep_prob, Float, 1.0)
    .ATTR(cell_clip, Float, -1.0)
    .ATTR(num_proj, Int, 0)
    .ATTR(time_major, Bool, true)
    .ATTR(activation, String, "tanh")
    .ATTR(gate_order, String, "zrh")
    .ATTR(reset_after, Bool, true)
    .ATTR(is_training, Bool, true)
    .OP_END_FACTORY_REG(DynamicGRUV2Hidden)


/** /**
*@brief: DynamicGRUV2Grad calculation. *@brief: DynamicGRUV2Grad calculation.
*@par Inputs: *@par Inputs:
@@ -618,7 +739,6 @@ REG_OP(DynamicGRUV2)
*@li cell_clip:An float identifying the cell clip in the op. Default to -1. *@li cell_clip:An float identifying the cell clip in the op. Default to -1.
*@li num_proj:An integer identifying the num projection in the op. Default to 0. *@li num_proj:An integer identifying the num projection in the op. Default to 0.
*@li time_major:An bool identifying the time major in the op. Default to true. *@li time_major:An bool identifying the time major in the op. Default to true.
*@li bias_type:An string identifying the type of bias_type function in the op. Default to "double_bias".
*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.
*@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. *@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true.


@@ -630,6 +750,9 @@ REG_OP(DynamicGRUV2)
*@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */
REG_OP(DynamicGRUV2Grad) REG_OP(DynamicGRUV2Grad)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -658,7 +781,6 @@ REG_OP(DynamicGRUV2Grad)
.ATTR(cell_clip, Float, -1.0) .ATTR(cell_clip, Float, -1.0)
.ATTR(num_proj, Int, 0) .ATTR(num_proj, Int, 0)
.ATTR(time_major, Bool, true) .ATTR(time_major, Bool, true)
.ATTR(bias_type, String, "double_bias")
.ATTR(gate_order, String, "zrh") .ATTR(gate_order, String, "zrh")
.ATTR(reset_after, Bool, true) .ATTR(reset_after, Bool, true)
.OP_END_FACTORY_REG(DynamicGRUV2Grad) .OP_END_FACTORY_REG(DynamicGRUV2Grad)
@@ -667,7 +789,7 @@ REG_OP(DynamicGRUV2Grad)
*@brief: GRUV2HiddenGrad calculation. *@brief: GRUV2HiddenGrad calculation.
*@par Inputs: *@par Inputs:
*nine inputs: \n *nine inputs: \n
*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
@@ -678,6 +800,7 @@ REG_OP(DynamicGRUV2Grad)
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.


*@par Attributes: *@par Attributes:
*@li t_state:An Int identifying the current t state. Default to [0, 4].
*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.


*@par Outputs: *@par Outputs:
@@ -685,10 +808,12 @@ REG_OP(DynamicGRUV2Grad)
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ */
REG_OP(GRUV2HiddenGrad)
.INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
REG_OP(GRUV2HiddenGradCell)
.INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -699,8 +824,142 @@ REG_OP(GRUV2HiddenGrad)
.OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(t_state, Int, 0)
.ATTR(gate_order, String, "zrh") .ATTR(gate_order, String, "zrh")
.OP_END_FACTORY_REG(GRUV2HiddenGrad)
.OP_END_FACTORY_REG(GRUV2HiddenGradCell)

/**
* @brief Calculates the reversed outputs of the function "embedding". \n

* @par Inputs:
* Two inputs, including:
* @li grad: A mutable Tensor of word grad. Must be one of the following types:
* float32.
* @li indices: A mutable word index Tensor of the int32 type.\n

* @par Attributes:
* @li num_weights: A required int attr which states how many words are in the dict. \n

* @li padding_idx: An int attr selecting which word's grad row is filled with zeros. Defaults to "-1". \n

* @li scale_grad_by_freq: An optional bool. Defaults to "False".
* If "True", the output grad will be scaled by word frequency.
* If "False", the output grad will not be scaled by word frequency. \n

* @par Outputs:
* @li y: A mutable output Tensor of new word grad, has the same type as "grad".
* (Corresponds to "grad_weight" in the PyTorch operator.) \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator EmbeddingDenseGrad.
*/
REG_OP(EmbeddingDenseGrad)
    .INPUT(grad, TensorType({ DT_FLOAT32 })) /* "First operand." */
    .INPUT(indices, TensorType({ DT_INT32 })) /* "Second operand." */
    .OUTPUT(y, TensorType({ DT_FLOAT32 })) /* "Result, has same element type as input grad" */
    .REQUIRED_ATTR(num_weights, Int)
    .ATTR(padding_idx, Int, -1)
    .ATTR(scale_grad_by_freq, Bool, false)
    .OP_END_FACTORY_REG(EmbeddingDenseGrad)

/**
*@brief CommonLSTM calculation (ONNX-style LSTM).
*@par Inputs:
*eight inputs: \n
*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li p:An optional input (peephole weights). Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.

*@par Attributes:
*@li activation_alpha:Optional scaling values used by some activation functions. Empty is currently supported.
*@li activation_beta:Optional scaling values used by some activation functions. Empty is currently supported.
*@li activations:The list of activation functions. Empty is currently supported.
*@li clip:A float identifying the cell clip in the op. Default to -1.
*@li direction:Specify if the RNN is forward, reverse, or bidirectional. Must be one of forward(default), reverse, or bidirectional.
*@li hidden_size:A required int. Number of neurons in the hidden layer. Reserved.
*@li input_forget:Couple the input and forget gates if 1. Reserved. Default to 0.

*@par Outputs:
*three outputs: \n
*@li y:First dimension is time step, second dimension is direction, others is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*/

REG_OP(CommonLSTM)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32}))
    .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(initial_c, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(p, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y_c, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(activation_alpha, ListFloat, {})
    .ATTR(activation_beta, ListFloat, {})
    .ATTR(activations, ListString, {})
    .ATTR(clip, Float, -1.0)
    .ATTR(direction, String, "forward")
    .REQUIRED_ATTR(hidden_size, Int)
    .ATTR(input_forget, Int, 0)
    .OP_END_FACTORY_REG(CommonLSTM)

/**
* @brief Common GRU calculation (ONNX-style GRU).

* @par Inputs:
* Six inputs, including:
* @li x: The input sequences packed (and potentially padded) into one 3D Tensor(float16). The format must be FRACTAL_NZ
* @li w: The weight tensor for the gates is a 3D Tensor(float16). The format must be FRACTAL_Z
* @li r: The recurrence weight tensor is a 3D Tensor(float16). The format must be FRACTAL_Z
* @li b: An optional bias tensor for the gates. The format must be ND
* @li sequence_lens: Optional tensor specifying lengths of sequences(int32). The format must be ND
* @li initial_h: Optional initial value of the hidden(float16,float32). The format must be FRACTAL_NZ

* @par Attributes:
* @li activation_alpha: Optional scaling values used by some activation functions. \n

* @li activation_beta: Optional scaling values used by some activation functions. \n

* @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n

* @li clip: Cell clip threshold. Default to -1. \n

* @li direction: Specify if the RNN is forward, reverse, or bidirectional. Default to "forward". \n

* @li hidden_size: A required int. Number of neurons in the hidden layer. \n

* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. Default to 0. \n

* @par Outputs:
* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ

* @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ
*/
REG_OP(CommonGRU)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32}))
    .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(activation_alpha, ListFloat, {})
    .ATTR(activation_beta , ListFloat, {})
    .ATTR(activations , ListString, {})
    .ATTR(clip, Float, -1.0)
    .ATTR(direction, String, "forward")
    .REQUIRED_ATTR(hidden_size, Int)
    .ATTR(linear_before_reset , Int, 0)
    .OP_END_FACTORY_REG(CommonGRU)
} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/rpn_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/save_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/sdca_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 183
- 1
third_party/fwkacllib/inc/ops/selection_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -796,6 +796,34 @@ REG_OP(SliceD)
.REQUIRED_ATTR(size, ListInt) .REQUIRED_ATTR(size, ListInt)
.OP_END_FACTORY_REG(SliceD) .OP_END_FACTORY_REG(SliceD)


/**
*@brief Extracts a slice from a tensor.
* This operation extracts a slice of size "size" from a tensor "x"
* starting at the location specified by "offsets" . \n

*@par Inputs:
*@li x: A Tensor. Must be one of the following types:
* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8,
* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n
*@li offsets: A Tensor of index-number type. The starting location for the slice.

*@par Attributes:
*@li size: A required ListInt. The shape of the slice to extract . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x". The slice extracted from the tensor.
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead.
*/
REG_OP(SliceDV2)
    .INPUT(x, TensorType::BasicType())
    .INPUT(offsets, TensorType::IndexNumberType())
    .OUTPUT(y, TensorType::BasicType())
    .REQUIRED_ATTR(size, ListInt)
    .OP_END_FACTORY_REG(SliceDV2)
/** /**
* @brief Finds values and indices of the "k" largest elements for the last * @brief Finds values and indices of the "k" largest elements for the last
* dimension . \n * dimension . \n
@@ -1921,6 +1949,160 @@ REG_OP(CumulativeLogsumexpD)
.ATTR(exclusive, Bool, false) .ATTR(exclusive, Bool, false)
.ATTR(reverse, Bool, false) .ATTR(reverse, Bool, false)
.OP_END_FACTORY_REG(CumulativeLogsumexpD) .OP_END_FACTORY_REG(CumulativeLogsumexpD)

/**
* @brief Add updates to var according to axis and indices.

* @par Inputs:
* Three inputs, including:
* @li var: A Tensor. Must be one of the following types:
* float16, float32, int16, int32, int8, uint8.
* @li indices: A Tensor of the indices, type should be int32.
* @li updates: A Tensor of the same type as "var". \n

* @par Attributes:
* @li axis: A required int to specify the axis to perform indices add. \n

* @par Outputs:
* @li var: A Tensor. Same as input "var".

* @par Third-party framework compatibility
* Compatible with the Pytorch operator index_add_.
*/
REG_OP(InplaceIndexAdd)
    // NOTE(review): DT_FLOAT32 — most ops in this file spell float32 as
    // DT_FLOAT; confirm both tokens are defined in ge::DataType.
    .INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8,
                            DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
    .INPUT(indices, TensorType({DT_INT32}))
    .INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8,
                                DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
    .OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8,
                             DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
    .REQUIRED_ATTR(axis, Int)
    .OP_END_FACTORY_REG(InplaceIndexAdd)

/**
* @brief Replace the value of X with value according to mask.
* @par Inputs:
* three inputs, including:
* @li x: A Tensor of dtype is float16 or float32 or int32 or int8.
* @li mask: A Tensor of dtype bool.
* @li value: A Tensor or scalar of dtype float16 or float32 or int32 or int8. \n

* @par Outputs:
* @li y: A tensor. Must be one of the following dtypes:
* float16, float32, int32, int8.

* @par Third-party framework compatibility
* Presumably mirrors the PyTorch operator masked_fill — confirm against caller.
*/
REG_OP(MaskedFill)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32}))
    .INPUT(mask, TensorType({DT_BOOL}))
    .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32}))
    .OP_END_FACTORY_REG(MaskedFill)

/**
* @brief Choose the value of X with value according to mask.

* @par Inputs:
* two inputs, including:
* @li x: A Tensor of dtype is float16 or float32.
* @li mask: A Tensor of dtype is bool. \n

* @par Outputs:
* @li y: A tensor with the same type as x. \n

* @par Third-party framework compatibility
* Compatible with the Numpy operator select.
* Replaces the pytorch operator masked_select in some scenarios.\n
*/
REG_OP(MaskedSelectV2)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(mask, TensorType({DT_BOOL}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OP_END_FACTORY_REG(MaskedSelectV2)

/**
* @brief Slice a tensor at its last dim, e.x. a[..., begin:end:stride]. \n

* @par Inputs:
* One input, including:
* @li x: A Tensor. Must be one of the following types:
* float16, float32, double, int8, int16, int32, int64.

* @par Attributes:
* @li start: A required attribute of type Int, start index of last dim. \n
* @li end: A required attribute of type Int, end index of last dim. \n
* @li stride: An attribute of type Int, stride of slice. Defaults to 1. \n

* @par Outputs:
* @li y: A Tensor. Has the same type as "x". \n

* @par Third-party framework compatibility
* No compatibility
*/
REG_OP(SliceLastDim)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
    .REQUIRED_ATTR(start, Int)
    .REQUIRED_ATTR(end, Int)
    .ATTR(stride, Int, 1)
    .OP_END_FACTORY_REG(SliceLastDim)

/**
* @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n
* extracts a slice of size (end-begin)/stride from the given input tensor. \n
* Starting at the location specified by begin the slice continues by \n
* adding stride to the index until all dimensions are not less than end. \n
*
* @par Inputs:
* Five inputs, including:
* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n
* int64, qint8, quint8, qint32, qint16, quint16, uint16, \n
* float16, uint32, uint64, complex64, complex128. \n
* @li begin: A Tensor of type int32 or int64, for the index of the first value to select.
*
* @li end: A Tensor of type int32 or int64, for the index of the last value to select.
*
* @li axes: An optional Tensor of type int32 or int64, indicate axis to be select.
*
* @li strides: An optional Tensor of type int32 or int64, for the increment.
*
* @par Attributes:
* @li begin_mask: An int attribute, defaults to 0. \n
* A bitmask where a bit "i" being "1" means to ignore the begin \n
* value and instead use the largest interval possible.
* @li end_mask: An int attribute, defaults to 0. \n
* Analogous to "begin_mask".
* @li ellipsis_mask: An int attribute, defaults to 0. \n
* A bitmask where bit "i" being "1" means the "i"th position \n
* is actually an ellipsis.
* @li new_axis_mask: An int attribute, defaults to 0. \n
* A bitmask where bit "i" being "1" means the "i"th \n
* specification creates a new shape 1 dimension.
* @li shrink_axis_mask: An int attribute, defaults to 0. \n
* A bitmask where bit "i" implies that the "i"th \n
* specification should shrink the dimensionality.
*
* @par Outputs:
* y: A Tensor. Has the same type as "x".
*
* @attention Constraints:
*
* @par Third-party framework compatibility
* Compatible with the TensorFlow operator StridedSliceV2.
*/
REG_OP(StridedSliceV2)
    .INPUT(x, TensorType::BasicType())
    .INPUT(begin, TensorType::IndexNumberType())
    .INPUT(end, TensorType::IndexNumberType())
    .OPTIONAL_INPUT(axes, TensorType::IndexNumberType())
    .OPTIONAL_INPUT(strides, TensorType::IndexNumberType())
    .ATTR(begin_mask, Int, 0)
    .ATTR(end_mask, Int, 0)
    .ATTR(ellipsis_mask, Int, 0)
    .ATTR(new_axis_mask, Int, 0)
    .ATTR(shrink_axis_mask, Int, 0)
    .OUTPUT(y, TensorType::BasicType())
    .OP_END_FACTORY_REG(StridedSliceV2)

} // namespace ge } // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_

+ 1
- 1
third_party/fwkacllib/inc/ops/set_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/sparse_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/spectral_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/split_combination_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/state_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/stateful_random_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/stateless_random_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/string_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/swap_co_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 1
- 1
third_party/fwkacllib/inc/ops/target_crop_and_resize.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 9
- 10
third_party/fwkacllib/inc/ops/transformation_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -141,7 +141,7 @@ support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW"
*@par Attributes: *@par Attributes:
*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. *@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc.
*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. *@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc.
*@li group: A required int32, default value is 1. \n
*@li group: A optional int32, default value is 1. \n


*@par Outputs: *@par Outputs:
*dst: A Tensor dtype of all types. *dst: A Tensor dtype of all types.
@@ -151,7 +151,7 @@ REG_OP(TransData)
.OUTPUT(dst, TensorType::BasicType()) .OUTPUT(dst, TensorType::BasicType())
.REQUIRED_ATTR(src_format, String) .REQUIRED_ATTR(src_format, String)
.REQUIRED_ATTR(dst_format, String) .REQUIRED_ATTR(dst_format, String)
.ATTR(group, Int, 1)
.ATTR(groups, Int, 1)
.OP_END_FACTORY_REG(TransData) .OP_END_FACTORY_REG(TransData)


/** /**
@@ -357,7 +357,7 @@ REG_OP(DepthToSpace)
*@brief Permutes data into spatial data blocks and then prunes them . \n *@brief Permutes data into spatial data blocks and then prunes them . \n


*@par Inputs: *@par Inputs:
*@li x: A 4D Tensor with format NHWC.
*@li x: A 4D Tensor with format. Must set the format, supported format list ["NCHW, NHWC"]
*@li crops: A 1D list or tuple of int32 or int64 . \n *@li crops: A 1D list or tuple of int32 or int64 . \n


*Must be one of the following types: float16, float32 *Must be one of the following types: float16, float32
@@ -434,9 +434,10 @@ REG_OP(BatchToSpaceD)


*@par Inputs: *@par Inputs:
* Two inputs, including: * Two inputs, including:
*@li x: An NHWC Tensor. Must be one of the following types:
*@li x: An 4D Tensor. Must be one of the following types:
* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8,
* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
* Must set the format, supported format list ["NCHW, NHWC"]
*@li paddings: A 2D tensor of type int, specifying the input . \n *@li paddings: A 2D tensor of type int, specifying the input . \n


*@par Attributes: *@par Attributes:
@@ -518,7 +519,8 @@ REG_OP(Unpack)
* @par Inputs: * @par Inputs:
* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the
* following types:float32, double, int32, uint8, int16, int8, int64, uint16, * following types:float32, double, int32, uint8, int16, int8, int64, uint16,
* float16, uint32, uint64
* float16, uint32, uint64. The inputs must have data_format with one of follows:
* NHWC, NCHW.


* @par Attributes: * @par Attributes:
* @li ksizes: A required list or tuple. The size of the sliding window for each * @li ksizes: A required list or tuple. The size of the sliding window for each
@@ -533,7 +535,6 @@ REG_OP(Unpack)
* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
* @li padding: A required string. The type of padding algorithm to use, * @li padding: A required string. The type of padding algorithm to use,
support "SAME" or "VALID". \n support "SAME" or "VALID". \n
* @li data_format: A required string. The format of input, only supported NHWC. \n


* @par Outputs: * @par Outputs:
* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * * y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *
@@ -554,7 +555,6 @@ REG_OP(ExtractImagePatches)
.REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(rates, ListInt) .REQUIRED_ATTR(rates, ListInt)
.REQUIRED_ATTR(padding, String) .REQUIRED_ATTR(padding, String)
.ATTR(data_format, String, "NHWC")
.OP_END_FACTORY_REG(ExtractImagePatches) .OP_END_FACTORY_REG(ExtractImagePatches)


/** /**
@@ -563,6 +563,7 @@ REG_OP(ExtractImagePatches)


* @par Inputs: * @par Inputs:
* x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n * x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n
* The inputs must have data_format with one of follows: NDHWC, NCDHW. \n


* @par Attributes: * @par Attributes:
* @li ksizes: A required list or tuple. The size of the sliding window for each * @li ksizes: A required list or tuple. The size of the sliding window for each
@@ -571,7 +572,6 @@ REG_OP(ExtractImagePatches)
* patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1]. * patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1].
* @li padding: A required string. The type of padding algorithm to use , * @li padding: A required string. The type of padding algorithm to use ,
* support "SAME" or "VALID" . \n * support "SAME" or "VALID" . \n
* @li data_format: An optional string. The format of input, only supported NDHWC. \n


* @par Outputs: * @par Outputs:
* Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * * Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes *
@@ -590,7 +590,6 @@ REG_OP(ExtractVolumePatches)
.REQUIRED_ATTR(ksizes, ListInt) .REQUIRED_ATTR(ksizes, ListInt)
.REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(padding, String) .REQUIRED_ATTR(padding, String)
.ATTR(data_format, String, "NDHWC")
.OP_END_FACTORY_REG(ExtractVolumePatches) .OP_END_FACTORY_REG(ExtractVolumePatches)


/** /**


+ 1
- 1
third_party/fwkacllib/inc/ops/warp_perspective_ops.h View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.


+ 27
- 27
third_party/fwkacllib/inc/runtime/base.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_BASE_H__ #ifndef __CCE_RUNTIME_BASE_H__
#define __CCE_RUNTIME_BASE_H__ #define __CCE_RUNTIME_BASE_H__
@@ -41,12 +41,12 @@ static const int32_t RT_ERROR_NONE = 0; // success
* @brief runtime exception numbers. * @brief runtime exception numbers.
*/ */
typedef enum tagRtExceptionType { typedef enum tagRtExceptionType {
RT_EXCEPTION_NONE = 0,
RT_EXCEPTION_TS_DOWN = 1,
RT_EXCEPTION_TASK_TIMEOUT = 2,
RT_EXCEPTION_TASK_FAILURE = 3,
RT_EXCEPTION_DEV_RUNNING_DOWN = 4,
RT_EXCEPTION_STREAM_ID_FREE_FAILED = 5
RT_EXCEPTION_NONE = 0,
RT_EXCEPTION_TS_DOWN = 1,
RT_EXCEPTION_TASK_TIMEOUT = 2,
RT_EXCEPTION_TASK_FAILURE = 3,
RT_EXCEPTION_DEV_RUNNING_DOWN = 4,
RT_EXCEPTION_STREAM_ID_FREE_FAILED = 5
} rtExceptionType; } rtExceptionType;


/** /**
@@ -54,12 +54,12 @@ typedef enum tagRtExceptionType {
* @brief Switch type. * @brief Switch type.
*/ */
typedef enum tagRtCondition { typedef enum tagRtCondition {
RT_EQUAL = 0,
RT_NOT_EQUAL,
RT_GREATER,
RT_GREATER_OR_EQUAL,
RT_LESS,
RT_LESS_OR_EQUAL
RT_EQUAL = 0,
RT_NOT_EQUAL,
RT_GREATER,
RT_GREATER_OR_EQUAL,
RT_LESS,
RT_LESS_OR_EQUAL
} rtCondition_t; } rtCondition_t;


/** /**
@@ -67,25 +67,25 @@ typedef enum tagRtCondition {
* @brief Data Type of Extensible Switch Task. * @brief Data Type of Extensible Switch Task.
*/ */
typedef enum tagRtSwitchDataType { typedef enum tagRtSwitchDataType {
RT_SWITCH_INT32 = 0,
RT_SWITCH_INT64 = 1,
RT_SWITCH_INT32 = 0,
RT_SWITCH_INT64 = 1,
} rtSwitchDataType_t; } rtSwitchDataType_t;


typedef enum tagRtStreamFlagType { typedef enum tagRtStreamFlagType {
RT_HEAD_STREAM = 0, // first stream
RT_INVALID_FLAG = 0xFFFFFFFF,
RT_HEAD_STREAM = 0, // first stream
RT_INVALID_FLAG = 0xFFFFFFFF,
} rtStreamFlagType_t; } rtStreamFlagType_t;


typedef enum tagRtLimitType { typedef enum tagRtLimitType {
RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms
RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms
} rtLimitType_t; } rtLimitType_t;


typedef struct rtExceptionInfo { typedef struct rtExceptionInfo {
uint32_t taskid;
uint32_t streamid;
uint32_t tid;
uint32_t deviceid;
uint32_t retcode;
uint32_t taskid;
uint32_t streamid;
uint32_t tid;
uint32_t deviceid;
uint32_t retcode;
} rtExceptionInfo; } rtExceptionInfo;


typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtErrorCallback)(rtExceptionType);


+ 73
- 71
third_party/fwkacllib/inc/runtime/config.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_CONFIG_H__ #ifndef __CCE_RUNTIME_CONFIG_H__
#define __CCE_RUNTIME_CONFIG_H__ #define __CCE_RUNTIME_CONFIG_H__
@@ -24,105 +24,106 @@ extern "C" {
#endif #endif


#define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver)) #define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver))
#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff)
#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff)
#define PLAT_GET_VER(type) (type & 0xff)
#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff)
#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff)
#define PLAT_GET_VER(type) (type & 0xff)


typedef enum tagRtArchType { typedef enum tagRtArchType {
ARCH_BEGIN = 0,
ARCH_V100 = ARCH_BEGIN,
ARCH_V200,
ARCH_END,
ARCH_BEGIN = 0,
ARCH_V100 = ARCH_BEGIN,
ARCH_V200,
ARCH_END,
} rtArchType_t; } rtArchType_t;


typedef enum tagRtChipType { typedef enum tagRtChipType {
CHIP_BEGIN = 0,
CHIP_MINI = CHIP_BEGIN,
CHIP_CLOUD,
CHIP_MDC,
CHIP_LHISI,
CHIP_DC,
CHIP_CLOUD_V2,
CHIP_END,
CHIP_BEGIN = 0,
CHIP_MINI = CHIP_BEGIN,
CHIP_CLOUD,
CHIP_MDC,
CHIP_LHISI,
CHIP_DC,
CHIP_CLOUD_V2,
CHIP_END,
} rtChipType_t; } rtChipType_t;


typedef enum tagRtVersion { typedef enum tagRtVersion {
VER_BEGIN = 0,
VER_NA = VER_BEGIN,
VER_ES,
VER_CS,
VER_END,
VER_BEGIN = 0,
VER_NA = VER_BEGIN,
VER_ES,
VER_CS,
VER_SD3403,
VER_END,
} rtVersion_t; } rtVersion_t;


/* match rtChipType_t */ /* match rtChipType_t */
typedef enum tagRtPlatformType { typedef enum tagRtPlatformType {
PLATFORM_BEGIN = 0,
PLATFORM_MINI_V1 = PLATFORM_BEGIN,
PLATFORM_CLOUD_V1,
PLATFORM_MINI_V2,
PLATFORM_LHISI_ES,
PLATFORM_LHISI_CS,
PLATFORM_DC,
PLATFORM_CLOUD_V2,
PLATFORM_END,
PLATFORM_BEGIN = 0,
PLATFORM_MINI_V1 = PLATFORM_BEGIN,
PLATFORM_CLOUD_V1,
PLATFORM_MINI_V2,
PLATFORM_LHISI_ES,
PLATFORM_LHISI_CS,
PLATFORM_DC,
PLATFORM_CLOUD_V2,
PLATFORM_END,
} rtPlatformType_t; } rtPlatformType_t;


typedef enum tagRtCubeFracMKNFp16 { typedef enum tagRtCubeFracMKNFp16 {
RT_CUBE_MKN_FP16_2_16_16 = 0,
RT_CUBE_MKN_FP16_4_16_16,
RT_CUBE_MKN_FP16_16_16_16,
RT_CUBE_MKN_FP16_Default,
RT_CUBE_MKN_FP16_2_16_16 = 0,
RT_CUBE_MKN_FP16_4_16_16,
RT_CUBE_MKN_FP16_16_16_16,
RT_CUBE_MKN_FP16_Default,
} rtCubeFracMKNFp16_t; } rtCubeFracMKNFp16_t;


typedef enum tagRtCubeFracMKNInt8 { typedef enum tagRtCubeFracMKNInt8 {
RT_CUBE_MKN_INT8_2_32_16 = 0,
RT_CUBE_MKN_INT8_4_32_4,
RT_CUBE_MKN_INT8_4_32_16,
RT_CUBE_MKN_INT8_16_32_16,
RT_CUBE_MKN_INT8_Default,
RT_CUBE_MKN_INT8_2_32_16 = 0,
RT_CUBE_MKN_INT8_4_32_4,
RT_CUBE_MKN_INT8_4_32_16,
RT_CUBE_MKN_INT8_16_32_16,
RT_CUBE_MKN_INT8_Default,
} rtCubeFracMKNInt8_t; } rtCubeFracMKNInt8_t;


typedef enum tagRtVecFracVmulMKNFp16 { typedef enum tagRtVecFracVmulMKNFp16 {
RT_VEC_VMUL_MKN_FP16_1_16_16 = 0,
RT_VEC_VMUL_MKN_FP16_Default,
RT_VEC_VMUL_MKN_FP16_1_16_16 = 0,
RT_VEC_VMUL_MKN_FP16_Default,
} rtVecFracVmulMKNFp16_t; } rtVecFracVmulMKNFp16_t;


typedef enum tagRtVecFracVmulMKNInt8 { typedef enum tagRtVecFracVmulMKNInt8 {
RT_VEC_VMUL_MKN_INT8_1_32_16 = 0,
RT_VEC_VMUL_MKN_INT8_Default,
RT_VEC_VMUL_MKN_INT8_1_32_16 = 0,
RT_VEC_VMUL_MKN_INT8_Default,
} rtVecFracVmulMKNInt8_t; } rtVecFracVmulMKNInt8_t;


typedef struct tagRtAiCoreSpec { typedef struct tagRtAiCoreSpec {
uint32_t cubeFreq;
uint32_t cubeMSize;
uint32_t cubeKSize;
uint32_t cubeNSize;
rtCubeFracMKNFp16_t cubeFracMKNFp16;
rtCubeFracMKNInt8_t cubeFracMKNInt8;
rtVecFracVmulMKNFp16_t vecFracVmulMKNFp16;
rtVecFracVmulMKNInt8_t vecFracVmulMKNInt8;
uint32_t cubeFreq;
uint32_t cubeMSize;
uint32_t cubeKSize;
uint32_t cubeNSize;
rtCubeFracMKNFp16_t cubeFracMKNFp16;
rtCubeFracMKNInt8_t cubeFracMKNInt8;
rtVecFracVmulMKNFp16_t vecFracVmulMKNFp16;
rtVecFracVmulMKNInt8_t vecFracVmulMKNInt8;
} rtAiCoreSpec_t; } rtAiCoreSpec_t;


typedef struct tagRtAiCoreRatesPara { typedef struct tagRtAiCoreRatesPara {
uint32_t ddrRate;
uint32_t l2Rate;
uint32_t l2ReadRate;
uint32_t l2WriteRate;
uint32_t l1ToL0ARate;
uint32_t l1ToL0BRate;
uint32_t l0CToUBRate;
uint32_t ubToL2;
uint32_t ubToDDR;
uint32_t ubToL1;
uint32_t ddrRate;
uint32_t l2Rate;
uint32_t l2ReadRate;
uint32_t l2WriteRate;
uint32_t l1ToL0ARate;
uint32_t l1ToL0BRate;
uint32_t l0CToUBRate;
uint32_t ubToL2;
uint32_t ubToDDR;
uint32_t ubToL1;
} rtAiCoreMemoryRates_t; } rtAiCoreMemoryRates_t;


typedef struct tagRtMemoryConfig { typedef struct tagRtMemoryConfig {
uint32_t flowtableSize;
uint32_t compilerSize;
uint32_t flowtableSize;
uint32_t compilerSize;
} rtMemoryConfig_t; } rtMemoryConfig_t;


typedef struct tagRtPlatformConfig { typedef struct tagRtPlatformConfig {
uint32_t platformConfig;
uint32_t platformConfig;
} rtPlatformConfig_t; } rtPlatformConfig_t;


/** /**
@@ -165,7 +166,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate
*/ */
RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig);



/** /**
* @ingroup * @ingroup
* @brief get l2 buffer Info,virtual baseaddr,Size * @brief get l2 buffer Info,virtual baseaddr,Size
@@ -176,14 +176,16 @@ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);


/** /**
* @ingroup * @ingroup
* @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be represented by 9020.
* @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be
* represented by 9020.
* @param [out] runtimeVersion * @param [out] runtimeVersion
* @return RT_ERROR_NONE for ok * @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_INVALID_VALUE for error input
*/ */
RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
} }
#endif #endif


#endif // __CCE_RUNTIME_STREAM_H__
#endif // __CCE_RUNTIME_STREAM_H__

+ 18
- 17
third_party/fwkacllib/inc/runtime/context.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_CONTEXT_H__ #ifndef __CCE_RUNTIME_CONTEXT_H__
#define __CCE_RUNTIME_CONTEXT_H__ #define __CCE_RUNTIME_CONTEXT_H__
@@ -30,24 +30,24 @@ extern "C" {
typedef void *rtContext_t; typedef void *rtContext_t;


typedef enum tagDryRunFlag { typedef enum tagDryRunFlag {
RT_DRYRUN_FLAG_FALSE = 0,
RT_DRYRUN_FLAG_TRUE = 1,
RT_DRYRUN_FLAG_FALSE = 0,
RT_DRYRUN_FLAG_TRUE = 1,
} rtDryRunFlag_t; } rtDryRunFlag_t;


typedef enum tagCtxMode { typedef enum tagCtxMode {
RT_CTX_NORMAL_MODE = 0,
RT_CTX_GEN_MODE = 1,
RT_CTX_NORMAL_MODE = 0,
RT_CTX_GEN_MODE = 1,
} rtCtxMode_t; } rtCtxMode_t;


typedef struct tagRtGroupInfo { typedef struct tagRtGroupInfo {
int32_t groupId;
uint32_t flag;
uint32_t aicoreNum;
uint32_t aicpuNum;
uint32_t aivectorNum;
uint32_t sdmaNum;
uint32_t activeStreamNum;
void *extrPtr;
int32_t groupId;
uint32_t flag;
uint32_t aicoreNum;
uint32_t aicpuNum;
uint32_t aivectorNum;
uint32_t sdmaNum;
uint32_t activeStreamNum;
void *extrPtr;
} rtGroupInfo_t; } rtGroupInfo_t;


/** /**
@@ -156,6 +156,7 @@ RTS_API rtError_t rtGetGroupCount(uint32_t *count);
* @return RT_ERROR_NONE for ok * @return RT_ERROR_NONE for ok
*/ */
RTS_API rtError_t rtSetCtxINFMode(bool mode); RTS_API rtError_t rtSetCtxINFMode(bool mode);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
} }
#endif #endif


+ 32
- 31
third_party/fwkacllib/inc/runtime/dev.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_DEVICE_H__ #ifndef __CCE_RUNTIME_DEVICE_H__
#define __CCE_RUNTIME_DEVICE_H__ #define __CCE_RUNTIME_DEVICE_H__
@@ -27,44 +27,44 @@ extern "C" {
#define RT_CAPABILITY_NOT_SUPPORT (0x0) #define RT_CAPABILITY_NOT_SUPPORT (0x0)


typedef struct tagRTDeviceInfo { typedef struct tagRTDeviceInfo {
uint8_t env_type; // 0: FPGA 1: EMU 2: ESL
uint32_t ctrl_cpu_ip;
uint32_t ctrl_cpu_id;
uint32_t ctrl_cpu_core_num;
uint32_t ctrl_cpu_endian_little;
uint32_t ts_cpu_core_num;
uint32_t ai_cpu_core_num;
uint32_t ai_core_num;
uint32_t ai_core_freq;
uint32_t ai_cpu_core_id;
uint32_t ai_core_id;
uint32_t aicpu_occupy_bitmap;
uint32_t hardware_version;
uint32_t ts_num;
uint8_t env_type; // 0: FPGA 1: EMU 2: ESL
uint32_t ctrl_cpu_ip;
uint32_t ctrl_cpu_id;
uint32_t ctrl_cpu_core_num;
uint32_t ctrl_cpu_endian_little;
uint32_t ts_cpu_core_num;
uint32_t ai_cpu_core_num;
uint32_t ai_core_num;
uint32_t ai_core_freq;
uint32_t ai_cpu_core_id;
uint32_t ai_core_id;
uint32_t aicpu_occupy_bitmap;
uint32_t hardware_version;
uint32_t ts_num;
} rtDeviceInfo_t; } rtDeviceInfo_t;


typedef enum tagRtRunMode { typedef enum tagRtRunMode {
RT_RUN_MODE_OFFLINE = 0,
RT_RUN_MODE_ONLINE = 1,
RT_RUN_MODE_AICPU_SCHED = 2,
RT_RUN_MODE_RESERVED
RT_RUN_MODE_OFFLINE = 0,
RT_RUN_MODE_ONLINE = 1,
RT_RUN_MODE_AICPU_SCHED = 2,
RT_RUN_MODE_RESERVED
} rtRunMode; } rtRunMode;


typedef enum tagRtAicpuDeployType { typedef enum tagRtAicpuDeployType {
AICPU_DEPLOY_CROSS_OS = 0x0,
AICPU_DEPLOY_CROSS_PROCESS = 0x1,
AICPU_DEPLOY_CROSS_THREAD = 0x2,
AICPU_DEPLOY_RESERVED
AICPU_DEPLOY_CROSS_OS = 0x0,
AICPU_DEPLOY_CROSS_PROCESS = 0x1,
AICPU_DEPLOY_CROSS_THREAD = 0x2,
AICPU_DEPLOY_RESERVED
} rtAicpuDeployType_t; } rtAicpuDeployType_t;


typedef enum tagRtFeatureType { typedef enum tagRtFeatureType {
FEATURE_TYPE_MEMCPY = 0,
FEATURE_TYPE_RSV
FEATURE_TYPE_MEMCPY = 0,
FEATURE_TYPE_RSV
} rtFeatureType_t; } rtFeatureType_t;


typedef enum tagMemcpyInfo { typedef enum tagMemcpyInfo {
MEMCPY_INFO_SUPPORT_ZEROCOPY = 0,
MEMCPY_INFO_RSV
MEMCPY_INFO_SUPPORT_ZEROCOPY = 0,
MEMCPY_INFO_RSV
} rtMemcpyInfo_t; } rtMemcpyInfo_t;


/** /**
@@ -356,6 +356,7 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device);
* @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_INVALID_VALUE for error input
*/ */
RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
} }
#endif #endif


+ 5
- 5
third_party/fwkacllib/inc/runtime/dvfsprofile.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_DVFSPROFILE_H__ #ifndef __CCE_RUNTIME_DVFSPROFILE_H__
#define __CCE_RUNTIME_DVFSPROFILE_H__ #define __CCE_RUNTIME_DVFSPROFILE_H__


+ 5
- 5
third_party/fwkacllib/inc/runtime/event.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_EVENT_H__ #ifndef __CCE_RUNTIME_EVENT_H__
#define __CCE_RUNTIME_EVENT_H__ #define __CCE_RUNTIME_EVENT_H__


+ 50
- 49
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_KERNEL_H__ #ifndef __CCE_RUNTIME_KERNEL_H__
#define __CCE_RUNTIME_KERNEL_H__ #define __CCE_RUNTIME_KERNEL_H__
@@ -29,15 +29,15 @@ extern "C" {
* @brief shared memory data control * @brief shared memory data control
*/ */
typedef struct tagRtSmData { typedef struct tagRtSmData {
uint64_t L2_mirror_addr; // preload or swap source address
uint32_t L2_data_section_size; // every data size
uint8_t L2_preload; // 1 - preload from mirrorAddr, 0 - no preload
uint8_t modified; // 1 - data will be modified by kernel, 0 - no modified
uint8_t priority; // data priority
int8_t prev_L2_page_offset_base; // remap source section offset
uint8_t L2_page_offset_base; // remap destination section offset
uint8_t L2_load_to_ddr; // 1 - need load out, 0 - no need
uint8_t reserved[2]; // reserved
uint64_t L2_mirror_addr; // preload or swap source address
uint32_t L2_data_section_size; // every data size
uint8_t L2_preload; // 1 - preload from mirrorAddr, 0 - no preload
uint8_t modified; // 1 - data will be modified by kernel, 0 - no modified
uint8_t priority; // data priority
int8_t prev_L2_page_offset_base; // remap source section offset
uint8_t L2_page_offset_base; // remap destination section offset
uint8_t L2_load_to_ddr; // 1 - need load out, 0 - no need
uint8_t reserved[2]; // reserved
} rtSmData_t; } rtSmData_t;


/** /**
@@ -45,12 +45,12 @@ typedef struct tagRtSmData {
* @brief shared memory description * @brief shared memory description
*/ */
typedef struct tagRtSmCtrl { typedef struct tagRtSmCtrl {
rtSmData_t data[8]; // data description
uint64_t size; // max page Num
uint8_t remap[64]; /* just using for static remap mode, default:0xFF
rtSmData_t data[8]; // data description
uint64_t size; // max page Num
uint8_t remap[64]; /* just using for static remap mode, default:0xFF
array index: virtual l2 page id, array value: physic l2 page id */ array index: virtual l2 page id, array value: physic l2 page id */
uint8_t l2_in_main; // 0-DDR, 1-L2, default:0xFF
uint8_t reserved[3];
uint8_t l2_in_main; // 0-DDR, 1-L2, default:0xFF
uint8_t reserved[3];
} rtSmDesc_t; } rtSmDesc_t;


typedef rtSmDesc_t rtL2Ctrl_t; typedef rtSmDesc_t rtL2Ctrl_t;
@@ -60,10 +60,10 @@ typedef rtSmDesc_t rtL2Ctrl_t;
* @brief device binary type * @brief device binary type
*/ */
typedef struct tagRtDevBinary { typedef struct tagRtDevBinary {
uint32_t magic; // magic number
uint32_t version; // version of binary
const void *data; // binary data
uint64_t length; // binary length
uint32_t magic; // magic number
uint32_t version; // version of binary
const void *data; // binary data
uint64_t length; // binary length
} rtDevBinary_t; } rtDevBinary_t;


/** /**
@@ -73,15 +73,15 @@ typedef struct tagRtDevBinary {
#define ONLINE_PROF_MAX_PMU_NUM (8) #define ONLINE_PROF_MAX_PMU_NUM (8)


typedef struct ProfilefDataInfo { typedef struct ProfilefDataInfo {
const void *stubFunc;
uint32_t blockDim;
const void *args;
uint32_t argsSize;
rtSmDesc_t *smDesc;
rtStream_t stream;
uint64_t totalcycle;
uint64_t ovcycle;
uint64_t pmu_cnt[ONLINE_PROF_MAX_PMU_NUM];
const void *stubFunc;
uint32_t blockDim;
const void *args;
uint32_t argsSize;
rtSmDesc_t *smDesc;
rtStream_t stream;
uint64_t totalcycle;
uint64_t ovcycle;
uint64_t pmu_cnt[ONLINE_PROF_MAX_PMU_NUM];
} rtProfDataInfo_t; } rtProfDataInfo_t;


/** /**
@@ -89,12 +89,12 @@ typedef struct ProfilefDataInfo {
* @brief function mode type * @brief function mode type
*/ */
typedef enum { typedef enum {
FUNC_MODE_NORMAL = 0,
FUNC_MODE_PCTRACE_USERPROFILE_RECORDLOOP,
FUNC_MODE_PCTRACE_USERPROFILE_SKIPLOOP,
FUNC_MODE_PCTRACE_CYCLECNT_RECORDLOOP,
FUNC_MODE_PCTRACE_CYCLECNT_SKIPLOOP,
FUNC_MODE_BUTT
FUNC_MODE_NORMAL = 0,
FUNC_MODE_PCTRACE_USERPROFILE_RECORDLOOP,
FUNC_MODE_PCTRACE_USERPROFILE_SKIPLOOP,
FUNC_MODE_PCTRACE_CYCLECNT_RECORDLOOP,
FUNC_MODE_PCTRACE_CYCLECNT_SKIPLOOP,
FUNC_MODE_BUTT
} rtFuncModeType_t; } rtFuncModeType_t;


/** /**
@@ -102,23 +102,23 @@ typedef enum {
* @brief kernel info * @brief kernel info
*/ */
typedef struct rtKernelInfo { typedef struct rtKernelInfo {
uint64_t task_offset; // kernel offset in module
/* flowtable */
void *arg; // launch kernel arg
uint32_t arg_size;
/* module */
void *module_addr; // module::baseaddr_
uint32_t module_size;
} * rtKernelInfo_t;
uint64_t task_offset; // kernel offset in module
/* flowtable */
void *arg; // launch kernel arg
uint32_t arg_size;
/* module */
void *module_addr; // module::baseaddr_
uint32_t module_size;
} *rtKernelInfo_t;


/** /**
* @ingroup rt_KernelConfigDump * @ingroup rt_KernelConfigDump
* @brief device dump type * @brief device dump type
*/ */
typedef enum tagRtDumpKind { typedef enum tagRtDumpKind {
RT_DATA_DUMP_KIND_INVALID = -1,
RT_DATA_DUMP_KIND_DUMP = 0,
RT_DATA_DUMP_KIND_RESERVED
RT_DATA_DUMP_KIND_INVALID = -1,
RT_DATA_DUMP_KIND_DUMP = 0,
RT_DATA_DUMP_KIND_RESERVED
} rtDumpKind_t; } rtDumpKind_t;


/** /**
@@ -414,6 +414,7 @@ RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length);
RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc = nullptr, rtStream_t stream = nullptr); RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc = nullptr, rtStream_t stream = nullptr);
#else #else
RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStream_t stream); RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStream_t stream);

#endif #endif
#endif // __CLANG_CCE_RUNTIME_H__ #endif // __CLANG_CCE_RUNTIME_H__




+ 66
- 65
third_party/fwkacllib/inc/runtime/mem.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_MEM_H__ #ifndef __CCE_RUNTIME_MEM_H__
#define __CCE_RUNTIME_MEM_H__ #define __CCE_RUNTIME_MEM_H__
@@ -34,6 +34,7 @@ extern "C" {
*/ */
#define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device
#define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device
#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device
#define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device
#define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device
#define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device
@@ -89,40 +90,40 @@ typedef uint32_t rtMemType_t;
* @brief memory copy type * @brief memory copy type
*/ */
typedef enum tagRtMemcpyKind { typedef enum tagRtMemcpyKind {
RT_MEMCPY_HOST_TO_HOST = 0, // host to host
RT_MEMCPY_HOST_TO_DEVICE, // host to device
RT_MEMCPY_DEVICE_TO_HOST, // device to host
RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
RT_MEMCPY_MANAGED, // managed memory
RT_MEMCPY_ADDR_DEVICE_TO_DEVICE,
RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
RT_MEMCPY_RESERVED,
RT_MEMCPY_HOST_TO_HOST = 0, // host to host
RT_MEMCPY_HOST_TO_DEVICE, // host to device
RT_MEMCPY_DEVICE_TO_HOST, // device to host
RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
RT_MEMCPY_MANAGED, // managed memory
RT_MEMCPY_ADDR_DEVICE_TO_DEVICE,
RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
RT_MEMCPY_RESERVED,
} rtMemcpyKind_t; } rtMemcpyKind_t;


typedef enum tagRtMemInfoType { typedef enum tagRtMemInfoType {
RT_MEMORYINFO_DDR,
RT_MEMORYINFO_HBM,
RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR
RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR
RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM
RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM
RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR
RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR
RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM
RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM
RT_MEMORYINFO_DDR,
RT_MEMORYINFO_HBM,
RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR
RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR
RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM
RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM
RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR
RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR
RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM
RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM
} rtMemInfoType_t; } rtMemInfoType_t;


typedef enum tagRtRecudeKind { typedef enum tagRtRecudeKind {
RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P
RT_RECUDE_KIND_END
RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P
RT_RECUDE_KIND_END
} rtRecudeKind_t; } rtRecudeKind_t;


typedef enum tagRtDataType { typedef enum tagRtDataType {
RT_DATA_TYPE_FP32 = 0, // fp32
RT_DATA_TYPE_FP16 = 1, // fp16
RT_DATA_TYPE_INT16 = 2, // int16
RT_DATA_TYPE_END
RT_DATA_TYPE_FP32 = 0, // fp32
RT_DATA_TYPE_FP16 = 1, // fp16
RT_DATA_TYPE_INT16 = 2, // int16
RT_DATA_TYPE_END
} rtDataType_t; } rtDataType_t;


/** /**
@@ -130,10 +131,10 @@ typedef enum tagRtDataType {
* @brief memory copy channel type * @brief memory copy channel type
*/ */
typedef enum tagRtMemcpyChannelType { typedef enum tagRtMemcpyChannelType {
RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
RT_MEMCPY_CHANNEL_TYPE_PCIe,
RT_MEMCPY_CHANNEL_TYPE_HCCs, // not support now
RT_MEMCPY_CHANNEL_TYPE_RESERVED,
RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
RT_MEMCPY_CHANNEL_TYPE_PCIe,
RT_MEMCPY_CHANNEL_TYPE_HCCs, // not support now
RT_MEMCPY_CHANNEL_TYPE_RESERVED,
} rtMemcpyChannelType_t; } rtMemcpyChannelType_t;


/** /**
@@ -141,18 +142,18 @@ typedef enum tagRtMemcpyChannelType {
* @brief ai core memory size * @brief ai core memory size
*/ */
typedef struct rtAiCoreMemorySize { typedef struct rtAiCoreMemorySize {
uint32_t l0ASize;
uint32_t l0BSize;
uint32_t l0CSize;
uint32_t l1Size;
uint32_t ubSize;
uint32_t l2Size;
uint32_t l2PageNum;
uint32_t blockSize;
uint64_t bankSize;
uint64_t bankNum;
uint64_t burstInOneBlock;
uint64_t bankGroupNum;
uint32_t l0ASize;
uint32_t l0BSize;
uint32_t l0CSize;
uint32_t l1Size;
uint32_t ubSize;
uint32_t l2Size;
uint32_t l2PageNum;
uint32_t blockSize;
uint64_t bankSize;
uint64_t bankNum;
uint64_t burstInOneBlock;
uint64_t bankGroupNum;
} rtAiCoreMemorySize_t; } rtAiCoreMemorySize_t;


/** /**
@@ -160,10 +161,10 @@ typedef struct rtAiCoreMemorySize {
* @brief memory type * @brief memory type
*/ */
typedef enum tagRtMemoryType { typedef enum tagRtMemoryType {
RT_MEMORY_TYPE_HOST = 1,
RT_MEMORY_TYPE_DEVICE = 2,
RT_MEMORY_TYPE_SVM = 3,
RT_MEMORY_TYPE_DVPP = 4
RT_MEMORY_TYPE_HOST = 1,
RT_MEMORY_TYPE_DEVICE = 2,
RT_MEMORY_TYPE_SVM = 3,
RT_MEMORY_TYPE_DVPP = 4
} rtMemoryType_t; } rtMemoryType_t;


/** /**
@@ -171,31 +172,31 @@ typedef enum tagRtMemoryType {
* @brief memory attribute * @brief memory attribute
*/ */
typedef struct tagRtPointerAttributes { typedef struct tagRtPointerAttributes {
rtMemoryType_t memoryType; // host memory or device memory
rtMemoryType_t locationType;
uint32_t deviceID; // device ID
uint32_t pageSize;
rtMemoryType_t memoryType; // host memory or device memory
rtMemoryType_t locationType;
uint32_t deviceID; // device ID
uint32_t pageSize;
} rtPointerAttributes_t; } rtPointerAttributes_t;




typedef struct rtMallocHostSharedMemoryIn { typedef struct rtMallocHostSharedMemoryIn {
const char *name;
const uint64_t size;
uint32_t flag;
const char *name;
const uint64_t size;
uint32_t flag;
} rtMallocHostSharedMemoryIn; } rtMallocHostSharedMemoryIn;


typedef struct rtMallocHostSharedMemoryOut { typedef struct rtMallocHostSharedMemoryOut {
int fd;
void *ptr;
void *devPtr;
int fd;
void *ptr;
void *devPtr;
} rtMallocHostSharedMemoryOut; } rtMallocHostSharedMemoryOut;


typedef struct rtFreeHostSharedMemoryIn { typedef struct rtFreeHostSharedMemoryIn {
const char *name;
const uint64_t size;
int fd;
void *ptr;
void *devPtr;
const char *name;
const uint64_t size;
int fd;
void *ptr;
void *devPtr;
} rtFreeHostSharedMemoryIn; } rtFreeHostSharedMemoryIn;






+ 5
- 5
third_party/fwkacllib/inc/runtime/rt.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_RT_H__ #ifndef __CCE_RUNTIME_RT_H__
#define __CCE_RUNTIME_RT_H__ #define __CCE_RUNTIME_RT_H__


+ 5
- 5
third_party/fwkacllib/inc/runtime/rt_model.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_MODEL_H__ #ifndef __CCE_RUNTIME_MODEL_H__
#define __CCE_RUNTIME_MODEL_H__ #define __CCE_RUNTIME_MODEL_H__


+ 5
- 5
third_party/fwkacllib/inc/runtime/stream.h View File

@@ -1,18 +1,18 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/
*/


#ifndef __CCE_RUNTIME_STREAM_H__ #ifndef __CCE_RUNTIME_STREAM_H__
#define __CCE_RUNTIME_STREAM_H__ #define __CCE_RUNTIME_STREAM_H__


+ 6
- 14
third_party/fwkacllib/inc/tdt/index_transform.h View File

@@ -1,18 +1,10 @@
/** /**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
* @file index_transform.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2018-2019. All Rights Reserved.
*
* This program is used to get logical device id by phy device id.
*/


#ifndef INC_TDT_INDEX_TRANSFORM_H #ifndef INC_TDT_INDEX_TRANSFORM_H
#define INC_TDT_INDEX_TRANSFORM_H #define INC_TDT_INDEX_TRANSFORM_H


+ 1
- 1
third_party/fwkacllib/inc/tdt/status.h View File

@@ -1,4 +1,4 @@
/**
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd * Copyright 2019-2020 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");


+ 1
- 20
third_party/fwkacllib/inc/tdt/tdt_host_interface.h View File

@@ -61,7 +61,7 @@ int32_t TdtHostInit(uint32_t deviceId);
* @li tdt_host_interface.h: Header file where the interface declaration is located. * @li tdt_host_interface.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'DataItem' defined * @li data_common.h: Header file where 'DataItem' defined
*/ */
int32_t TdtHostPushData(const std::string &channelName, const std::vector<DataItem> &item);
int32_t TdtHostPushData(const std::string &channelName, const std::vector<DataItem> &item, uint32_t deviceId = 0);


/** /**
* @ingroup TdtHostDestroy * @ingroup TdtHostDestroy
@@ -203,25 +203,6 @@ int32_t TdtInFeedDestroy(uint32_t deviceId);
* @li tdt_host_interface.h: Header file where the interface declaration is located. * @li tdt_host_interface.h: Header file where the interface declaration is located.
*/ */
int32_t TdtOutFeedDestroy(); int32_t TdtOutFeedDestroy();

/**
* @ingroup TdtInFeedData
* @brief Blocking queue. When the queue is full, the Push interface will block.
*
* @par Function
* Blocking queue. When the queue is full, the Push interface will block.
*
* @param channelName [IN] type #String. queue channel name
* @param items [IN] type #vector<DataItem> DataItem is defined in data_common.h. input data
* @retval 0 Success
* @retval OtherValues 0 Fail
*
* @par Dependency
* @li libtsdclient.so: Library to which the interface belongs.
* @li tdt_host_interface.h: Header file where the interface declaration is located.
* @li data_common.h: Header file where 'DataItem' defined
*/
int32_t TdtInFeedData(const std::string &channelName, const std::vector<DataItem> &item, uint32_t deviceId);
} // namespace tdt } // namespace tdt
#ifdef __cplusplus #ifdef __cplusplus
} }


+ 67
- 351
third_party/fwkacllib/inc/toolchain/prof_acl_api.h View File

@@ -17,380 +17,96 @@
#ifndef MSPROFILER_API_PROF_ACL_API_H_ #ifndef MSPROFILER_API_PROF_ACL_API_H_
#define MSPROFILER_API_PROF_ACL_API_H_ #define MSPROFILER_API_PROF_ACL_API_H_


#define MSVP_MAX_DEV_NUM 64
// DataTypeConfig
#define PROF_ACL_API 0x00000001
#define PROF_TASK_TIME 0x00000002
#define PROF_AICORE_METRICS 0x00000004
#define PROF_AICPU_TRACE 0x00000008
#define PROF_MODEL_EXECUTE 0x00000010
#define PROF_RUNTIME_API 0x00000020
#define PROF_RUNTIME_TRACE 0x00000040
#define PROF_SCHEDULE_TIMELINE 0x00000080
#define PROF_SCHEDULE_TRACE 0x00000100
#define PROF_AIVECTORCORE_METRICS 0x00000200
#define PROF_SUBTASK_TIME 0x00000400

#define PROF_TRAINING_TRACE 0x00000800
#define PROF_HCCL_TRACE 0x00001000

#define PROF_TASK_TRACE 0x00001852

// system profilinig switch
#define PROF_CPU 0x00010000
#define PROF_HARDWARE_MEMORY 0x00020000
#define PROF_IO 0x00040000
#define PROF_INTER_CONNECTION 0x00080000
#define PROF_DVPP 0x00100000
#define PROF_SYS_AICORE_SAMPLE 0x00200000
#define PROF_AIVECTORCORE_SAMPLE 0x00400000

#define PROF_MODEL_LOAD 0x8000000000000000

// DataTypeConfig MASK
#define PROF_ACL_API_MASK 0x00000001
#define PROF_TASK_TIME_MASK 0x00000002
#define PROF_AICORE_METRICS_MASK 0x00000004
#define PROF_AICPU_TRACE_MASK 0x00000008
#define PROF_MODEL_EXECUTE_MASK 0x00000010
#define PROF_RUNTIME_API_MASK 0x00000020
#define PROF_RUNTIME_TRACE_MASK 0x00000040
#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080
#define PROF_SCHEDULE_TRACE_MASK 0x00000100
#define PROF_AIVECTORCORE_METRICS_MASK 0x00000200
#define PROF_SUBTASK_TIME_MASK 0x00000400

#define PROF_TRAINING_TRACE_MASK 0x00000800
#define PROF_HCCL_TRACE_MASK 0x00001000

// system profilinig mask
#define PROF_CPU_MASK 0x00010000
#define PROF_HARDWARE_MEMORY_MASK 0x00020000
#define PROF_IO_MASK 0x00040000
#define PROF_INTER_CONNECTION_MASK 0x00080000
#define PROF_DVPP_MASK 0x00100000
#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000
#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000

#define PROF_MODEL_LOAD_MASK 0x8000000000000000

#ifndef OS_TYPE #ifndef OS_TYPE
#define OS_TYPE 0 #define OS_TYPE 0
#endif // OS_TYPE #endif // OS_TYPE



#if (OS_TYPE != LINUX) #if (OS_TYPE != LINUX)
#define MSVP_PROF_API __declspec(dllexport) #define MSVP_PROF_API __declspec(dllexport)
#else #else
#define MSVP_PROF_API __attribute__((visibility("default"))) #define MSVP_PROF_API __attribute__((visibility("default")))
#endif #endif


// DataTypeConfig
#define PROF_ACL_API 0x0001
#define PROF_TASK_TIME 0x0002
#define PROF_AICORE_METRICS 0x0004
#define PROF_AICPU_TRACE 0x0008
#define PROF_MODEL_EXECUTE 0x0010
#define PROF_RUNTIME_API 0x0020
#define PROF_RUNTIME_TRACE 0x0040
#define PROF_SCHEDULE_TIMELINE 0x0080
#define PROF_SCHEDULE_TRACE 0x0100
#define PROF_AIVECTORCORE_METRICS 0x0200
#define PROF_SUBTASK_TIME 0x0400

#define PROF_TRAINING_TRACE 0x0800
#define PROF_HCCL_TRACE 0x1000
#define PROF_DATA_PROCESS 0x2000
#define PROF_TASK_TRACE 0x3842

#define PROF_MODEL_LOAD 0x8000000000000000

// DataTypeConfig MASK
#define PROF_ACL_API_MASK 0x0001
#define PROF_TASK_TIME_MASK 0x0002
#define PROF_AICORE_METRICS_MASK 0x0004
#define PROF_AICPU_TRACE_MASK 0x0008
#define PROF_MODEL_EXECUTE_MASK 0x0010
#define PROF_RUNTIME_API_MASK 0x0020
#define PROF_RUNTIME_TRACE_MASK 0x0040
#define PROF_SCHEDULE_TIMELINE_MASK 0x0080
#define PROF_SCHEDULE_TRACE_MASK 0x0100
#define PROF_AIVECTORCORE_METRICS_MASK 0x0200
#define PROF_SUBTASK_TIME_MASK 0x0400

#define PROF_TRAINING_TRACE_MASK 0x0800
#define PROF_HCCL_TRACE_MASK 0x1000
#define PROF_DATA_PROCESS_MASK 0x2000

#define PROF_MODEL_LOAD_MASK 0x8000000000000000

#include <cstdint> #include <cstdint>
#include <string>

/**
* @name ProrErrorCode
* @brief error code enum of prof_acl_apis
*/
enum ProfErrorCode {
PROF_ERROR_NONE = 0, // ok
PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr
PROF_ERROR_REPEAT_INIT, // profiling has already been inited
PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string
PROF_ERROR_DIR_NO_ACCESS, // dir is not accessable
PROF_ERROR_FAILURE, // failed to init or start profiling
PROF_ERROR_NOT_INITED, // profiling has not been inited
PROF_ERROR_DEVICE_INVALID, // device id invalid
PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics
PROF_ERROR_REPEAT_START, // profiilng has already been started
PROF_ERROR_NOT_STARTED, // profiling has not been started
PROF_ERROR_REPEAT_SUBSCRIBE, // same model id has already been subscribed
PROF_ERROR_MODEL_ID_INVALID, // model id does not exist or has not been subscribed
PROF_ERROR_API_CONFLICT, // prof ctrl api mode conflicts with subscribe mode
};

/**
* @brief transfer profiling config in acl.json to sample config
* @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...}
* @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]}
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg);

/**
* @name ProfInit
* @brief init profiling
* @param profInitCfg [IN] config of init profiling of json format
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg);

/**
* @name ProfAicoreMetrics
* @brief aicore metrics enum
*/
enum ProfAicoreMetrics {
PROF_AICORE_ARITHMATIC_THROUGHPUT = 0,
PROF_AICORE_PIPELINE = 1,
PROF_AICORE_SYNCHRONIZATION = 2,
PROF_AICORE_MEMORY = 3,
PROF_AICORE_INTERNAL_MEMORY = 4,
PROF_AICORE_STALL = 5,
PROF_AICORE_METRICS_COUNT,
PROF_AICORE_NONE = 0xff,
};

/**
* @name ProfConfig
* @brief struct of ProfStart
*/
struct ProfConfig {
uint32_t devNums; // length of device id list
uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list
ProfAicoreMetrics aicoreMetrics; // aicore metric
uint64_t dataTypeConfig; // data type to start profiling
};

/**
* @name ProfStartProfiling
* @brief start profiling
* @param profStartCfg [IN] config to start profiling
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg);

/**
* @name ProfStopProfiling
* @brief stop profiling
* @param profStopCfg [IN] config to stop profiling
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg);

/**
* @name ProfFinalize
* @brief finalize profiling task
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfFinalize();

/**
* @name ProfGetDataTypeConfig
* @brief get dataTypeConfig started with of one device
* @param deviceId [IN] deviceId to get dataTypeConfig
* @param dataTypeConfig [OUT] result get
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig);


namespace Msprofiler { namespace Msprofiler {
namespace Api { namespace Api {
/**
* @brief transfer profiling config in acl.json to sample config
* @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...}
* @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]}
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg);

/**
* @name ProfInit
* @brief init profiling
* @param profInitCfg [IN] config of init profiling of json format
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg);

/**
* @name ProfStartProfiling
* @brief start profiling
* @param profStartCfg [IN] config to start profiling
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg);

/**
* @name ProfStopProfiling
* @brief stop profiling
* @param profStopCfg [IN] config to stop profiling
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg);

/**
* @name ProfFinalize
* @brief finalize profiling task
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfFinalize();

/**
* @name ProfGetDataTypeConfig
* @brief get dataTypeConfig started with of one device
* @param deviceId [IN] deviceId to get dataTypeConfig
* @param dataTypeConfig [OUT] result get
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig);

/**
* @name WorkMode
* @brief profiling api work mode
*/
enum WorkMode {
WORK_MODE_OFF, // profiling not at work
WORK_MODE_API_CTRL, // profiling work on api ctrl mode, (ProfInit)
WORK_MODE_SUBSCRIBE, // profiling work on subscribe mode
};

/**
* @name ProfGetApiWorkMode
* @brief get profiling api work mode
* @return WorkMode
*/
MSVP_PROF_API WorkMode ProfGetApiWorkMode();

/**
* @name ProfSubscribeConfig
* @brief config of subscribe api
*/
struct ProfSubscribeConfig {
bool timeInfo; // subscribe op time
ProfAicoreMetrics aicoreMetrics; // subscribe ai core metrics
void* fd; // pipe fd
};

/**
* @name ProfGetDataTypeConfig
* @brief get DataTypeConfig of subscribe
* @param profSubscribeConfig [IN] config to subscribe data
* @return DataTypeConfig
*/
MSVP_PROF_API uint64_t ProfGetDataTypeConfig(const ProfSubscribeConfig *profSubscribeConfig);

/**
* @name ProfModelSubscribe
* @brief subscribe data of one model id
* @param modelId [IN] model id to subscribe data
* @param devId [IN] device id of model
* @param profSubscribeConfig [IN] config to subscribe data
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfModelSubscribe(uint32_t modelId, uint32_t devId,
const ProfSubscribeConfig *profSubscribeConfig);

/**
* @name ProfIsModelSubscribed
* @brief check if a model id is subscribed
* @param modeiId [IN] modei id to check
* @return true: subscribed, false: not
*/
MSVP_PROF_API bool ProfIsModelSubscribed(uint32_t modelId);

/**
* @name ProfModelUnSubscribe
* @brief unsubscribe a model id
* @param modeiId [IN] modei id to unsubscribe
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfModelUnSubscribe(uint32_t modelId);

/**
* @name ProfGetOpDescSize
* @brief get profiling data struct size
* @param opDescSize [OUT] bytes of profiling subscribe data struct
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfGetOpDescSize(uint32_t *opDescSize);

/**
* @name ProfGetOpNum
* @brief get how many op data there are in data
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param opNum [OUT] number of op in data
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfGetOpNum(const void *data, uint32_t len, uint32_t *opNum);

/**
* @name ProfGetModelId
* @brief get model id of specific part of data
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param index [IN] index of part(op)
* @return model id
*/
MSVP_PROF_API uint32_t ProfGetModelId(const void *data, uint32_t len, uint32_t index);

/**
* @name ProfGetOpType
* @brief get op type of specific part of data
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param opType [OUT] op type buffer
* @param opTypeLen [IN] buffer size of param opType
* @param index [IN] index of part(op)
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfGetOpType(const void *data, uint32_t len, char *opType, uint32_t opTypeLen, uint32_t index);

/**
* @name ProfGetOpName
* @brief get op name of specific part of data
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param opType [OUT] op name buffer
* @param opTypeLen [IN] buffer size of param opName
* @param index [IN] index of part(op)
* @return ProfErrorCode
*/
MSVP_PROF_API int32_t ProfGetOpName(const void *data, uint32_t len, char *opName, uint32_t opNameLen, uint32_t index);

/**
* @name ProfGetOpStart
* @brief get op start timestamp of specific part of data
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param index [IN] index of part(op)
* @return op start timestamp (us)
*/
MSVP_PROF_API uint64_t ProfGetOpStart(const void *data, uint32_t len, uint32_t index);

/**
* @name ProfGetOpEnd
* @brief get op end timestamp of specific part of data
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param index [IN] index of part(op)
* @return op end timestamp (us)
*/
MSVP_PROF_API uint64_t ProfGetOpEnd(const void *data, uint32_t len, uint32_t index);

/**
* @name ProfGetOpDuration
* @brief get op duration of specific part of data
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param index [IN] index of part(op)
* @return op duration (us)
*/
MSVP_PROF_API uint64_t ProfGetOpDuration(const void *data, uint32_t len, uint32_t index);

/** /**
* @name ProfGetOpExecutionTime * @name ProfGetOpExecutionTime
* @brief get op execution time of specific part of data * @brief get op execution time of specific part of data
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param index [IN] index of part(op) * @param index [IN] index of part(op)
* @return op execution time (us) * @return op execution time (us)
*/ */
MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);
}
}


/**
* @name ProfGetOpCubeOps
* @brief get op cube fops of specific part of data
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param index [IN] index of part(op)
* @return op cube fops
*/
MSVP_PROF_API uint64_t ProfGetOpCubeOps(const void *data, uint32_t len, uint32_t index);
#ifdef __cplusplus
extern "C" {
#endif


/**
* @name ProfGetOpVectorOps
* @brief get op vector fops of specific part of data
* @param data [IN] data read from pipe
* @param len [IN] data length
* @param index [IN] index of part(op)
* @return op vector fops
*/
MSVP_PROF_API uint64_t ProfGetOpVectorOps(const void *data, uint32_t len, uint32_t index);
MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);


} // namespace Api
} // namespace Msprofiler
#ifdef __cplusplus
}
#endif


#endif // MSPROFILER_API_PROF_ACL_API_H_ #endif // MSPROFILER_API_PROF_ACL_API_H_

+ 2
- 0
third_party/fwkacllib/inc/toolchain/prof_reporter.h View File

@@ -26,6 +26,8 @@
#define MSVP_PROF_API __attribute__((visibility("default"))) #define MSVP_PROF_API __attribute__((visibility("default")))
#endif #endif


#include "prof_callback.h"

/** /**
* @file prof_reporter.h * @file prof_reporter.h
* @defgroup reporter the reporter group * @defgroup reporter the reporter group


+ 72
- 78
third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h View File

@@ -1,78 +1,72 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/** @defgroup mstune mstune调优接口 */
#ifndef TUNE_API_H
#define TUNE_API_H
#include <vector>
#include <map>
#include <string>
#include "graph/graph.h"
#include "ge/ge_api.h"

/**
* @ingroup mstune
*
* mstune status
*/
enum MsTuneStatus {
MSTUNE_SUCCESS, /** tune success */
MSTUNE_FAILED, /** tune failed */
};

// Option key: for train options sets
const std::string MSTUNE_SELF_KEY = "mstune";
const std::string MSTUNE_GEINIT_KEY = "initialize";
const std::string MSTUNE_GESESS_KEY = "session";

/**
* @ingroup mstune
* @par 描述: 命令行调优
*
* @attention 无
* @param option [IN] 调优参数
* @param msg [OUT] 调优异常下返回信息
* @retval #MSTUNE_SUCCESS 执行成功
* @retval #MSTUNE_FAILED 执行失败
* @par 依赖:
* @li tune_api.cpp:该接口所属的开发包。
* @li tune_api.h:该接口声明所在的头文件。
* @see 无
* @since
*/
MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg);

/**
* @ingroup mstune
* @par 描述: 梯度调优
*
* @attention 无
* @param tuningGraph [IN] 调优图
* @param dependGraph [IN] 调优依赖图
* @param session [IN] ge连接会话
* @param option [IN] 参数集. 包含调优参数及ge参数
* @retval #MSTUNE_SUCCESS 执行成功
* @retval #MSTUNE_FAILED 执行失败
* @par 依赖:
* @li tune_api.cpp:该接口所属的开发包。
* @li tune_api.h:该接口声明所在的头文件。
* @see 无
* @since
*/
extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);

#endif
/**
* @file tune_api.h
*
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n
* 描述:mstune调优接口头文件
*/
/** @defgroup mstune mstune调优接口 */
#ifndef TUNE_API_H
#define TUNE_API_H
#include <vector>
#include <map>
#include <string>
#include "graph/graph.h"
#include "ge/ge_api.h"
/**
* @ingroup mstune
*
* mstune status
*/
enum MsTuneStatus {
MSTUNE_SUCCESS, /** tune success */
MSTUNE_FAILED, /** tune failed */
};
// Option key: for train options sets
const std::string MSTUNE_SELF_KEY = "mstune";
const std::string MSTUNE_GEINIT_KEY = "initialize";
const std::string MSTUNE_GESESS_KEY = "session";
/**
* @ingroup mstune
* @par 描述: 命令行调优
*
* @attention 无
* @param option [IN] 调优参数
* @param msg [OUT] 调优异常下返回信息
* @retval #MSTUNE_SUCCESS 执行成功
* @retval #MSTUNE_FAILED 执行失败
* @par 依赖:
* @li tune_api.cpp:该接口所属的开发包。
* @li tune_api.h:该接口声明所在的头文件。
* @see 无
* @since
*/
MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg);
/**
* @ingroup mstune
* @par 描述: 梯度调优
*
* @attention 无
* @param tuningGraph [IN] 调优图
* @param dependGraph [IN] 调优依赖图
* @param session [IN] ge连接会话
* @param option [IN] 参数集. 包含调优参数及ge参数
* @retval #MSTUNE_SUCCESS 执行成功
* @retval #MSTUNE_FAILED 执行失败
* @par 依赖:
* @li tune_api.cpp:该接口所属的开发包。
* @li tune_api.h:该接口声明所在的头文件。
* @see 无
* @since
*/
extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
#endif

Loading…
Cancel
Save