diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h index 578f3bdd..28e500c4 100644 --- a/inc/external/acl/acl_rt.h +++ b/inc/external/acl/acl_rt.h @@ -161,6 +161,17 @@ ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExcept */ ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info); +/** + * @ingroup AscendCL + * @brief Get error code from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The error code from exception information + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetErrorCodeFromExceptionInfo(const aclrtExceptionInfo *info); + /** * @ingroup AscendCL * @brief The thread that handles the callback function on the Stream diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index 05122efb..cc607b74 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -65,6 +65,7 @@ static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is ful static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common over flow +static const int32_t ACL_ERROR_RT_DEVIDE_OOM = 207018; // device oom static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h index 73753b72..f000ac46 100644 --- a/inc/external/ge/ge_api.h +++ b/inc/external/ge/ge_api.h @@ -191,6 +191,8 @@ class GE_FUNC_VISIBILITY Session { bool IsGraphNeedRebuild(uint32_t graph_id); + uint64_t GetSessionId() const; + private: uint64_t sessionId_; }; diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 208b7eab..a377c2f7 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -326,6 +326,10 @@ const char_t *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir"; // Its value should be "disable", "enable" or "force", default value is "disable" const char_t *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode"; +// Configure build model type. FE needs this option to judge whether the model is an inner model +// Its value should be "true" or "false" +const char_t *const BUILD_INNER_MODEL = "ge.build_inner_model"; +
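A minimal usage sketch for the new getter, assuming the callback is registered through the existing aclrtSetExceptionInfoCallback API and that the sibling aclrtGet*FromExceptionInfo getters declared above are available:

#include "acl/acl_rt.h"
#include <cstdio>

// Callback invoked by the runtime when a task raises an exception.
static void OnTaskException(aclrtExceptionInfo *info) {
  const uint32_t dev_id = aclrtGetDeviceIdFromExceptionInfo(info);
  const uint32_t err_code = aclrtGetErrorCodeFromExceptionInfo(info);  // new in this change
  if (err_code != 0xFFFFFFFFU) {  // 0xFFFFFFFF signals a null info pointer
    (void)fprintf(stderr, "device %u reported error code %u\n", dev_id, err_code);
  }
}

// Registered once after runtime init: (void)aclrtSetExceptionInfoCallback(OnTaskException);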
// Configure whether to use single stream. // Its value should be "true" or "false", default value is "false" const char_t *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream"; @@ -375,6 +379,17 @@ const std::string OP_EXECUTE_TIMEOUT = "ge.exec.opExecuteTimeout"; const char_t *const FILE_CONSTANT_PATH = "ge.exec.value_bins"; +const char_t *const ENABLE_GRAPH_PARALLEL = "ge.enableGraphParallel"; + +const char_t *const RESOURCE_CONFIG_PATH = "ge.resourceConfigPath"; + +const std::string RECOMPUTE = "ge.recompute"; + +const char_t *const GRAPH_PARALLEL_OPTION_PATH = "ge.graphParallelOptionPath"; + +// 1: Complete graph resource evaluation (includes graph fusion optimization) +// 2: Simplified graph resource evaluation (no graph fusion optimization) +const char_t *const EVALUATE_GRAPH_RESOURCE_MODE = "ge.evaluateGraphResourceMode"; // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; @@ -450,6 +465,7 @@ static const char_t *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE static const char_t *const DEBUG_DIR = ge::DEBUG_DIR; static const char_t *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; static const char_t *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; +static const char_t *const BUILD_INNER_MODEL = ge::BUILD_INNER_MODEL; static const char_t *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); static const char_t *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); static const char_t *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); @@ -490,7 +506,9 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT, PERFORMANCE_MODE, SHAPE_GENERALIZED_BUILD_MODE, MODIFY_MIXLIST, - CUSTOMIZE_DTYPES}; + CUSTOMIZE_DTYPES, + BUILD_INNER_MODEL, + EVALUATE_GRAPH_RESOURCE_MODE}; // for interface: aclgrphParse const std::set<std::string> ir_parser_suppported_options = { diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h index 170c7862..8c8650b9 100644 --- a/inc/external/hccl/hccl.h +++ b/inc/external/hccl/hccl.h @@ -81,7 +81,8 @@ extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, Hc * * @param buf A pointer identifying the data address of the operator. * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, int64, +uint8, uint16, uint32, uint64, float16, float32, float64. * @param root An integer(u32) identifying the root rank in the operator. * @param comm A pointer identifying the communication resource based on * @param stream A pointer identifying the stream information. @@ -111,7 +112,8 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC * @param sendBuf A pointer identifying the input data address of the operator. * @param recvBuf A pointer identifying the output data address of the operator. * @param sendCount An integer(u64) identifying the number of the input data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, int64, +uint8, uint16, uint32, uint64, float16, float32, float64. * @param comm A pointer identifying the communication resource based on. * @param stream A pointer identifying the stream information. * @return HcclResult
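The widened dataType lists above correspond to the HcclDataType values added to hccl_types.h further down (uint8, uint16, uint32, fp64). A hedged sketch of a broadcast over fp64 elements; comm, stream and the device buffer are assumed to be created elsewhere (e.g. via HcclCommInitRootInfo, aclrtCreateStream and aclrtMalloc):

#include <hccl/hccl.h>

HcclResult BroadcastDoubles(void *buf, uint64_t count, HcclComm comm, aclrtStream stream) {
  // Rank 0 acts as root; every rank ends up with `count` fp64 values in buf.
  return HcclBroadcast(buf, count, HCCL_DATA_TYPE_FP64, 0U, comm, stream);
}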
@@ -149,7 +151,8 @@ extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream); * * @param sendBuff A pointer identifying the input data address of the operator. * @param count An integer(u64) identifying the number of the send data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, int64, +uint8, uint16, uint32, uint64, float16, float32, float64. * @param destRank An integer identifying the destination rank. * @param comm A pointer identifying the communication resource based on. * @param stream A pointer identifying the stream information. @@ -162,7 +165,8 @@ extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType, * * @param recvBuff A pointer identifying the output data address of the operator. * @param count An integer(u64) identifying the number of the receive data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, int64, +uint8, uint16, uint32, uint64, float16, float32, float64. * @param srcRank An integer identifying the source rank. * @param comm A pointer identifying the communication resource based on. * @param stream A pointer identifying the stream information. @@ -177,15 +181,15 @@ extern HcclResult HcclRecv(void *recvBuf, uint64_t count, HcclDataType dataType, * * @param sendBuff A pointer identifying the input data address of the operator. * @param sendCounts Integer array, where entry i specifies the number of elements to send to rank i. * @param sdispls Integer array, where entry i specifies the displacement (offset from sendbuf, in units of sendtype) - * from which to send data to rank i. - * @param sendType Datatype of send buffer elements, must be one of the following types: int8, int32, int64, uint64, - * float16, float32. +from which to send data to rank i. + * @param sendType Datatype of send buffer elements, must be one of the following types: int8, int16, int32, int64, +uint8, uint16, uint32, uint64, float16, float32, float64. * @param recvBuf A pointer identifying the output data address of the operator. * @param recvCounts Integer array, where entry j specifies the number of elements to receive from rank j. * @param rdispls Integer array, where entry j specifies the displacement (offset from recvbuf, in units of recvtype) to - * which data from rank j should be written. - * @param recvType Datatype of receive buffer elements, must be one of the following types: int8, int32, int64, uint64, - * float16, float32. +which data from rank j should be written. + * @param recvType Datatype of receive buffer elements, must be one of the following types: int8, int16, int32, int64, +uint8, uint16, uint32, uint64, float16, float32, float64. * @param comm A pointer identifying the communication resource based on. * @param stream A pointer identifying the stream information. * @return HcclResult */ extern HcclResult HcclAlltoAllV(const void *sendBuf, const void *sendCounts, const void *sdispls, HcclDataType sendType, const void *recvBuf, const void *recvCounts, const void *rdispls, HcclDataType recvType, HcclComm comm, aclrtStream stream);
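A sketch of HcclAlltoAllV using the newly documented uint16 element type, exchanging one element with every rank; per the parameter docs the counts and displacements are u64 arrays, and rankCount, the device buffers, comm and stream are assumed to be set up elsewhere:

#include <hccl/hccl.h>
#include <vector>

HcclResult AllToAllOneEach(uint32_t rankCount, void *sendBuf, void *recvBuf,
                           HcclComm comm, aclrtStream stream) {
  std::vector<uint64_t> counts(rankCount, 1U);  // one uint16 to/from each rank
  std::vector<uint64_t> displs(rankCount, 0U);
  for (uint32_t i = 0U; i < rankCount; ++i) {
    displs[i] = i;  // contiguous packing, in units of the element type
  }
  return HcclAlltoAllV(sendBuf, counts.data(), displs.data(), HCCL_DATA_TYPE_UINT16,
                       recvBuf, counts.data(), displs.data(), HCCL_DATA_TYPE_UINT16,
                       comm, stream);
}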
+/** + * @brief Reduce operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, + * float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param root An integer(u32) identifying the root rank in the operator. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, + uint32_t root, HcclComm comm, aclrtStream stream); + /** * @brief Destroy HCCL comm * diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h index 2fe98fde..4f6028c4 100644 --- a/inc/external/hccl/hccl_types.h +++ b/inc/external/hccl/hccl_types.h @@ -84,6 +84,10 @@ typedef enum { HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ + HCCL_DATA_TYPE_UINT8 = 7, /**< uint8 */ + HCCL_DATA_TYPE_UINT16 = 8, /**< uint16 */ + HCCL_DATA_TYPE_UINT32 = 9, /**< uint32 */ + HCCL_DATA_TYPE_FP64 = 10, /**< fp64 */ HCCL_DATA_TYPE_RESERVED /**< reserved */ } HcclDataType; diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index 05122efb..cc607b74 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -65,6 +65,7 @@ static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is ful static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common over flow +static const int32_t ACL_ERROR_RT_DEVIDE_OOM = 207018; // device oom static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index aff56d4f..37afd233 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -85,9 +85,10 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) { } \ } while (false) -#define GEEVENT(fmt, ...) \ - do { \ - dlog_event(GE_MODULE_NAME, "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ +#define GEEVENT(fmt, ...) \ + do { \ + dlog_event((RUN_LOG_MASK | GE_MODULE_NAME), "%" PRIu64 " %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], \ + ##__VA_ARGS__); \ } while (false)
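A usage sketch for the HcclReduce operator introduced above: a sum reduction of fp32 values into rank 0. It mirrors HcclAllReduce usage except that only the root receives the result; HCCL_REDUCE_SUM is the existing HcclReduceOp value:

#include <hccl/hccl.h>

HcclResult SumToRoot(void *sendBuf, void *recvBuf, uint64_t count,
                     HcclComm comm, aclrtStream stream) {
  // recvBuf only has to be valid on the root rank (rank 0 here).
  return HcclReduce(sendBuf, recvBuf, count, HCCL_DATA_TYPE_FP32,
                    HCCL_REDUCE_SUM, 0U, comm, stream);
}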
#define GELOGT(VALUE, fmt, ...) \ diff --git a/inc/framework/common/fmk_error_codes.h b/inc/framework/common/fmk_error_codes.h index 00610af5..dd1bd678 100644 --- a/inc/framework/common/fmk_error_codes.h +++ b/inc/framework/common/fmk_error_codes.h @@ -46,7 +46,7 @@ #define DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, name, value) #define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, name, value) -#define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno((name), (desc)); +#define DEF_ERRORNO(name, desc) const bool g_##name##_errorno = StatusFactory::Instance()->RegisterErrorNo(name, desc) // Interface for Obtaining Error Code Description #define GET_ERRORNO_STR(value) domi::StatusFactory::Instance()->GetErrDesc(value) @@ -60,7 +60,7 @@ class GE_FUNC_VISIBILITY StatusFactory { public: static StatusFactory *Instance(); - void RegisterErrorNo(const uint32_t err, const std::string &desc); + bool RegisterErrorNo(const uint32_t err, const std::string &desc); std::string GetErrDesc(const uint32_t err); @@ -72,14 +72,6 @@ class GE_FUNC_VISIBILITY StatusFactory { std::map<uint32_t, std::string> err_desc_; }; -class GE_FUNC_VISIBILITY ErrorNoRegisterar { - public: - ErrorNoRegisterar(const uint32_t err, const std::string &desc) { - StatusFactory::Instance()->RegisterErrorNo(err, desc); - } - ~ErrorNoRegisterar() {} -}; - // Common error code DECLARE_ERRORNO_COMMON(MEMALLOC_FAILED, 0); // 50331648 DECLARE_ERRORNO_COMMON(CCE_FAILED, 2); // 50331650 diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index bbbbf4b2..e21995e3 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -83,6 +83,7 @@ const std::string kAtomicOpType = "DynamicAtomicAddrClean"; const std::string kShapeTypeStatic = "static"; const std::string kShapeTypeDynamic = "dynamic"; +const std::string kAtomicPrefix = "_atomic"; constexpr uint64_t kInferSessionId = 0U; constexpr uint64_t kReleaseFlag = 1U; @@ -369,7 +370,16 @@ struct DumpConfig { std::string dump_status; std::string dump_op_switch; std::string dump_debug; + std::string dump_step; std::vector<std::string> dump_list; }; + +struct ModelQueueParam { + uint32_t group_total_count{1}; + uint32_t group_index{0U}; + uint32_t group_policy{0U}; + std::vector<uint32_t> input_queues; + std::vector<uint32_t> output_queues; +}; } // namespace ge #endif // INC_FRAMEWORK_COMMON_GE_TYPES_H_
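A sketch of populating the new ModelQueueParam, which replaces the separate input/output queue-id vectors of GeExecutor::LoadModelWithQ further down; the queue ids here are placeholders for values obtained from the queue-management API:

ge::ModelQueueParam queue_param;      // group fields keep their defaults: one group, index 0, policy 0
queue_param.input_queues = {0U, 1U};  // hypothetical input queue ids
queue_param.output_queues = {2U};     // hypothetical output queue id

uint32_t model_id = 0U;
// Status ret = executor.LoadModelWithQ(model_id, root_model, queue_param);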
diff --git a/inc/framework/common/ge_visibility.h b/inc/framework/common/ge_visibility.h new file mode 100644 index 00000000..1041a93e --- /dev/null +++ b/inc/framework/common/ge_visibility.h @@ -0,0 +1,28 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AIR_CXX_INC_FRAMEWORK_COMMON_GE_VISIBILITY_H_ +#define AIR_CXX_INC_FRAMEWORK_COMMON_GE_VISIBILITY_H_ + +#if defined(_MSC_VER) +#define VISIBILITY_EXPORT __declspec(dllexport) +#define VISIBILITY_HIDDEN __declspec(dllimport) +#else +#define VISIBILITY_EXPORT __attribute__((visibility("default"))) +#define VISIBILITY_HIDDEN __attribute__((visibility("hidden"))) +#endif + +#endif // AIR_CXX_INC_FRAMEWORK_COMMON_GE_VISIBILITY_H_ diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index fda86b19..e4141a4b 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -68,7 +68,7 @@ class GE_FUNC_VISIBILITY ModelHelper { bool IsPartitionedGraph(const GeModelPtr &cur_model) const; Status GenerateGeModel(const OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index, - const bool is_dyn_root); + const bool is_dyn_root) const; Status GenerateGeRootModel(const OmFileLoadHelper &om_load_helper); Status LoadModelData(const OmFileLoadHelper &om_load_helper, const GeModelPtr &cur_model, diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index 95f1dca1..5d2f8400 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ b/inc/framework/common/op/ge_op_utils.h @@ -20,7 +20,7 @@ #include #include -#include "framework/common/op/attr_value_util.h" +#include "graph/debug/ge_attr_define.h" #include "framework/common/util.h" #include "graph/attr_value.h" #include "graph/ge_tensor.h" diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index ab5218c0..f8529fa2 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -87,6 +87,8 @@ REGISTER_OPTYPE_DECLARE(DROPOUTDOMASK, "DropOutDoMask"); REGISTER_OPTYPE_DECLARE(DROPOUTDOMASKV3, "DropOutDoMaskV3"); REGISTER_OPTYPE_DECLARE(DROPOUTDOMASKV3D, "DropOutDoMaskV3D"); REGISTER_OPTYPE_DECLARE(SOFTMAXV2WITHDROPOUTDOMASKV3D, "SoftmaxV2WithDropOutDoMaskV3D"); +REGISTER_OPTYPE_DECLARE(ATTENTIONSCORE, "AttentionScore"); +REGISTER_OPTYPE_DECLARE(ATTENTIONSCOREGRAD, "AttentionScoreGrad"); REGISTER_OPTYPE_DECLARE(DROPOUTGENMASK, "DropOutGenMask"); REGISTER_OPTYPE_DECLARE(AXPYWITHSOFTMAXANDDROPOUTDOMASK, "AxpyWithSoftmaxAndDropOutDoMask"); REGISTER_OPTYPE_DECLARE(CONCAT, "Concat"); @@ -587,9 +589,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_CON // dim default size value constexpr int32_t DIM_DEFAULT_SIZE = 4; -// dim extension default value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t DIM_DEFAULT_VALUE; - // default NCHW index FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_N; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t NCHW_DIM_C; diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index 84912e64..c334d25c 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -21,13 +21,11 @@ #include #include -#include #include "external/graph/types.h" #include "external/register/register.h" #include "framework/common/debug/log.h" #include "framework/common/scope_guard.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/detail/attributes_holder.h" #define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ do { \ @@ -247,67 +245,6 @@ GE_FUNC_VISIBILITY std::string ToString(const std::vector<T> &v) { return ss.str(); }
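A usage sketch for the new ge_visibility.h macros above: public entry points of a shared library are annotated with VISIBILITY_EXPORT, internal helpers with VISIBILITY_HIDDEN (mapped to the GCC/Clang visibility attribute, or to __declspec under MSC):

#include "framework/common/ge_visibility.h"

VISIBILITY_EXPORT int PublicEntry(int x);     // part of the library interface
VISIBILITY_HIDDEN int InternalHelper(int x);  // kept out of the exported symbol table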
-/// -/// @ingroup domi_common -/// @brief Converts RepeatedField to String. -/// @param [in] rpd_field RepeatedField -/// @return string -/// -template <typename T> -GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) { - std::stringstream ss; - ss << "["; - for (const T x : rpd_field) { - ss << x; - ss << ", "; - } - // Delete the two extra characters at the end of the line. - std::string str = ss.str().substr(0U, ss.str().length() - 2U); - str += "]"; - return str; -} - -/// -/// @ingroup ge_ir_utils -/// @brief RepeatedPtrField->String -/// @param [in] const rpd_field RepeatedPtrField -/// @return String -/// -template <typename T> -GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedPtrField<T> &rpd_ptr_field) { - std::stringstream ss; - ss << "["; - for (const T &x : rpd_ptr_field) { - ss << x; - ss << ", "; - } - std::string str_ret = ss.str().substr(0U, ss.str().length() - 2U); - str_ret += "]"; - return str_ret; -} - -/// -/// @ingroup domi_common -/// @brief Reads the proto structure from an array. -/// @param [in] data proto data to be read -/// @param [in] size proto data size -/// @param [out] proto Memory for storing the proto file -/// @return true success -/// @return false fail -/// -GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *const data, const int32_t size, - google::protobuf::Message *const proto); - -/// -/// @ingroup domi_proto -/// @brief Reads the proto file in the text format. -/// @param [in] file path of proto file -/// @param [out] message Memory for storing the proto file -/// @return true success -/// @return false fail -/// -GE_FUNC_VISIBILITY bool ReadProtoFromText(const char_t *const file, google::protobuf::Message *const message); - /// /// @ingroup: domi_common /// @brief: get length of file @@ -359,15 +296,6 @@ GE_FUNC_VISIBILITY uint64_t GetCurrentTimestamp(); /// GE_FUNC_VISIBILITY uint32_t GetCurrentSecondTimestap(); -/// -/// @ingroup domi_common -/// @brief Check whether the product of two int64 numbers exceeds the int64 range. -/// @param [in] a -/// @param [in] b -/// @return false: overflow; true: the result is within the normal int64 range. -/// -GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(const int64_t a, const int64_t b); - /// /// @ingroup domi_common /// @brief Absolute path for obtaining files.
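CheckInt64MulOverflow is removed above; for callers that need the same guard inline, a standalone sketch of its documented contract (true when a * b stays within int64_t), not the original implementation:

#include <cstdint>

inline bool CheckInt64MulOverflow(const int64_t a, const int64_t b) {
  if ((a == 0) || (b == 0)) {
    return true;  // zero never overflows
  }
  if (a > 0) {
    return (b > 0) ? (a <= (INT64_MAX / b)) : (b >= (INT64_MIN / a));
  }
  return (b > 0) ? (a >= (INT64_MIN / b)) : (b >= (INT64_MAX / a));
}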
diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 3c921345..1c2a52e6 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -229,15 +229,14 @@ class GE_FUNC_VISIBILITY GeExecutor { /// /// @ingroup ge - /// @brief Load task list from ModelData with queue. + /// @brief Load task list from GeRootModel with queue and param. /// @param [out] model_id: model id allocate from manager. /// @param [in] root_model: Instance of GeRootModel. - /// @param [in] input_queue_ids: input queue ids create from user. - /// @param [in] output_queue_ids: output queue ids create from user. + /// @param [in] model_queue_param: params and queue ids created by user. /// @return: 0 for success / others for fail /// Status LoadModelWithQ(uint32_t &model_id, const std::shared_ptr<GeRootModel> &root_model, - const std::vector<uint32_t> &input_queue_ids, const std::vector<uint32_t> &output_queue_ids); + const ModelQueueParam &model_queue_param); /// /// @ingroup ge @@ -324,6 +323,8 @@ class GE_FUNC_VISIBILITY GeExecutor { static Status ReleaseSingleOpResource(void *const stream); + static Status ClearCustomAicpuSo(); + static Status GetDeviceIdByModelId(const uint32_t model_id, uint32_t &device_id); Status GetBatchInfoSize(const uint32_t model_id, size_t &shape_count); diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index 8213c115..371caf24 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -27,8 +27,8 @@ #include "graph/ge_tensor.h" #include "graph/graph.h" #include "graph/op_desc.h" -#include "graph/detail/attributes_holder.h" #include "framework/omg/omg_inner_types.h" +#include "graph/detail/attributes_holder.h" namespace ge { const std::string kAttrSupportDynamicShape = "support_dynamicshape"; diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h index 30ade3b7..d771bddc 100644 --- a/inc/framework/memory/memory_api.h +++ b/inc/framework/memory/memory_api.h @@ -64,5 +64,14 @@ GE_FUNC_VISIBILITY Status MallocSharedMemory(const TensorInfo &tensor_info, uint /// \param var_size [out] var_size memory_size of host variable. /// \return Status result of function GE_FUNC_VISIBILITY Status GetVarBaseAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size); + +/* + * @brief Get memory info of a session. + * @param [in] session_id + * @param [out] var_size: session variables mem size + * @param [out] graphs_mem_info: graphs mem info, include key: graph_id; value: {feature_map_size, const_size} + */ +GE_FUNC_VISIBILITY Status GetSessionMemInfo(const uint64_t session_id, uint64_t &var_size, + std::map<uint32_t, std::vector<uint64_t>> &graphs_mem_info); } // namespace ge #endif // INC_FRAMEWORK_MEMORY_MEMORY_API_H_ diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 8c0130c8..c1d1717b 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -30,8 +30,6 @@ #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" -using Status = domi::Status; - namespace domi { using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>( const google::protobuf::Message *root_proto, const std::string &graph)>; diff --git a/inc/framework/omg/parser/op_parser.h b/inc/framework/omg/parser/op_parser.h index f17b6fee..dfed00f6 100644 --- a/inc/framework/omg/parser/op_parser.h +++ b/inc/framework/omg/parser/op_parser.h @@ -24,7 +24,6 @@ #include "graph/utils/op_desc_utils.h" using google::protobuf::Message; -using Status = domi::Status; namespace ge { /** diff --git a/inc/framework/omg/parser/parser_types.h b/inc/framework/omg/parser/parser_types.h index c1e94a4c..457e51a7 100644 --- a/inc/framework/omg/parser/parser_types.h +++ b/inc/framework/omg/parser/parser_types.h @@ -21,6 +21,7 @@ #include #include "register/register_types.h" +#include "graph/types.h" #if !defined(__ANDROID__) && !defined(ANDROID) #ifndef DOMI_DYNAMIC_CAST @@ -40,427 +41,428 @@ namespace ge { namespace parser { -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DATA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *AIPPDATA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONVOLUTION; -FMK_FUNC_HOST_VISIBILITY
FMK_FUNC_DEV_VISIBILITY extern const char *CORRELATION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CORRELATIONV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DECONVOLUTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *POOLING; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ELTWISE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RELU; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RELU6; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SIGMOID; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ABSVAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TANH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PRELU; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BATCHNORM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FUSIONBATCHNORM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SCALE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FULL_CONNECTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PLUS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACTIVATION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FLATTEN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ADD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SUB; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MUL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MATMUL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RSQRT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BIASADD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RESHAPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFORMAT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPCONVOLUTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DROPOUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DROPOUTGENMASK; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DROPOUTDOMASK; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCAT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ROIPOOLING; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PROPOSAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FSRDETECTIONOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DETECTIONPOSTPROCESS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LRN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TRANSDATA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PERMUTE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDNORMALIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDPRIORBOX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NETOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDDETECTIONOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFINEDETDETECTIONOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern 
const char *CHANNELAXPY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PSROIPOOLING; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *POWER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *POW; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ROIALIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PYTHON; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FREESPACEEXTRACT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPATIALTF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SHAPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SHAPEN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ARGMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GATHERND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GATHER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REALDIV; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PACK; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SLICE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SLICED; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FLOORDIV; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQUEEZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *UNSQUEEZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STRIDEDSLICE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RANGE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RPNPROPOSALS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DECODEBBOX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PADV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MIRRORPAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CLIPBOXES; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTRCNNPREDICTIONS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPLIT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPLITV; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EXPANDDIMS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EMPTY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MEAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GREATER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SWITCH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SWITCHN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MERGE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SYMBOLICGRADIENT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REMOTECALL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_IF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSIF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *IF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CASE; 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSCASE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_WHILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *WHILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSWHILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PARTITIONEDCALL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATEFULPARTITIONEDCALL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FAKEPARAM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TRANSPOSE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TRANSPOSED; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CAST; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REGION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *YOLO; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *YOLODETECTIONOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FILL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REVERSE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *UNPACK; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *YOLO2REORG; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCESUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONSTANT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RESIZEBILINEAR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RESIZEBILINEARGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MAXIMUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FRAMEWORKOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ARG; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FUSEDBATCHNORMGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LSTM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HIGHWAY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RNN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATTENTIONDECODER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LOGICAL_NOT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LOGICAL_AND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LOGICAL_OR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EQUAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NOTEQUAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *INTERP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SHUFFLECHANNEL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *AIPP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MULTISHAPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RECIPROCAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SELU; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ELU; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACOSH; -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *ASINH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MINIMUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CLIP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *L2NORMALIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CROPANDRESIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *UNUSEDCONST; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPARSETODENSE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NONMAXSUPPRESSION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TOPKV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *INVERTPERMUTATION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MULTINOMIAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REVERSESEQUENCE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCEPROD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCEMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCEMIN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EXTRACTIMAGEPATCHES; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQRT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCEALL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RESIZENEARESTNEIGHBOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPACETOBATCHND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BATCHTOSPACEND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSERT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GREATEREQUAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FLOOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RANDOMUNIFORM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BATCHMATMUL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPACETODEPTH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHTOSPACE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RINT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATAN2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATANH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACOS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASIN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NEG; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LOG; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ROUND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *UPSAMPLE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FLOORMOD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LESS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LESSEQUAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ONEHOT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFSWITCH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const 
char *REFMERGE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ENTER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFENTER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LOOPCOND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NEXTITERATION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFNEXTITERATION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EXIT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFEXIT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONTROLTRIGGER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ZEROSLIKE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EXP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *WHERE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FAKEQUANTWITHMINMAXVARS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTPLUS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTSIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *COSH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SINH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQUAREDDIFFERENCE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DATA; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *AIPPDATA; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONVOLUTION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CORRELATION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CORRELATIONV2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DECONVOLUTION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *POOLING; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ELTWISE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RELU; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RELU6; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SIGMOID; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ABSVAL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TANH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PRELU; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BATCHNORM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FUSIONBATCHNORM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SCALE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FULL_CONNECTION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SOFTMAX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PLUS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ACTIVATION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FLATTEN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ADD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SUB; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MUL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const 
ge::char_t *MATMUL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RSQRT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BIASADD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RESHAPE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REFORMAT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DEPCONVOLUTION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DROPOUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DROPOUTGENMASK; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DROPOUTDOMASK; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONCAT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ROIPOOLING; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PROPOSAL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FSRDETECTIONOUTPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DETECTIONPOSTPROCESS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LRN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TRANSDATA; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PERMUTE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SSDNORMALIZE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SSDPRIORBOX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *NETOUTPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SSDDETECTIONOUTPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REFINEDETDETECTIONOUTPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CHANNELAXPY; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PSROIPOOLING; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *POWER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *POW; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ROIALIGN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PYTHON; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FREESPACEEXTRACT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SPATIALTF; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SHAPE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SHAPEN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ARGMAX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *GATHERND; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *GATHER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REALDIV; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PACK; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SLICE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SLICED; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FLOORDIV; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SQUEEZE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *UNSQUEEZE; 
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *STRIDEDSLICE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RANGE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RPNPROPOSALS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DECODEBBOX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PADV2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MIRRORPAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TILE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SIZE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CLIPBOXES; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FASTRCNNPREDICTIONS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SPLIT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SPLITV; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *EXPANDDIMS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *EMPTY; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MEAN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *GREATER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SWITCH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SWITCHN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MERGE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SYMBOLICGRADIENT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REMOTECALL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *_IF; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *STATELESSIF; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *IF; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CASE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *STATELESSCASE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *_WHILE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *WHILE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *STATELESSWHILE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FOR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PARTITIONEDCALL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *STATEFULPARTITIONEDCALL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FAKEPARAM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TRANSPOSE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TRANSPOSED; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CAST; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REGION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *YOLO; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *YOLODETECTIONOUTPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FILL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern 
const ge::char_t *REVERSE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *UNPACK; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *YOLO2REORG; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REDUCESUM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SUM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONSTANT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FILECONSTANT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RESIZEBILINEAR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RESIZEBILINEARGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MAXIMUM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FRAMEWORKOP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ARG; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FUSEDBATCHNORMGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LSTM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HIGHWAY; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RNN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ATTENTIONDECODER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LOGICAL_NOT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LOGICAL_AND; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LOGICAL_OR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *EQUAL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *NOTEQUAL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *INTERP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SHUFFLECHANNEL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *AIPP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MULTISHAPE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RECIPROCAL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SELU; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ELU; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ACOSH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASINH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MINIMUM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CLIP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *L2NORMALIZE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CROPANDRESIZE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *UNUSEDCONST; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SPARSETODENSE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *NONMAXSUPPRESSION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TOPKV2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *INVERTPERMUTATION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MULTINOMIAL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t 
*REVERSESEQUENCE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REDUCEPROD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REDUCEMAX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REDUCEMIN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *EXTRACTIMAGEPATCHES; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SQRT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REDUCEALL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RESIZENEARESTNEIGHBOR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SPACETOBATCHND; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BATCHTOSPACEND; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASSERT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *GREATEREQUAL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FLOOR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RANDOMUNIFORM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BATCHMATMUL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SPACETODEPTH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DEPTHTOSPACE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RINT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ATAN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ATAN2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ATANH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ACOS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASIN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *NEG; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LOG; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TAN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ROUND; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *UPSAMPLE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FLOORMOD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LESS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LESSEQUAL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ONEHOT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REFSWITCH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REFMERGE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ENTER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REFENTER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LOOPCOND; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *NEXTITERATION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REFNEXTITERATION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *EXIT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REFEXIT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONTROLTRIGGER; +FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ZEROSLIKE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *EXP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *WHERE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FAKEQUANTWITHMINMAXVARS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SOFTPLUS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SOFTSIGN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *COSH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SINH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SQUAREDDIFFERENCE; // for retinanet scope fusion -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REQUIREDSPACETOBATCHPADDINGS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDPOSTPROCESSOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETBOXES; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINAMULTIANCHORS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETCLIPPEDBOXES; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETFILTEREDDETECTIONS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETPOSTPROCESSOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETANCHORS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNMAP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNMAP1; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNSECONDSTAGEPOSTPROCESSOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNROIINTERPOOLING; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNFIRSTSTAGEPOSTPROCESSOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNGRIDANCHORGENERATOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ROIINTERPOOLING; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNCLIPTOWINDOW; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EMBEDLOOKUP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HASHLOOKUP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LSH_PROJ; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SVDF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDANCHORGENERATOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *IDENTITY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *IDENTITYN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PLACEHOLDERWITHDEFAULT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SELECT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GETSPAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STOPGRADIENT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PREVENTGRADIENT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GUARANTEECONST; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BROADCASTGRADIENTARGS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BROADCASTARGS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY 
extern const char *CONFUSIONMATRIX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RANK; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PLACEHOLDER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *END; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BASICLSTMCELL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GETNEXT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *INITDATA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFIDENTITY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BITCAST; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REQUIREDSPACETOBATCHPADDINGS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SSDPOSTPROCESSOR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RETINANETBOXES; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RETINAMULTIANCHORS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RETINANETCLIPPEDBOXES; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RETINANETFILTEREDDETECTIONS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RETINANETPOSTPROCESSOR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RETINANETANCHORS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FASTERRCNNMAP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FASTERRCNNMAP1; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FASTERRCNNSECONDSTAGEPOSTPROCESSOR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FASTERRCNNROIINTERPOOLING; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FASTERRCNNFIRSTSTAGEPOSTPROCESSOR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FASTERRCNNGRIDANCHORGENERATOR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ROIINTERPOOLING; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FASTERRCNNCLIPTOWINDOW; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *EMBEDLOOKUP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HASHLOOKUP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LSH_PROJ; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SVDF; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SSDANCHORGENERATOR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *IDENTITY; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *IDENTITYN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PLACEHOLDERWITHDEFAULT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SELECT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *GETSPAN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *STOPGRADIENT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PREVENTGRADIENT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *GUARANTEECONST; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BROADCASTGRADIENTARGS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const 
ge::char_t *BROADCASTARGS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONFUSIONMATRIX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RANK; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PLACEHOLDER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *END; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BASICLSTMCELL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *GETNEXT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *INITDATA; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REFIDENTITY; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BITCAST; /***************Ann special operator*************************/ -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_MEAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_CONVOLUTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_DEPCONVOLUTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_FULLCONNECTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_NETOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_DATA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_RESHAPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_ADD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_MUL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_SUB; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_DIV; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_DEQUANTIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_QUANTIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_PAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_RESIZE_BILINEAR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_MEAN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_CONVOLUTION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_DEPCONVOLUTION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_FULLCONNECTION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_NETOUTPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_DATA; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_RESHAPE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_ADD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_MUL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_SUB; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_DIV; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_DEQUANTIZE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_QUANTIZE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_PAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANN_RESIZE_BILINEAR; /***************************************************/ /******************Training operator*************************/ 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GATHERV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONVGRADFILTER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONV2D; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONV2DBACKPROPINPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FUSEDBATCHNORM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BIASADDGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACTIVATIONGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MAXPOOLWITHARGMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MAXPOOLGRADWITHARGMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPARSESOFTMAXCROSSENTROPYWITHLOGITS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SNAPSHOT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VAR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MEANGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TRANSLATE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ADDN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *L2LOSS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MULTIPLY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HUBERLOSSGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HUBERLOSS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NEGATIVE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDCAST; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPARSESOFTMAXCROSSENTROPY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPARSESOFTMAXCROSSENTROPYGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDSQUEEZEFUSION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCATFOUR2FIVE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCATFIVE2FOUR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDREALDIVTILEMUL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDSUMMULREALDIVMEAN; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARIABLEV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARHANDLEOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TEMPORARYVARIABLE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DESTROYTEMPORARYVARIABLE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARIABLE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGNVARIABLEOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGNADD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGNADDVARIABLEOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGNSUB; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGNSUBVARIABLEOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYMOMENTUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RESOURCEAPPLYMOMENTUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SGD; 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NOOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *READVARIABLEOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PARALLELCONCATSTART; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONSTANTOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHWISECONV2DBACKPROPFILTER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHWISECONV2DBACKPORPINPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHWISECONV2DFORWARDNATIVE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DROPOUTGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYRMSPROPMIXEDPRECISION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYRMSPROP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RELU6GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *AVGPOOLGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCATV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCATOFFSET; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LAYERNORMGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LAYERNORM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LARS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DYNAMICSTITCH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *GATHERV2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONVGRADFILTER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONV2D; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONV2DBACKPROPINPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FUSEDBATCHNORM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BIASADDGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ACTIVATIONGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MAXPOOLWITHARGMAX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MAXPOOLGRADWITHARGMAX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SPARSESOFTMAXCROSSENTROPYWITHLOGITS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SNAPSHOT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *VAR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MEANGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TRANSLATE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ADDN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *L2LOSS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MULTIPLY; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HUBERLOSSGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HUBERLOSS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *NEGATIVE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SSDCAST; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SPARSESOFTMAXCROSSENTROPY; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY 
extern const ge::char_t *SPARSESOFTMAXCROSSENTROPYGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SSDSQUEEZEFUSION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONCATFOUR2FIVE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONCATFIVE2FOUR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SSDREALDIVTILEMUL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SSDSUMMULREALDIVMEAN; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *VARIABLEV2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *VARHANDLEOP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TEMPORARYVARIABLE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DESTROYTEMPORARYVARIABLE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *VARIABLE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASSIGN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASSIGNVARIABLEOP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASSIGNADD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASSIGNADDVARIABLEOP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASSIGNSUB; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASSIGNSUBVARIABLEOP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYMOMENTUM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RESOURCEAPPLYMOMENTUM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SGD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *NOOP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *READVARIABLEOP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *PARALLELCONCATSTART; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONSTANTOP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DEPTHWISECONV2DBACKPROPFILTER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DEPTHWISECONV2DBACKPORPINPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DEPTHWISECONV2DFORWARDNATIVE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DROPOUTGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYRMSPROPMIXEDPRECISION; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYRMSPROP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RELU6GRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *AVGPOOLGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONCATV2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONCATOFFSET; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LAYERNORMGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LAYERNORM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LARS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DYNAMICSTITCH; /***************************************************/ -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const 
char *SQUARE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMBROADCAST; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMALLGATHER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMALLREDUCE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREDUCESCATTER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMSEND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMRECEIVE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREFREAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEWRITE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTESCATTERWRITE; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARASSIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARISINITIALIZEDOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LogTimeStamp; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ISVARIABLEINITIALIZED; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STREAMSWITCH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STREAMSWITCHN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STREAMACTIVE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MEMCPYASYNC; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MEMCPYADDRASYNC; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STREAMMERGE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ENDGRAPH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SEND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RECV; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ENDOFSEQUENCE; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LABELSET; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LABELGOTO; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LABELGOTOEX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LABELSWITCH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LABELSWITCHBYINDEX; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATOMICADDRCLEAN; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ABS_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACCUMULATE_N_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACOS_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACOSH_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPROXIMATE_EQUAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASIN_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASINH_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATAN_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BROADCAST_TO; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ELU_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ADD_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern 
const char *DATAFORMATDIMMAP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DATAFORMATVECPERMUTE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BESSELI0E; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BESSELI1E; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADADELTA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADAGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADAGRADDA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADAM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADAMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADDSIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYCENTEREDRMSPROP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYFTRL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYFTRLV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYGRADIENTDESCENT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYPOWERSIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYPROXIMALADAGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYPROXIMALGRADIENTDESCENT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEQUANTIZE; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FOCAL_LOSS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FOCAL_LOSS_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SMOOTHL1_LOSS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SMOOTHL1_LOSS_grad; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCEMEAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCAT_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ONEHOT_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SLICE_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TILE_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SUM_V2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SQUARE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HCOMBROADCAST; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HCOMALLGATHER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HCOMALLREDUCE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HCOMREDUCESCATTER; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HCOMSEND; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HCOMRECEIVE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HCOMREMOTEREAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HCOMREMOTEREFREAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HCOMREMOTEWRITE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HCOMREMOTESCATTERWRITE; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *VARASSIGN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *VARISINITIALIZEDOP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t 
*LogTimeStamp; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ISVARIABLEINITIALIZED; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *STREAMSWITCH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *STREAMSWITCHN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *STREAMACTIVE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MEMCPYASYNC; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *MEMCPYADDRASYNC; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *STREAMMERGE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ENDGRAPH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SEND; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *RECV; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ENDOFSEQUENCE; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LABELSET; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LABELGOTO; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LABELGOTOEX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LABELSWITCH; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *LABELSWITCHBYINDEX; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ATOMICADDRCLEAN; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ABS_GRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ACCUMULATE_N_V2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ACOS_GRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ACOSH_GRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ANY; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPROXIMATE_EQUAL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASIN_GRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ASINH_GRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ATAN_GRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BROADCAST_TO; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ELU_GRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ADD_V2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DATAFORMATDIMMAP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DATAFORMATVECPERMUTE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BESSELI0E; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *BESSELI1E; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYADADELTA; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYADAGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYADAGRADDA; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYADAM; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYADAMAX; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYADDSIGN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t 
*APPLYCENTEREDRMSPROP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYFTRL; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYFTRLV2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYGRADIENTDESCENT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYPOWERSIGN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYPROXIMALADAGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *APPLYPROXIMALGRADIENTDESCENT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DEQUANTIZE; + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FOCAL_LOSS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *FOCAL_LOSS_GRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SMOOTHL1_LOSS; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SMOOTHL1_LOSS_grad; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *REDUCEMEAN; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CONCAT_V2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *ONEHOT_V2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SLICE_V2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TILE_V2; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SUM_V2; // Common type when the operator has the same name -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DETECTIONOUTPUT; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DETECTIONOUTPUT; // Custom operator -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CUSTOMOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CUSTOMOP_NCHW; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CUSTOMOP_NHWC; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CUSTOMOP_NC1HWC0; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CUSTOMOP; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CUSTOMOP_NCHW; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CUSTOMOP_NHWC; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *CUSTOMOP_NC1HWC0; // Depthwise 4d_2_6d,6d_2_4d -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHWISEWEIGHT4D26D; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHWISEWEIGHT6D24D; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DEPTHWISEWEIGHT4D26D; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *DEPTHWISEWEIGHT6D24D; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQRTGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SIGMOIDGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SQRTGRAD; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *SIGMOIDGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TRANSSHAPE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *TRANSSHAPE; // Horovod operator -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HVDCALLBACKALLREDUCE; -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *HVDCALLBACKALLGATHER;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HVDCALLBACKBROADCAST;
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HVDWAIT;
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HVDCALLBACKALLREDUCE;
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HVDCALLBACKALLGATHER;
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HVDCALLBACKBROADCAST;
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const ge::char_t *HVDWAIT;
 
 ///
 /// @brief Magic number of model file
diff --git a/inc/framework/pne/pne_model.h b/inc/framework/pne/pne_model.h
index 1721d09c..39a40a89 100644
--- a/inc/framework/pne/pne_model.h
+++ b/inc/framework/pne/pne_model.h
@@ -31,8 +31,10 @@ namespace ge {
 const std::string PNE_ID_NPU = "NPU";
 const std::string PNE_ID_CPU = "HOST_CPU";
+const std::string PNE_ID_UDF = "UDF";
 
 struct ModelRelation;
+struct ModelDeployResource;
 class PneModel {
  public:
   PneModel() = default;
@@ -100,6 +102,14 @@ class PneModel {
     return model_relation_;
   }
 
+  inline void SetDeployResource(std::shared_ptr<ModelDeployResource> deploy_resource) {
+    deploy_resource_ = std::move(deploy_resource);
+  }
+
+  inline const std::shared_ptr<ModelDeployResource> GetDeployResource() const {
+    return deploy_resource_;
+  }
+
  public:
   virtual Status SerializeModel(ModelBufferData &model_buff) = 0;
 
@@ -113,9 +123,14 @@ class PneModel {
     return model_id_;
   }
 
+  virtual std::string GetLogicDeviceId() const {
+    return "";
+  }
+
  private:
   std::map<std::string, std::shared_ptr<PneModel>> submodels_;
   std::shared_ptr<ModelRelation> model_relation_;
+  std::shared_ptr<ModelDeployResource> deploy_resource_;
   ComputeGraphPtr root_graph_ = nullptr;
   std::string model_name_;
   std::string model_type_;
diff --git a/inc/framework/pne/process_node_engine.h b/inc/framework/pne/process_node_engine.h
index bb18b553..61d19a52 100644
--- a/inc/framework/pne/process_node_engine.h
+++ b/inc/framework/pne/process_node_engine.h
@@ -29,6 +29,8 @@ namespace ge {
 class ProcessNodeEngineImpl {
  public:
+  virtual ~ProcessNodeEngineImpl() = default;
+
   virtual Status OptimizeGraph(const std::vector<GeTensor> &inputs, ComputeGraphPtr &compute_graph) = 0;
 
   virtual Status BuildGraph(ComputeGraphPtr &compute_graph, PneModelPtr &model) = 0;
@@ -56,6 +58,22 @@ class ProcessNodeEngine {
 
   virtual void SetImpl(ProcessNodeEngineImplPtr impl) = 0;
 
+  virtual Status AddGraph(const ComputeGraphPtr &compute_graph, const std::map<std::string, std::string> &options) {
+    (void)compute_graph;
+    (void)options;
+    return SUCCESS;
+  }
+
+  virtual Status RemoveGraph(const uint32_t graph_id) {
+    (void)graph_id;
+    return SUCCESS;
+  }
+
+  virtual Status ParallelPartition(const ComputeGraphPtr &compute_graph) {
+    (void)compute_graph;
+    return NOT_CHANGED;
+  }
+
  protected:
   std::string engine_id_;
   ProcessNodeEngineImplPtr impl_ = nullptr;
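The three hooks added to ProcessNodeEngine above are optional, with conservative defaults: AddGraph/RemoveGraph succeed as no-ops and ParallelPartition reports NOT_CHANGED, so existing engines keep compiling. A caller-side sketch of that contract follows; the helper name and the fallback policy are illustrative, not part of this patch:

#include <memory>
#include "framework/pne/process_node_engine.h"

// Hypothetical scheduler-side helper: engines that do not override
// ParallelPartition() return NOT_CHANGED, which callers should treat as
// "graph intentionally left unchanged", not as an error.
ge::Status TryParallelPartition(const std::shared_ptr<ge::ProcessNodeEngine> &engine,
                                const ge::ComputeGraphPtr &graph) {
  const ge::Status ret = engine->ParallelPartition(graph);
  if (ret == ge::NOT_CHANGED) {
    return ge::SUCCESS;  // fall back to the serial build path
  }
  return ret;  // SUCCESS with a partitioned graph, or a real error
}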
diff --git a/inc/framework/runtime/exe_graph_executor.h b/inc/framework/runtime/exe_graph_executor.h
new file mode 100644
index 00000000..a94972d3
--- /dev/null
+++ b/inc/framework/runtime/exe_graph_executor.h
@@ -0,0 +1,56 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_EXE_GRAPH_EXECUTOR_H_
+#define AIR_CXX_INC_FRAMEWORK_RUNTIME_EXE_GRAPH_EXECUTOR_H_
+#include "graph/ge_error_codes.h"
+
+#include "common/ge_visibility.h"
+#include "exe_graph_resource_guard.h"
+#include "subscriber/executor_subscriber_c.h"
+namespace gert {
+class VISIBILITY_EXPORT ExeGraphExecutor {
+ public:
+  ge::graphStatus Load() const {
+    return ge::GRAPH_SUCCESS;
+  }
+  ge::graphStatus UnLoad() const {
+    return ge::GRAPH_SUCCESS;
+  }
+
+  /**
+   * Specify the inputs/outputs for graph execution. Note that the caller must guarantee the
+   * inputs/outputs are fully refreshed before executing.
+   */
+  ge::graphStatus SpecifyInputs(void **inputs, size_t start, size_t num);
+  ge::graphStatus SpecifyOutputs(void **outputs, size_t num);
+  ge::graphStatus Execute();
+  ge::graphStatus Execute(ExecutorSubscriber *callback);
+
+  const void *GetExecutionData() const {
+    return execution_data_;
+  }
+
+  ResourceGuard &GetResourceGuard();
+  void *SetExecutionData(std::unique_ptr execution_data);
+
+ private:
+  friend class ModelV2ExecutorTestHelper;
+
+  void *execution_data_{nullptr};
+  ResourceGuard resource_guard_;
+};
+}  // namespace gert
+#endif  // AIR_CXX_INC_FRAMEWORK_RUNTIME_EXE_GRAPH_EXECUTOR_H_
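A minimal sketch of driving the new ExeGraphExecutor API; where the executor and the input/output buffers come from is assumed (for example ModelV2Executor::GetMainExeGraphExecutor(), added later in this patch):

#include <cstddef>
#include "framework/runtime/exe_graph_executor.h"

// Hypothetical call sequence: per the header's comment, the caller must keep
// inputs/outputs alive and fully refreshed until Execute() returns.
ge::graphStatus RunOnce(gert::ExeGraphExecutor &executor, void **inputs, size_t input_num,
                        void **outputs, size_t output_num) {
  ge::graphStatus ret = executor.SpecifyInputs(inputs, 0U, input_num);
  if (ret != ge::GRAPH_SUCCESS) {
    return ret;
  }
  ret = executor.SpecifyOutputs(outputs, output_num);
  if (ret != ge::GRAPH_SUCCESS) {
    return ret;
  }
  return executor.Execute();  // or Execute(&subscriber) to observe events
}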
diff --git a/inc/framework/runtime/exe_graph_resource_guard.h b/inc/framework/runtime/exe_graph_resource_guard.h
new file mode 100644
index 00000000..9f1e29f0
--- /dev/null
+++ b/inc/framework/runtime/exe_graph_resource_guard.h
@@ -0,0 +1,69 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_EXE_GRAPH_RESOURCE_GUARD_H_
+#define AIR_CXX_INC_FRAMEWORK_RUNTIME_EXE_GRAPH_RESOURCE_GUARD_H_
+#include <cstddef>
+#include <memory>
+#include <vector>
+#include "common/ge_visibility.h"
+
+namespace gert {
+class VISIBILITY_EXPORT ResourceGuard {
+ public:
+  void *ResetExecutionData(std::unique_ptr execution_data);
+  void ResetAnyValue(std::unique_ptr any_values, size_t count);
+  void PushNode(void *node);
+  void PushWatcher(void *watcher);
+  void *ResetNodesArray(std::unique_ptr nodes_array);
+  void *ResetStartNodesArray(std::unique_ptr start_nodes_array);
+  void *ResetNodesIndgreeArray(std::unique_ptr nodes_indgree_array);
+  void *ResetNodesWaitIndgreeArray(std::unique_ptr nodes_indgree_array);
+  void *ResetInputsArray(std::unique_ptr inputs_array);
+  void *ResetOutputsArray(std::unique_ptr outputs_array);
+  void *ResetWatchersArray(std::unique_ptr watchers_array);
+  void *ResetReadyQueue(void *ready_queue);
+  void *ResetBuffer(std::unique_ptr buffer);
+  void *ResetComputeNodeInfo(std::unique_ptr compute_node_info);
+  void *ResetKernelExtendInfo(std::unique_ptr kernel_extend_info);
+  void *ResetModelDesc(std::unique_ptr model_desc);
+
+  ~ResourceGuard();
+
+ private:
+  std::unique_ptr execution_data_holder_;
+  size_t any_values_num_;
+  std::unique_ptr any_values_guard_;
+
+  std::vector> nodes_guarder_;
+  std::vector> watchers_guarder_;
+  std::unique_ptr continuous_buffer_guarder_;
+  std::unique_ptr buffer_guarder_;
+  std::unique_ptr compute_node_info_guarder_;
+  std::unique_ptr kernel_extend_info_guarder_;
+  std::unique_ptr model_desc_guarder_;
+
+  std::unique_ptr nodes_array_guarder_;
+  std::unique_ptr start_nodes_array_guarder_;
+  std::unique_ptr nodes_indgree_array_guarder_;
+  std::unique_ptr nodes_wait_indgree_array_guarder_;
+  std::unique_ptr inputs_array_guarder_;
+  std::unique_ptr outputs_array_guarder_;
+  std::unique_ptr watchers_array_guarder_;
+  std::unique_ptr ready_queue_guarder_{nullptr, nullptr};
+};
+}  // namespace gert
+#endif  // AIR_CXX_INC_FRAMEWORK_RUNTIME_EXE_GRAPH_RESOURCE_GUARD_H_
diff --git a/inc/framework/runtime/gert_api.h b/inc/framework/runtime/gert_api.h
index 007993e8..6571a67e 100644
--- a/inc/framework/runtime/gert_api.h
+++ b/inc/framework/runtime/gert_api.h
@@ -18,9 +18,13 @@
 #define AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_
 #include "model_v2_executor.h"
 #include "common/ge_types.h"
+#include "common/ge_visibility.h"
 
 namespace gert {
-std::unique_ptr<ModelV2Executor> LoadExecutorFromFile(const char *file_path, ge::graphStatus &error_code);
+VISIBILITY_EXPORT
+std::unique_ptr<ModelV2Executor> LoadExecutorFromFile(const char *model_path, ge::graphStatus &error_code);
+
+VISIBILITY_EXPORT
 std::unique_ptr<ModelV2Executor> LoadExecutorFromModelData(const ge::ModelData &model_data, ge::graphStatus &error_code);
 }  // namespace gert
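A load-and-check sketch for the two exported loaders; the model path and the error handling are placeholders:

#include <cstdio>
#include "framework/runtime/gert_api.h"

int main() {
  ge::graphStatus error_code = ge::GRAPH_SUCCESS;
  // "sample.om" is a hypothetical offline-model path, not something this patch ships.
  const auto executor = gert::LoadExecutorFromFile("sample.om", error_code);
  if ((executor == nullptr) || (error_code != ge::GRAPH_SUCCESS)) {
    (void)fprintf(stderr, "load failed, error code %u\n", error_code);
    return -1;
  }
  return 0;
}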
diff --git a/inc/framework/runtime/model_desc.h b/inc/framework/runtime/model_desc.h
index 46c21636..2574e817 100644
--- a/inc/framework/runtime/model_desc.h
+++ b/inc/framework/runtime/model_desc.h
@@ -1,94 +1,96 @@
-/**
- * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
-#define AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
-#include "common/ge_types.h"
-#include "exe_graph/runtime/shape.h"
-#include "exe_graph/runtime/continuous_vector.h"
-#include "exe_graph/runtime/storage_format.h"
-#include "exe_graph/runtime/storage_shape.h"
-
-namespace gert {
-class ShapeRange {
- public:
-  const Shape &GetMin() const;
-  const Shape &GetMax() const;
-  Shape &MutableMin();
-  Shape &MutableMax();
-
- private:
-  Shape min_;
-  Shape max_;
-};
-
-class ModelIoDesc {
- public:
-  const char *GetName() const;
-  int32_t GetDataType() const;
-  ge::Format GetStorageFormat() const;
-  ge::Format GetOriginFormat() const;
-  int64_t GetSize() const;
-  const Shape &GetStorageShape() const;
-  const Shape &GetOriginShape() const;
-  const ShapeRange &GetOriginShapeRange() const;
-  const ShapeRange &GetStorageShapeRange() const;
-
-  void SetName(const char *name);
-  void SetDataType(int32_t data_type);
-  void SetStorageFormat(ge::Format format);
-  void SetOriginFormat(ge::Format format);
-  Shape &MutableStorageShape();
-  Shape &MutableOriginShape();
-  ShapeRange &MutableOriginShapeRange();
-  ShapeRange &MutableStorageShapeRange();
-
- private:
-  const char *name_;
-  int32_t data_type_;
-  StorageFormat format_;
-  StorageShape shape_;
-  ShapeRange storage_shape_range_;
-  ShapeRange origin_shape_range_;
-};
-
-class ModelDesc {
- public:
-  static size_t CalcSize(size_t input_num, size_t output_num);
-  const ModelIoDesc *GetInputDesc(size_t index) const;
-  const ModelIoDesc *GetAllInputsDesc(size_t &input_num) const;
-
-  const ModelIoDesc *GetOutputDesc(size_t index) const;
-  const ModelIoDesc *GetAllOutputsDesc(size_t &output_num) const;
-
-  ModelIoDesc *MutableInputDesc(size_t index);
-  ModelIoDesc *MutableOutputDesc(size_t index);
-  ModelIoDesc *AllMutableIoDesc(size_t &input_num, size_t &output_num);
-  void SetInputNum(size_t input_num);
-  void SetOutputNum(size_t output_num);
-
-  ge::graphStatus GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const;
-  ge::graphStatus GetUserDesignateShapeOrder(std::vector<std::string> &user_designate_shape_order) const;
-  ge::graphStatus GetModelAttrs(std::vector<std::string> &attrs) const;
-
- private:
-  size_t input_num_;
-  size_t output_num_;
-  ContinuousVector model_io_descs_;
-};
-}  // namespace gert
-
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
+#define AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
+#include "common/ge_types.h"
+#include "common/ge_visibility.h"
+
+#include "exe_graph/runtime/shape.h"
+#include "exe_graph/runtime/continuous_vector.h"
+#include "exe_graph/runtime/storage_format.h"
+#include "exe_graph/runtime/storage_shape.h"
+
+namespace gert {
+class VISIBILITY_EXPORT ShapeRange {
+ public:
+  const Shape &GetMin() const;
+  const Shape &GetMax() const;
+  Shape &MutableMin();
+  Shape &MutableMax();
+
+ private:
+  Shape min_;
+  Shape max_;
+};
+
+class VISIBILITY_EXPORT ModelIoDesc {
+ public:
+  const char *GetName() const;
+  int32_t GetDataType() const;
+  ge::Format GetStorageFormat() const;
+  ge::Format GetOriginFormat() const;
+  int64_t GetSize() const;
+  const Shape &GetStorageShape() const;
+  const Shape &GetOriginShape() const;
+  const ShapeRange &GetOriginShapeRange() const;
+  const ShapeRange &GetStorageShapeRange() const;
+
+  void SetName(const char *name);
+  void SetDataType(int32_t data_type);
+  void SetStorageFormat(ge::Format format);
+  void SetOriginFormat(ge::Format format);
+  Shape &MutableStorageShape();
+  Shape &MutableOriginShape();
+  ShapeRange &MutableOriginShapeRange();
+  ShapeRange &MutableStorageShapeRange();
+
+ private:
+  const char *name_;
+  int32_t data_type_;
+  StorageFormat format_;
+  StorageShape shape_;
+  ShapeRange storage_shape_range_;
+  ShapeRange origin_shape_range_;
+};
+
+class VISIBILITY_EXPORT ModelDesc {
+ public:
+  static size_t CalcSize(size_t input_num, size_t output_num);
+  const ModelIoDesc *GetInputDesc(size_t index) const;
+  const ModelIoDesc *GetAllInputsDesc(size_t &input_num) const;
+
+  const ModelIoDesc *GetOutputDesc(size_t index) const;
+  const ModelIoDesc *GetAllOutputsDesc(size_t &output_num) const;
+
+  ModelIoDesc *MutableInputDesc(size_t index);
+  ModelIoDesc *MutableOutputDesc(size_t index);
+  ModelIoDesc *AllMutableIoDesc(size_t &input_num, size_t &output_num);
+  void SetInputNum(size_t input_num);
+  void SetOutputNum(size_t output_num);
+
+  ge::graphStatus GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const;
+  ge::graphStatus GetUserDesignateShapeOrder(std::vector<std::string> &user_designate_shape_order) const;
+  ge::graphStatus GetModelAttrs(std::vector<std::string> &attrs) const;
+
+ private:
+  size_t input_num_;
+  size_t output_num_;
+  ContinuousVector model_io_descs_;
+};
+}  // namespace gert
+
 #endif  // AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
\ No newline at end of file
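The rewritten model_desc.h is content-preserving apart from the new VISIBILITY_EXPORT markers, so the introspection surface is unchanged. A sketch of walking the input descriptions; obtaining `model_desc` (for example from ModelV2Executor::GetModelDesc(), shown below in this patch) is an assumption:

#include <cstdio>
#include "framework/runtime/model_desc.h"

// Hypothetical dump: name and storage-shape rank of every model input.
void DumpInputs(const gert::ModelDesc &model_desc) {
  size_t input_num = 0U;
  const gert::ModelIoDesc *inputs = model_desc.GetAllInputsDesc(input_num);
  for (size_t i = 0U; i < input_num; ++i) {
    (void)printf("input %zu: name=%s, storage rank=%zu\n", i, inputs[i].GetName(),
                 inputs[i].GetStorageShape().GetDimNum());
  }
}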
diff --git a/inc/framework/runtime/model_v2_executor.h b/inc/framework/runtime/model_v2_executor.h
index 277a23d0..dad4f4e4 100644
--- a/inc/framework/runtime/model_v2_executor.h
+++ b/inc/framework/runtime/model_v2_executor.h
@@ -22,94 +22,25 @@
 #include "model_desc.h"
 #include "runtime/stream.h"
 #include "exe_graph/runtime/tensor.h"
+#include "common/ge_visibility.h"
+#include "exe_graph_resource_guard.h"
+#include "exe_graph_executor.h"
+#include "subscriber/executor_subscribers_scheduler.h"
 
 namespace gert {
 enum SubExeGraphType { kInitExeGraph, kMainExeGraph, kDeInitExeGraph, kSubExeGraphTypeEnd };
-static constexpr char *kSubExeGraphTypeStrs[kSubExeGraphTypeEnd] = {(char *)"Init", (char *)"Main", (char *)"DeInit"};
+static constexpr char *kSubExeGraphTypeStrs[kSubExeGraphTypeEnd] = {
+    const_cast<char *>("Init"), const_cast<char *>("Main"), const_cast<char *>("DeInit")};
 inline const char *GetSubExeGraphTypeStr(SubExeGraphType type) {
   return kSubExeGraphTypeStrs[type];
 }
 
-class ResourceGuard {
- public:
-  void *ResetExecutionData(std::unique_ptr execution_data);
-  void ResetAnyValue(std::unique_ptr any_values, size_t count);
-  void PushNode(void *node);
-  void PushWatcher(void *watcher);
-  void *ResetNodesArray(std::unique_ptr nodes_array);
-  void *ResetStartNodesArray(std::unique_ptr start_nodes_array);
-  void *ResetNodesIndgreeArray(std::unique_ptr nodes_indgree_array);
-  void *ResetNodesWaitIndgreeArray(std::unique_ptr nodes_indgree_array);
-  void *ResetInputsArray(std::unique_ptr inputs_array);
-  void *ResetOutputsArray(std::unique_ptr outputs_array);
-  void *ResetWatchersArray(std::unique_ptr watchers_array);
-  void *ResetReadyQueue(void *ready_queue);
-  void *ResetBuffer(std::unique_ptr buffer);
-  void *ResetComputeNodeInfo(std::unique_ptr compute_node_info);
-  void *ResetKernelExtendInfo(std::unique_ptr kernel_extend_info);
-  void *ResetModelDesc(std::unique_ptr model_desc);
-
-  ~ResourceGuard();
-
- private:
-  std::unique_ptr execution_data_holder_;
-  size_t any_values_num_;
-  std::unique_ptr any_values_guard_;
-
-  std::vector> nodes_guarder_;
-  std::vector> watchers_guarder_;
-  std::unique_ptr continuous_buffer_guarder_;
-  std::unique_ptr buffer_guarder_;
-  std::unique_ptr compute_node_info_guarder_;
-  std::unique_ptr kernel_extend_info_guarder_;
-  std::unique_ptr model_desc_guarder_;
-
-  std::unique_ptr nodes_array_guarder_;
-  std::unique_ptr start_nodes_array_guarder_;
-  std::unique_ptr nodes_indgree_array_guarder_;
-  std::unique_ptr nodes_wait_indgree_array_guarder_;
-  std::unique_ptr inputs_array_guarder_;
-  std::unique_ptr outputs_array_guarder_;
-  std::unique_ptr watchers_array_guarder_;
-  std::unique_ptr ready_queue_guarder_{nullptr, nullptr};
-};
-
 struct ModelExecuteArg {
   rtStream_t stream;
 };
 static_assert(std::is_standard_layout<ModelExecuteArg>::value, "The class ModelExecuteArg must be a POD");
 
-class ExeGraphExecutor {
- public:
-  // TODO: release the AnyValue resources when unloading
-  ge::graphStatus Load() {
-    return ge::GRAPH_SUCCESS;
-  }
-  ge::graphStatus UnLoad() {
-    return ge::GRAPH_SUCCESS;
-  }
-
-  /**
-   * Specify the inputs/outputs for graph execution. Note that the caller must guarantee the
-   * inputs/outputs are fully refreshed before executing.
-   */
-  ge::graphStatus SpecifyInputs(void **inputs, size_t start, size_t num);
-  ge::graphStatus SpecifyOutputs(void **outputs, size_t num);
-  ge::graphStatus Execute();
-
-  const void *GetExecutionData() const {
-    return execution_data_;
-  }
-
-  ResourceGuard &GetResourceGuard();
-  void *SetExecutionData(std::unique_ptr execution_data);
-
- private:
-  friend class ModelV2ExecutorTestHelper;
-
-  void *execution_data_;
-  ResourceGuard resource_guard_;
-};
-class ModelV2Executor {
+class VISIBILITY_EXPORT ModelV2Executor {
  public:
   static std::unique_ptr<ModelV2Executor> Create(const ge::ComputeGraphPtr &root_graph);
@@ -121,6 +52,12 @@ class ModelV2Executor {
   const ModelDesc &GetModelDesc() const;
   void SetModelDesc(ModelDesc *model_desc);
 
+  ExeGraphExecutor &GetMainExeGraphExecutor() {
+    return graphs_[kMainExeGraph];
+  }
+  ExecutorSubscribersScheduler &GetSubscribers();
+  const ExecutorSubscribersScheduler &GetSubscribers() const;
+
   ModelV2Executor(const ModelV2Executor &) = delete;
   ModelV2Executor(ModelV2Executor &&) = delete;
   ModelV2Executor &operator=(const ModelV2Executor &) = delete;
@@ -129,13 +66,14 @@ class ModelV2Executor {
  private:
   friend class ModelV2ExecutorBuilder;
   friend class ModelV2ExecutorTestHelper;
-  ModelV2Executor() = default;
+  ModelV2Executor();
 
  private:
   std::array<ExeGraphExecutor, kSubExeGraphTypeEnd> graphs_;
   ResourceGuard resource_guard_;
   ModelDesc *model_desc_ = nullptr;
   rtStream_t default_stream_ = nullptr;
+  ExecutorSubscribersScheduler subscribers_;
 };
 }  // namespace gert
 
diff --git a/inc/framework/runtime/subscriber/built_in_subscriber_definitions.h b/inc/framework/runtime/subscriber/built_in_subscriber_definitions.h
new file mode 100644
index 00000000..4ee2aff2
--- /dev/null
+++ b/inc/framework/runtime/subscriber/built_in_subscriber_definitions.h
@@ -0,0 +1,71 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_SUBSCRIBER_BUILT_IN_SUBSCRIBER_DEFINITIONS_H_
+#define AIR_CXX_INC_FRAMEWORK_RUNTIME_SUBSCRIBER_BUILT_IN_SUBSCRIBER_DEFINITIONS_H_
+#include <cstdint>
+#include <vector>
+#include "framework/common/ge_visibility.h"
+namespace gert {
+constexpr size_t kProfilingDataCap = 10UL * 1024UL * 1024UL;
+constexpr size_t kInitSize = 10UL * 1024UL;
+constexpr size_t kModelStrIdx = 0UL;
+constexpr size_t kExecuteStrIdx = 1UL;
+constexpr size_t kRegStartIdx = 2UL;
+constexpr size_t kDouble = 2UL;
+
+enum class BuiltInSubscriberType { kProfiling, kDumper, kNum };
+
+enum class ProfilingType {
+  kHost,         // enable profiling of host-side scheduling
+  kDevice,
+  kGeHost,       // enable profiling of GE host-side scheduling
+  kSingleOpReg,  // single-op scenarios must enable this switch to start registering node names and kernel types
+  kNum,
+  kAll = kNum
+};
+static_assert(static_cast<size_t>(ProfilingType::kNum) < sizeof(uint64_t) * 8,
+              "The max num of profiling type must be less than the width of uint64");
+
+enum class DumpType { kDataDump, kExceptionDump, kExceptionSave, kNum, kAll = kNum };
+static_assert(static_cast<size_t>(DumpType::kNum) < sizeof(uint64_t) * 8,
+              "The max num of dumper type must be less than the width of uint64");
+
+class VISIBILITY_EXPORT BuiltInSubscriberUtil {
+ public:
+  template <typename T,
+            typename std::enable_if<(std::is_same<T, ProfilingType>::value) || (std::is_same<T, DumpType>::value),
+                                    int>::type = 0>
+  constexpr static uint64_t EnableBit(T et) {
+    return 1UL << static_cast<size_t>(et);
+  }
+
+  template <typename T,
+            typename std::enable_if<(std::is_same<T, ProfilingType>::value) || (std::is_same<T, DumpType>::value),
+                                    int>::type = 0>
+  static uint64_t BuildEnableFlags(const std::vector<T> &enable_types) {
+    uint64_t flag = 0UL;
+    for (auto et : enable_types) {
+      if (et == T::kAll) {
+        return EnableBit(T::kNum) - 1UL;
+      }
+      flag |= EnableBit(et);
+    }
+    return flag;
+  }
+};
+}  // namespace gert
+#endif  // AIR_CXX_INC_FRAMEWORK_RUNTIME_SUBSCRIBER_BUILT_IN_SUBSCRIBER_DEFINITIONS_H_
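What the flag helper above computes, by the enum ordering (a sketch; the wrapper functions are illustrative):

#include <cstdint>
#include "framework/runtime/subscriber/built_in_subscriber_definitions.h"

// kHost is bit 0 and kDevice bit 1, so this returns 0x3.
uint64_t HostAndDeviceProfiling() {
  return gert::BuiltInSubscriberUtil::BuildEnableFlags<gert::ProfilingType>(
      {gert::ProfilingType::kHost, gert::ProfilingType::kDevice});
}

// kAll short-circuits to every defined bit: EnableBit(kNum) - 1 = 0x7 for DumpType.
uint64_t AllDumpFlags() {
  return gert::BuiltInSubscriberUtil::BuildEnableFlags<gert::DumpType>({gert::DumpType::kAll});
}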
+ */ + +#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_EXECUTOR_SUBSCRIBER_C_H_ +#define AIR_CXX_INC_FRAMEWORK_RUNTIME_EXECUTOR_SUBSCRIBER_C_H_ +#include "exe_graph/runtime/base_type.h" +#ifdef __cplusplus +extern "C" { +#endif +typedef enum { kExecuteStart, kExecuteEnd, kModelStart, kModelEnd, kExecuteEventEnd } ExecutorEvent; + +typedef void (*SubscriberFunc)(void *arg, ExecutorEvent event, const void *node, KernelStatus result); +typedef struct { + SubscriberFunc callback; + void *arg; +} ExecutorSubscriber; +#ifdef __cplusplus +} +#endif +#endif // AIR_CXX_INC_FRAMEWORK_RUNTIME_EXECUTOR_SUBSCRIBER_C_H_ diff --git a/inc/framework/runtime/subscriber/executor_subscriber_guarder.h b/inc/framework/runtime/subscriber/executor_subscriber_guarder.h new file mode 100644 index 00000000..98f817c4 --- /dev/null +++ b/inc/framework/runtime/subscriber/executor_subscriber_guarder.h @@ -0,0 +1,76 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_EXECUTOR_SUBSCRIBER_GUARDER_H_ +#define AIR_CXX_INC_FRAMEWORK_RUNTIME_EXECUTOR_SUBSCRIBER_GUARDER_H_ +#include "framework/common/ge_visibility.h" +#include "common/checker.h" +#include "executor_subscriber_c.h" +namespace gert { +template<typename T> +void ObjectDeleter(void *obj) { + delete static_cast<T *>(obj); +} + +class VISIBILITY_EXPORT ExecutorSubscriberGuarder { + public: + using ArgDeleter = void (*)(void *); + + ExecutorSubscriberGuarder(::SubscriberFunc func, void *arg, ArgDeleter deleter) + : subscriber_({func, arg}), arg_deleter_(deleter) {} + ExecutorSubscriberGuarder(ExecutorSubscriberGuarder &&other) noexcept { + MoveAssignment(other); + } + ExecutorSubscriberGuarder &operator=(ExecutorSubscriberGuarder &&other) noexcept { + DeleteArg(); + MoveAssignment(other); + return *this; + } + + ExecutorSubscriber &GetSubscriber() { + return subscriber_; + } + + const ExecutorSubscriber &GetSubscriber() const { + return subscriber_; + } + + ~ExecutorSubscriberGuarder() { + DeleteArg(); + } + + ExecutorSubscriberGuarder(const ExecutorSubscriberGuarder &) = delete; + ExecutorSubscriberGuarder &operator=(const ExecutorSubscriberGuarder &) = delete; + + private: + void DeleteArg() { + if (arg_deleter_ != nullptr) { + arg_deleter_(subscriber_.arg); + } + } + void MoveAssignment(ExecutorSubscriberGuarder &other) { + subscriber_ = other.subscriber_; + arg_deleter_ = other.arg_deleter_; + other.subscriber_ = {nullptr, nullptr}; + other.arg_deleter_ = nullptr; + } + + private: + ExecutorSubscriber subscriber_{nullptr, nullptr}; + ArgDeleter arg_deleter_{nullptr}; +}; +} // namespace gert +#endif // AIR_CXX_INC_FRAMEWORK_RUNTIME_EXECUTOR_SUBSCRIBER_GUARDER_H_ diff --git a/inc/framework/runtime/subscriber/executor_subscribers_scheduler.h b/inc/framework/runtime/subscriber/executor_subscribers_scheduler.h new file mode 100644 index 00000000..1a9936f8 --- /dev/null +++ b/inc/framework/runtime/subscriber/executor_subscribers_scheduler.h @@ -0,0 +1,149 @@ +/** 
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_EXECUTOR_SUBSCRIBERS_SCHEDULER_H_ +#define AIR_CXX_INC_FRAMEWORK_RUNTIME_EXECUTOR_SUBSCRIBERS_SCHEDULER_H_ +#include +#include +#include "executor_subscriber_guarder.h" +#include "built_in_subscriber_definitions.h" +#include "global_profiling.h" +#include "framework/common/ge_visibility.h" +namespace gert { +namespace { +constexpr size_t kInitSubscriberSize = 1UL; +} +class ModelV2Executor; +class VISIBILITY_EXPORT ExecutorSubscribersScheduler { + public: + static void OnExecuteEvent(ExecutorSubscribersScheduler *ins, ExecutorEvent event, const void *node, + KernelStatus result); + + ExecutorSubscribersScheduler() + : executor_(nullptr), + enabled_(false), + built_in_subscribers_ptr_(), + subscribers_(), + subscriber_wrapper_({reinterpret_cast<::SubscriberFunc>(ExecutorSubscribersScheduler::OnExecuteEvent), this}) {} +#ifdef ONLY_COMPILE_OPEN_SRC + ~ExecutorSubscribersScheduler(); +#endif + void Init(ModelV2Executor *executor); + ExecutorSubscribersScheduler(const ExecutorSubscribersScheduler &) = delete; + ExecutorSubscribersScheduler &operator=(const ExecutorSubscribersScheduler &) = delete; + ExecutorSubscriber &GetSubscriber() { + if (subscribers_.size() == 1UL) { + return subscribers_[0].GetSubscriber(); + } else { + return subscriber_wrapper_; + } + } + + ModelV2Executor *GetModelV2Executor() { + return executor_; + } + const ModelV2Executor *GetModelV2Executor() const { + return executor_; + } + + /** + * Adds a subscriber. The subscriber must implement a static method with the prototype: + * ```c++ + * static void OnExecuteEvent(T *void_arg, ExecutorEvent event, const void *node, KernelStatus result); + * ``` + * + * By default the subscribers are disabled; when the first subscriber is added, the state is automatically + * switched to enabled. + * + * @tparam T subscriber type + * @tparam Args types of the subscriber's initialization arguments + * @param args subscriber initialization arguments + * @return pointer to the added subscriber; note that the subscriber is owned by `ExecutorSubscribersScheduler`, external users must not free this pointer + */ + template<typename T, typename... Args> + T *AddSubscriber(Args... args) { + auto ins = new (std::nothrow) T(args...); + if (ins == nullptr) { + return nullptr; + } + + // the profiling subscriber already exists when the ESS is initialized + if (subscribers_.size() == kInitSubscriberSize) { + enabled_ = true; + } + subscribers_.emplace_back(reinterpret_cast<::SubscriberFunc>(T::OnExecuteEvent), ins, ObjectDeleter<T>); + return ins; + } + + /** + * Adds a built-in subscriber. + * There are only a few built-in subscribers, so no registration mechanism is used for now; if extension is + * needed later, automatic registration through a registry can be considered. + * For ease of use, this class provides interfaces to obtain pointers to the built-in subscribers; + * self-registered subscribers would lose this capability. + * @param subscriber_type + */ + void AddBuiltIn(BuiltInSubscriberType subscriber_type, uint64_t enable_flag); + void RemoveSubscriber(void *subscriber_ptr) { + for (auto iter = subscribers_.begin(); iter != subscribers_.end(); ++iter) { + if (iter->GetSubscriber().arg == subscriber_ptr) { + subscribers_.erase(iter); + break; + } + } + for (auto &built_in_subscriber : built_in_subscribers_ptr_) { + if (built_in_subscriber == subscriber_ptr) { + built_in_subscriber = nullptr; + } + } + if (subscribers_.empty()) { + enabled_ = false; + } + } + + template<typename T> + inline T *MutableBuiltInSubscriber(const BuiltInSubscriberType type) { + return static_cast<T *>(built_in_subscribers_ptr_[static_cast<size_t>(type)]); + } + + template<typename T> + inline const T *GetBuiltInSubscriber(const BuiltInSubscriberType type) { + return static_cast<const T *>(built_in_subscribers_ptr_[static_cast<size_t>(type)]); + } + + bool IsEnable() const { + return enabled_ || GlobalProfilingWrapper::GetInstance()->GetEnableFlags(); + } + void SetEnable(bool enable_flag) { + enabled_ = enable_flag; + } + void Clear() { + subscribers_.clear(); + for (auto &built_in_subscriber : built_in_subscribers_ptr_) { + built_in_subscriber = nullptr; + } + enabled_ = false; + } + size_t GetSize() const { + return subscribers_.size(); + } + + private: + ModelV2Executor *executor_{nullptr}; + bool enabled_{false}; + std::array<void *, static_cast<size_t>(BuiltInSubscriberType::kNum)> built_in_subscribers_ptr_; + std::vector<ExecutorSubscriberGuarder> subscribers_; + ExecutorSubscriber subscriber_wrapper_; +}; +} // namespace gert +#endif // AIR_CXX_INC_FRAMEWORK_RUNTIME_EXECUTOR_SUBSCRIBERS_SCHEDULER_H_ diff --git a/inc/framework/runtime/subscriber/global_profiling.h b/inc/framework/runtime/subscriber/global_profiling.h new file mode 100644 index 00000000..6b84214e --- /dev/null +++ b/inc/framework/runtime/subscriber/global_profiling.h @@ -0,0 +1,138 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_SUBSCRIBER_GLOBAL_PROFILING_H_ +#define AIR_CXX_INC_FRAMEWORK_RUNTIME_SUBSCRIBER_GLOBAL_PROFILING_H_ + +#include +#include +#include +#include "built_in_subscriber_definitions.h" +#include "common/debug/ge_log.h" +#include "framework/common/ge_visibility.h" +#include "runtime/subscriber/executor_subscriber_c.h" + +namespace gert { +struct ProfilingData { + uint64_t name_idx; + uint64_t type_idx; + ExecutorEvent event; + std::chrono::time_point<std::chrono::system_clock> timestamp; +}; +class GlobalProfiler { + public: + GlobalProfiler() = default; + void Record(uint64_t name_idx, uint64_t type_idx, ExecutorEvent event, + std::chrono::time_point<std::chrono::system_clock> timestamp) { + auto index = count_++; + if (index >= kProfilingDataCap) { + return; + } + records_[index] = {name_idx, type_idx, event, timestamp}; + } + void Dump(std::ostream &out_stream, std::vector<std::string> &idx_to_str) const; + size_t GetCount() const { + return count_; + } + + private: + std::atomic<size_t> count_{0UL}; + ProfilingData records_[kProfilingDataCap]; +}; + +class VISIBILITY_EXPORT GlobalProfilingWrapper { + public: + static GlobalProfilingWrapper *GetInstance() { + static GlobalProfilingWrapper global_prof_wrapper; + return &global_prof_wrapper; + } + + static void OnGlobalProfilingSwitch(void *ins, uint64_t enable_flags); + + void Init(uint64_t enable_flags); + + void Free() { + global_profiler_.reset(nullptr); + SetEnableFlags(0UL); + } + + GlobalProfiler *GetGlobalProfiler() const { + return global_profiler_.get(); + } + + void SetEnableFlags(uint64_t enable_flags) { + enable_flags_ = enable_flags; + } + + uint64_t GetRecordCount() { + if (global_profiler_ == nullptr) { + return 0UL; + } + return global_profiler_->GetCount(); + } + + uint64_t GetEnableFlags() const { + return enable_flags_; + } + + bool IsEnable(ProfilingType profiling_type) const { + return enable_flags_ & BuiltInSubscriberUtil::EnableBit(profiling_type); + } + + void DumpAndFree(std::ostream &out_stream) { + Dump(out_stream); + Free(); + } + void Dump(std::ostream &out_stream) { + if (global_profiler_ != nullptr) { + global_profiler_->Dump(out_stream, idx_to_str_); + } + } + void Record(uint64_t name_idx, uint64_t type_idx, ExecutorEvent event, + std::chrono::time_point<std::chrono::system_clock> timestamp) { + if (global_profiler_ != nullptr) { + global_profiler_->Record(name_idx, type_idx, event, timestamp); + } + } + + uint64_t RegisterString(const char *name) { + const std::lock_guard<std::mutex> lk(register_mutex_); + std::string str_name = name; + const auto iter = std::find(idx_to_str_.begin(), idx_to_str_.end(), str_name); + if (iter == idx_to_str_.end()) { + idx_to_str_[str_idx_] = str_name; + ++str_idx_; + if (str_idx_ >= idx_to_str_.size()) { + idx_to_str_.resize(idx_to_str_.size() * kDouble); + } + return str_idx_ - 1UL; + } else { + return iter - idx_to_str_.begin(); + } + } + + private: + GlobalProfilingWrapper(); + + private: + std::unique_ptr<GlobalProfiler> global_profiler_{nullptr}; + uint64_t enable_flags_{0UL}; + uint64_t str_idx_{0UL}; + std::vector<std::string> idx_to_str_; + std::mutex register_mutex_; +}; +} // namespace gert + +#endif diff --git a/metadef b/metadef index e4d1efc4..17536092 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit e4d1efc47349f13af1bcdb53ba408118779fc27e +Subproject commit 17536092c004f6f9e08116939f4f49e1e11a99d3 diff --git a/third_party/fwkacllib/inc/common/type_def.h b/third_party/fwkacllib/inc/common/type_def.h index 1bbaf32d..520aeaf0 100644 --- a/third_party/fwkacllib/inc/common/type_def.h +++ b/third_party/fwkacllib/inc/common/type_def.h @@ -27,12 +27,12 
@@ typedef double float64_t; inline uint64_t PtrToValue(const void *ptr) { - return static_cast(reinterpret_cast(ptr)); + return static_cast(reinterpret_cast(ptr)); } inline void *ValueToPtr(const uint64_t value) { - return reinterpret_cast(static_cast(value)); + return reinterpret_cast(static_cast(value)); } template diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h index 1c8f8e44..a5a2642c 100644 --- a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h +++ b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h @@ -59,6 +59,7 @@ static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow static const int32_t ACL_ERROR_RT_OVER_FLOW = 207017; // common over flow +static const int32_t ACL_ERROR_RT_DEVIDE_OOM = 207018; // device oom static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/third_party/fwkacllib/inc/ops/cluster.h b/third_party/fwkacllib/inc/ops/cluster.h index 6e41e569..d2ee7f09 100644 --- a/third_party/fwkacllib/inc/ops/cluster.h +++ b/third_party/fwkacllib/inc/ops/cluster.h @@ -30,18 +30,18 @@ namespace ge { * @par Inputs: * Three required inputs and one optional inputs, including: -* @li x: A 2D tensor of data type float32. -* @li y: A 2D tensor of data type float32. -* @li sum_square_x: An optional 2D tensor of data type float32. +* @li x: A 2D tensor of data type float32. +* @li y: A 2D tensor of data type float32. +* @li sum_square_x: An optional 2D tensor of data type float32. * @li sum_square_y: A 2D tensor of data type float32. \n * @par Attributes: * use_actual_distance: Indicates whether to calculate the complete distance. \n * @par Outputs: -* @li segment_sum: A tensor of data type float32. -* @li segment_count: A tensor of data type float32. -* @li k_mean_total_sum: A tensor of data type float32. +* @li segment_sum: A tensor of data type float32. +* @li segment_count: A tensor of data type float32. +* @li k_mean_total_sum: A tensor of data type float32. */ REG_OP(KMeansCentroids) .INPUT(x, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index de351e43..d812f7a9 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -2549,5 +2549,32 @@ REG_OP(OptionalGetValue) .REQUIRED_ATTR(output_types, ListType) .REQUIRED_ATTR(output_shapes, ListListInt) .OP_END_FACTORY_REG(OptionalGetValue) + +/** +* @brief User define function process. \n + +* @par Inputs: +* @li x: A list of input tensor objects. It's a dynamic input. \n + +* @par Outputs: +* @li y: A list of output tensor objects. It's a dynamic output. \n + +* @par Attributes: +* @li bin_path: User's binary path. +* @li func_name: User defined function name. +* @li output_types: Types of outputs data. +* @li output_shapes: Shapes of outputs data. +* @li _flow_attr_process_node_engine_id: Default process node engine of FlowFunc. 
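+* @par Example: +* A minimal construction sketch (illustrative only: it assumes the standard setters that REG_OP generates +* for this definition, and the library path and function name below are placeholders): +* @code +* auto flow_func = ge::op::FlowFunc("flow_func") +* .create_dynamic_input_x(1) +* .set_attr_bin_path("/usr/local/lib/libuser_flow_func.so") +* .set_attr_func_name("my_flow_func") +* .set_attr_output_types({ge::DT_FLOAT}); +* @endcode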
+*/ +REG_OP(FlowFunc) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, \ + DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, \ + DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .REQUIRED_ATTR(bin_path, String) + .REQUIRED_ATTR(func_name, String) + .ATTR(output_shapes, ListListInt, {}) + .REQUIRED_ATTR(output_types, ListType) + .OP_END_FACTORY_REG(FlowFunc) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 29cfa4f5..4e8fb312 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -48,29 +48,29 @@ REG_OP(AddN) .OP_END_FACTORY_REG(AddN) /** -*@brief Calculates the reversed outputs of the function "maximum". +* @brief Calculates the reversed outputs of the function "maximum". -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li grads: A mutable Tensor. Must be one of the following types: +* @li grads: A mutable Tensor. Must be one of the following types: * float16, float32, int32. -*@li x1: A mutable Tensor of the same type as "grads". -*@li x2: A mutable Tensor of the same type as "grads". \n +* @li x1: A mutable Tensor of the same type as "grads". +* @li x2: A mutable Tensor of the same type as "grads". \n -*@par Attributes: -*@li grad_x: An optional bool. Defaults to "True". +* @par Attributes: +* @li grad_x: An optional bool. Defaults to "True". * If "True", "y1" will be output. * If "False", "y1" will not be output. \n -*@li grad_y: An optional bool. Defaults to "True". +* @li grad_y: An optional bool. Defaults to "True". * If "True", "y2" will be output. * If "False", "y2" will not be output. \n -*@par Outputs: -*@li y1: A mutable Tensor. Has the same type as "grads". -*@li y2: A mutable Tensor. Has the same type as "grads". \n +* @par Outputs: +* @li y1: A mutable Tensor. Has the same type as "grads". +* @li y2: A mutable Tensor. Has the same type as "grads". \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator MaximumGrad. */ REG_OP(MaximumGrad) @@ -84,29 +84,29 @@ REG_OP(MaximumGrad) .OP_END_FACTORY_REG(MaximumGrad) /** -*@brief Calculates the reversed outputs of the function "minimum". +* @brief Calculates the reversed outputs of the function "minimum". -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li grads: A mutable Tensor. Must be one of the following types: +* @li grads: A mutable Tensor. Must be one of the following types: * float16, float32, int32. -*@li x1: A mutable Tensor of the same type as "grads". -*@li x2: A mutable Tensor of the same type as "grads". \n +* @li x1: A mutable Tensor of the same type as "grads". +* @li x2: A mutable Tensor of the same type as "grads". \n -*@par Attributes: -*@li grad_x: An optional bool. Defaults to "True". +* @par Attributes: +* @li grad_x: An optional bool. Defaults to "True". * If "True", "y1" will be output. * If "False", "y1" will not be output. \n -*@li grad_y: An optional bool. Defaults to "True". +* @li grad_y: An optional bool. Defaults to "True". * If "True", "y2" will be output. * If "False", "y2" will not be output. \n -*@par Outputs: -*@li y1: A mutable Tensor. Has the same type as "grads". -*@li y2: A mutable Tensor. 
Has the same type as "grads". \n +* @par Outputs: +* @li y1: A mutable Tensor. Has the same type as "grads". +* @li y2: A mutable Tensor. Has the same type as "grads". \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator MinimumGrad. */ REG_OP(MinimumGrad) @@ -552,16 +552,16 @@ REG_OP(Expint) .OP_END_FACTORY_REG(Expint) /** -*@brief: Computes the reciprocal of "x". +* @brief: Computes the reciprocal of "x". -*@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, +* @par Inputs: +* x: A Tensor. Must be one of the following types: float16, float32, * int32, int64, double, complex64, complex128. \n -*@par Outputs: -*y: A Tensor. Must be one of the following type: float16, float32, int32. \n +* @par Outputs: +* y: A Tensor. Must be one of the following type: float16, float32, int32. \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator Inv. */ REG_OP(Inv) @@ -570,19 +570,19 @@ REG_OP(Inv) .OP_END_FACTORY_REG(Inv) /** -*@brief: Computes "x" reciprocal grad, dx = -1*dy*y*y, where, "y = 1/x", +* @brief: Computes "x" reciprocal grad, dx = -1*dy*y*y, where, "y = 1/x", * and "dy" is the corresponding input gradient. -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float16, float32, +* @li x: A Tensor. Must be one of the following types: float16, float32, * int32, int8. -*@li grad: A Tensor. Has the same type as "x". \n +* @li grad: A Tensor. Has the same type as "x". \n -*@par Outputs: -*y: A Tensor, Has the same type as "x". \n +* @par Outputs: +* y: A Tensor, Has the same type as "x". \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator InvGrad. */ REG_OP(InvGrad) @@ -634,27 +634,27 @@ REG_OP(Log1p) .OP_END_FACTORY_REG(Log1p) /** -*@brief Returns element-wise remainder of division. +* @brief Returns element-wise remainder of division. -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x1: A Tensor. Must be one of the following types: float16, float32, +* @li x1: A Tensor. Must be one of the following types: float16, float32, * int32, int64, int8, uint8, double. -*@li x2: A Tensor of the same type as "x1". \n +* @li x2: A Tensor of the same type as "x1". \n -*@par Outputs: -*y: A Tensor. Has the same type as "x1". \n +* @par Outputs: +* y: A Tensor. Has the same type as "x1". \n -*@attention Constraints: -*@li x2: The input data does not support 0. -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +* @attention Constraints: +* @li x2: The input data does not support 0. +* @li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the * requirement of double thousandths in the mini form. -*@li Due to different architectures, the calculation results of this operator +* @li Due to different architectures, the calculation results of this operator * on NPU and CPU may be inconsistent. -*@li If shape is expressed as (D1,D2... ,Dn), +* @li If shape is expressed as (D1,D2... ,Dn), * then D1*D2... *DN<=1000000,n<=8. \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator Mod. */ REG_OP(Mod) @@ -667,18 +667,18 @@ REG_OP(Mod) .OP_END_FACTORY_REG(Mod) /** -*@brief Returns the truth value of (x != y) element-wise. 
+* @brief Returns the truth value of (x != y) element-wise. -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x1: A Tensor. Must be one of the following types: float16, float32, int32, +* @li x1: A Tensor. Must be one of the following types: float16, float32, int32, * int8, uint8, double, int16, int64, uint16, half, uint32, uint64. -*@li x2: A Tensor of the same type as "x1". \n +* @li x2: A Tensor of the same type as "x1". \n -*@par Outputs: -*y: A Tensor of type bool. \n +* @par Outputs: +* y: A Tensor of type bool. \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator NotEqual. */ REG_OP(NotEqual) @@ -688,17 +688,17 @@ REG_OP(NotEqual) .OP_END_FACTORY_REG(NotEqual) /** -*@brief Computes ndtri element-wise (y = sqrt(2) * erfinv(2 * x - 1)). +* @brief Computes ndtri element-wise (y = sqrt(2) * erfinv(2 * x - 1)). -*@par Inputs: +* @par Inputs: * One input, including: \n -*x: A Tensor. Must be one of the following types: bfloat16, float16, +* x: A Tensor. Must be one of the following types: bfloat16, float16, * float32, double. \n -*@par Outputs: -*y: A Tensor. Has the same type and format as input "x". \n +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x". \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator Ndtri. */ REG_OP(Ndtri) @@ -726,21 +726,21 @@ REG_OP(Neg) .OP_END_FACTORY_REG(Neg) /** -*@brief Returns x1/x2 element-wise for integer types. +* @brief Returns x1/x2 element-wise for integer types. -*@par Inputs: -*@li x1: A Tensor. Must be one of the following types: +* @par Inputs: +* @li x1: A Tensor. Must be one of the following types: * float32, float16, int8, uint8, int32, int16, * uint16, double, int64, complex64, complex128. -*@li x2: A Tensor of the same data type as "x1". \n +* @li x2: A Tensor of the same data type as "x1". \n -*@par Outputs: -*y: A Tensor. Has the same type as "x1". +* @par Outputs: +* y: A Tensor. Has the same type as "x1". -*@attention Constraints: +* @attention Constraints: * Broadcasting is supported. \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator TruncateDiv. \n */ @@ -1427,17 +1427,17 @@ REG_OP(RsqrtGrad) .OP_END_FACTORY_REG(RsqrtGrad) /** -*@brief Computes hyperbolic sine of "x" element-wise. +* @brief Computes hyperbolic sine of "x" element-wise. -*@par Inputs: -*x: An NCHW, NHWC,or ND Tensor of type float, double, complex64, - * complex128, half. \n +* @par Inputs: +* x: An NCHW, NHWC,or ND Tensor of type float, double, complex64, +* complex128, half. \n -*@par Outputs: -*y: A NCHW, NHWC,or ND Tensor of type float, double, complex64, - * complex128, half. \n +* @par Outputs: +* y: A NCHW, NHWC,or ND Tensor of type float, double, complex64, +* complex128, half. \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator Sinh. \n */ @@ -1513,18 +1513,18 @@ REG_OP(DivNoNan) .OP_END_FACTORY_REG(DivNoNan) /** -*@brief Reverses specific dimensions of a tensor. +* @brief Reverses specific dimensions of a tensor. -*@par Inputs: +* @par Inputs: * One input: \n -*x: A Tensor, Must be one of the following types: +* x: A Tensor, Must be one of the following types: * int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64, * and format can be [NCHW,NHWC,ND]. \n -*@par Outputs: -*y: A Tensor. 
Has the same type and format as "x". \n +* @par Outputs: +* y: A Tensor. Has the same type and format as "x". \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator Invert. */ REG_OP(Invert) @@ -1772,16 +1772,16 @@ REG_OP(Atan2) .OP_END_FACTORY_REG(Atan2) /** -*@brief Computes fresnel_cos of x element-wise. +* @brief Computes fresnel_cos of x element-wise. * -*@par Inputs: -*x: A tensor. Must be one of the following types: bfloat16, float16, float32, +* @par Inputs: +* x: A tensor. Must be one of the following types: bfloat16, float16, float32, * double. \n * -*@par Outputs: -*y: A tensor. Has the same type as "x". \n +* @par Outputs: +* y: A tensor. Has the same type as "x". \n * -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator FresnelCos. * */ @@ -1791,17 +1791,17 @@ REG_OP(FresnelCos) .OP_END_FACTORY_REG(FresnelCos) /** -*@brief Computes fresnel_sin of x element-wise. +* @brief Computes fresnel_sin of x element-wise. * -*@par Inputs: -*x: A tensor. Must be one of the following types: bfloat16, float16, float32, +* @par Inputs: +* x: A tensor. Must be one of the following types: bfloat16, float16, float32, * double. \n * -*@par Outputs: -*y: A tensor. Has the same type as "x". \n +* @par Outputs: +* y: A tensor. Has the same type as "x". \n * -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator FresnelSin. * */ @@ -2317,16 +2317,16 @@ REG_OP(Sin) .OP_END_FACTORY_REG(Sin) /** -*@brief: Computes tan of "x" element-wise. +* @brief: Computes tan of "x" element-wise. -*@par Inputs: -*One input: -*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128, int32, int64 +* @par Inputs: +* One input: +* x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128, int32, int64 -*@par Outputs: -*y: A Tensor. Has the same type as "x". \n +* @par Outputs: +* y: A Tensor. Has the same type as "x". \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with TensorFlow operator Tan. */ REG_OP(Tan) @@ -2337,27 +2337,27 @@ REG_OP(Tan) .OP_END_FACTORY_REG(Tan) /** -*@brief Returns element-wise remainder of division. +* @brief Returns element-wise remainder of division. -*@par Inputs: -*Two inputs, including: +* @par Inputs: +* Two inputs, including: * @li x1: A Tensor. Must be one of the following types: float16, float32, * double, int32, int64. * @li x2: A Tensor of the same type as "x1". \n -*@par Outputs: -*y: A Tensor. Has the same type as "x1". \n +* @par Outputs: +* y: A Tensor. Has the same type as "x1". \n -*@attention Constraints: -*@li x2: The input data does not support 0 -*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the -*requirement of double thousandths in the mini form -*@li Due to different architectures, the calculation results of this operator -*on NPU and CPU may be inconsistent -*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... 
*DN<=1000000,n<=8 +* @attention Constraints: +* @li x2: The input data does not support 0 +* @li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +* requirement of double thousandths in the mini form +* @li Due to different architectures, the calculation results of this operator +* on NPU and CPU may be inconsistent +* @li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator TruncateMod. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator TruncateMod. */ REG_OP(TruncateMod) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, @@ -3138,21 +3138,21 @@ REG_OP(SquareSumV2) .OP_END_FACTORY_REG(SquareSumV2) /** -*@brief Confuse reducesumd and square. +* @brief Confuse reducesumd and square. -*@par Inputs: -*x: A Tensor of type float16, float32. \n +* @par Inputs: +* x: A Tensor of type float16, float32. \n -*@par Attributes: +* @par Attributes: * Two attributes, including: \n -*@li axis: A optional listint, specifies the dimensions to reduce. -*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". \n +* @li axis: A optional listint, specifies the dimensions to reduce. +* @li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". \n -*@par Outputs: -y: A Tensor. Has the same type as "x". +* @par Outputs: +* y: A Tensor. Has the same type as "x". -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(SquareSumV1) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -3183,19 +3183,19 @@ REG_OP(SquareSumAll) .OP_END_FACTORY_REG(SquareSumAll) /** -*@brief Confuse broadcast, addn and mul. +* @brief Confuse broadcast, addn and mul. -*@par Inputs: -*Three inputs, including: -*@li x1: A Tensor. Must be one of the following types:int32, int16, +* @par Inputs: +* Three inputs, including: +* @li x1: A Tensor. Must be one of the following types:int32, int16, * float16, float32. -*@li x2: A Tensor of the same type as "x1". -*@li x3: A Tensor of the same type as "x1". \n +* @li x2: A Tensor of the same type as "x1". +* @li x3: A Tensor of the same type as "x1". \n -*@par Outputs: -*y: A Tensor. Has the same type as "x1". \n +* @par Outputs: +* y: A Tensor. Has the same type as "x1". \n -*@par Restrictions: +* @par Restrictions: * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulAddN) @@ -3206,41 +3206,41 @@ REG_OP(FusedMulAddN) .OP_END_FACTORY_REG(FusedMulAddN) /** -*@brief Add 'bias' to 'x'. +* @brief Add 'bias' to 'x'. -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: An ND tensor of type float16 or float32. -*@li bias: An ND tensor of type float16 or float32. \n +* @li x: An ND tensor of type float16 or float32. +* @li bias: An ND tensor of type float16 or float32. \n -*@par Attributes: -*@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1". -*@li num_axes: An optional int32 used to compute the shape of +* @par Attributes: +* @li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1". +* @li num_axes: An optional int32 used to compute the shape of * bias input from a Caffe model trained offline. Defaults to "1". -*@li bias_from_blob: An optional bool. 
If "true", bias is input from a Caffe model trained offline. +* @li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. * If "false", bias is input from online bottoms. Defaults to "true". \n -*@par Outputs: -*y: An ND tensor of type float16 or float32. \n +* @par Outputs: +* y: An ND tensor of type float16 or float32. \n -*@attention Constraints: +* @attention Constraints: * Assume that the shape length of "x" is "n" and that of "bias" is "m". -*@li "axis" is within the range [-n, n-1]. num_axes >= -1. -*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", +* @li "axis" is within the range [-n, n-1]. num_axes >= -1. +* @li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", * the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis). * If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis). -*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1. -*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", +* @li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1. +* @li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", * "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and * the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes). * If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and * the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). -*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0", +* @li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0", * "axis + m" must be less than or equal to "n" and the ith axis of "bias" and * the (i+"axis")th axis of "x" must have the same size (0 <= i < m). * If "axis < 0", "n + axis + m" must be less than or equal to "n" and * the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the Caffe operator Bias. */ @@ -3383,7 +3383,7 @@ REG_OP(Muls) *@par Inputs: *One input, including: -*x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. +*x: A Tensor. Must be one of the following types:float32, float16, int64, int32, int16, bool. *@par Outputs: *y: A Tensor. Has the same type and shape as "x1". \n @@ -3392,9 +3392,9 @@ REG_OP(Muls) * Compatible with the Pytorch operator fills. */ REG_OP(Fills) - .INPUT(x, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) - .REQUIRED_ATTR(value,Float) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT64, DT_INT32, DT_INT16, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT64, DT_INT32, DT_INT16, DT_BOOL})) + .REQUIRED_ATTR(value, Float) .OP_END_FACTORY_REG(Fills) /** @@ -3420,19 +3420,19 @@ REG_OP(Fills) .OP_END_FACTORY_REG(Adds) /** -*@brief Computes the product of x and y and returns 0 if the y is zero, +* @brief Computes the product of x and y and returns 0 if the y is zero, * even if x is NaN or infinite. -*@par Inputs: +* @par Inputs: * Two inputs, including: \n -*@li x1: A Tensor. Must be one of the following types:float16, float32, +* @li x1: A Tensor. 
Must be one of the following types:float16, float32, * double, complex64, complex128. -*@li x2: A Tensor. Has the same type and shape as "x1". \n +* @li x2: A Tensor. Has the same type and shape as "x1". \n -*@par Outputs: -*y: A Tensor. Has the same type and shape as "x1". \n +* @par Outputs: +* y: A Tensor. Has the same type and shape as "x1". \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator MulNoNan. */ REG_OP(MulNoNan) diff --git a/third_party/fwkacllib/inc/ops/experiment_ops.h b/third_party/fwkacllib/inc/ops/experiment_ops.h new file mode 100644 index 00000000..769b5a0b --- /dev/null +++ b/third_party/fwkacllib/inc/ops/experiment_ops.h @@ -0,0 +1,158 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file experiment_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_EXPERIMENT_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_EXPERIMENT_OPS_H_ + +#include "graph/operator_reg.h" +namespace ge { +/** +* @brief Updates "var" according to the AdamW algorithm. +* +* @attention Constraints: +* The input tensors must have the same shape. +* +* @par Inputs: +* @li var: A mutable Tensor of the type TensorType::NumberType(). +* Should be from a Variable(). +* @li m: A mutable Tensor of the same type as "var". +* Should be from a Variable(). +* @li v: A mutable Tensor of the same type as "var". +* Should be from a Variable(). +* @li beta1_power: A scalar of the same type as "var". +* @li beta2_power: A scalar of the same type as "var". +* @li lr: learning_rate. A scalar of the same type as "var". +* @li weight_decay: A scalar of the same type as "var". +* @li beta1: A scalar of the same type as "var". +* @li beta2: A scalar of the same type as "var". +* @li epsilon: A scalar of the same type as "var". +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li max_grad_norm: A mutable Tensor of the same type as "var", an optional input. +* Should be from a Variable(). +* +* @par Attributes: +* @li amsgrad: An optional bool. Defaults to "False". +* If "True", the max_grad_norm input and output must be provided. +* @li maximize: An optional bool. Defaults to "False". +* +* @par Outputs: +* @li var: A mutable tensor. Has the same type as input "var". +* @li m: A mutable tensor. Has the same type as input "m". +* @li v: A mutable tensor. Has the same type as input "v". 
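+* +* @par Note: +* For reference, the decoupled-weight-decay (AdamW) step this op models is, per element, with +* amsgrad and maximize both "False" (a sketch of the math, not the exact device kernel): +* m = beta1 * m + (1 - beta1) * grad +* v = beta2 * v + (1 - beta2) * grad * grad +* var = var - lr * weight_decay * var +* var = var - lr * (m / (1 - beta1_power)) / (sqrt(v / (1 - beta2_power)) + epsilon)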
\n +*/ +REG_OP(ApplyAdamW) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(v, TensorType::NumberType()) + .INPUT(beta1_power, TensorType::NumberType()) + .INPUT(beta2_power, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(weight_decay, TensorType::NumberType()) + .INPUT(beta1, TensorType::NumberType()) + .INPUT(beta2, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OPTIONAL_INPUT(max_grad_norm, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(m, TensorType::NumberType()) + .OUTPUT(v, TensorType::NumberType()) + .ATTR(amsgrad, Bool, false) + .ATTR(maximize, Bool, false) + .OP_END_FACTORY_REG(ApplyAdamW) + +/** +* @brief Multiplies matrix "a" by matrix "b", producing "a * b". \n +* @par Inputs: +* Four inputs, including: +* @li x1: A matrix Tensor. Must be one of the following types: float32, +* float16, int32, int8, int4, bf16. 3D. Has format ND. +* @li x2: A matrix Tensor. Must be one of the following types: float32, +* float16, int32, int8, int4, bf16. 3D. Has format ND. +* @li bias: An optional Tensor. Must be one of the following types: +* float32, float16, int32, bf16. 1D. Has format ND. +* @li offset_w: An optional Tensor. Must be one of the following types: +* int8, int4. Has format ND. \n + +* @par Attributes: +* Four attributes, including: +* @li perm_x1: A list of int. "x1" is permuted to shape [B, M, K] before multiplication. +* @li perm_x2: A list of int. "x2" is permuted to shape [B, K, N] before multiplication. +* @li perm_y: A list of int. "y" is permuted after multiplication. +* @li offset_x: An optional integer for quantized TransposeBatchMatMul. +* The negative offset added to the input "x1" for int8, int4 type. Ensure offset_x is +* within the effective range of the input data type. Defaults to "0". \n + +* @par Outputs: +* y: The result matrix Tensor. 3D. Must be one of the following +* types: float32, float16, int32, bf16. Has format ND. \n +*/ +REG_OP(TransposeBatchMatMul) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .ATTR(perm_x1, ListInt, {}) + .ATTR(perm_x2, ListInt, {}) + .ATTR(perm_y, ListInt, {}) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(TransposeBatchMatMul) + +/** +* @brief Performs non-maximum suppression (NMS) on the rotated boxes according +* to their intersection-over-union (IoU). Rotated NMS iteratively removes lower +* scoring rotated boxes which have an IoU greater than iou_threshold with +* another (higher scoring) rotated box. + +* @par Inputs: +* Three inputs, including: +* @li boxes: A 2D Tensor of float16 or float32 with shape (N, 5). Rotated boxes to +* perform NMS on. They are expected to be in (x1, y1, x2, y2, angle_degrees) format. +* @li scores: A 1D Tensor of float16 or float32 with shape (N). Scores for each one of +* the rotated boxes. +* @li labels: A 1D Tensor of int32 or int64 with shape (N). Labels for each one of +* the rotated boxes. + +* @par Attributes: +* iou_threshold: A required float attribute. Discards all overlapping rotated +* boxes with IoU > iou_threshold. 
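+* +* @par Note: +* Conceptually this is the usual greedy NMS loop (an illustrative host-side sketch; ArgsortDescending and +* RotatedIoU are hypothetical helpers, not part of this header): +* @code +* std::vector<int32_t> keep; +* for (const int32_t i : ArgsortDescending(scores)) { +* bool suppressed = false; +* for (const int32_t j : keep) { +* if (RotatedIoU(boxes[i], boxes[j]) > iou_threshold) { suppressed = true; break; } +* } +* if (!suppressed) { keep.push_back(i); } +* } +* @endcode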
+ +* @par Outputs: +* Two outputs, including: +* @li selected_detections: A 2D Tensor of float16 or float32 with shape (N, 5). +* The selected boxes that are kept by Rotated NMS, sorted in decreasing order of scores. +* @li keep_indices: A 1D Tensor of int32 or int64 with shape (N). The indices of +* selected_detections. + +* @attention Constraints: +* Currently, the tensor type of the inputs (boxes, scores) only supports float. +* The tensor type of keep_indices only supports int32. +*/ +REG_OP(RotatedNMS) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(labels, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(selected_detections, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(keep_indices, TensorType({DT_INT32, DT_INT64})) + .REQUIRED_ATTR(iou_threshold, Float) + .OP_END_FACTORY_REG(RotatedNMS) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_EXPERIMENT_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index da968f5e..3d1a7060 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -362,6 +362,59 @@ REG_OP(ToBool) .OUTPUT(output, DT_BOOL) .OP_END_FACTORY_REG(ToBool) +/** + * @brief Abstracts a tiling function into an op definition. + * The input can be either data or a shape. \n + + * @par Inputs: + * @li x: the input data. All types are available. + * @li output_shape: the shape of the previous op's output. All types are available. \n + + * @par Outputs: + * @li tiling_data: tiling data of the tiling function. It should be a buffer. + * @li tiling_key: tiling key of the tiling function. + * @li block_dim: block dim of the tiling function. + * @li tiling_cond: tiling condition of the tiling function, which will be used to determine the real kernel to execute. \n + + * @par Attributes: + * @li tiling_node: A string. The real tiling node, such as matmul. + * @li op_type: A string. Op type of the original node. \n + + * @par Third-party framework compatibility + */ +REG_OP(OpTiling) + .DYNAMIC_INPUT(x, TensorType::ALL()) + .DYNAMIC_INPUT(output_shape, TensorType::ALL()) + .OUTPUT(tiling_data, TensorType({DT_UINT8})) + .OUTPUT(tiling_key, TensorType({DT_UINT64})) + .OUTPUT(block_dim, TensorType({DT_INT32})) + .OUTPUT(tiling_cond, TensorType({DT_INT32})) + .REQUIRED_ATTR(tiling_node, String) + .REQUIRED_ATTR(op_type, String) + .OP_END_FACTORY_REG(OpTiling) + +/** + * @brief Calculates a condition value from the input tensor; the value will be used as the cond input of If or the branch_index input of Case. \n + + * @par Inputs: + * @li x: the data or shape of the input. All types are available. + + * @par Outputs: + * @li cond: the condition value calculated by the cond function. + It will be the cond input of If or the branch_index input of Case. \n + + * @par Attributes: + * @li cond_func: A string. The registered condition function used to calculate the condition value. + * @li x_dependency: A list of ints with one entry per input: 0 means the input is consumed by its shape, 1 by its data. 
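+ * @par Example: + * For instance, with two inputs where the first is consumed by its shape and the second by its data, + * x_dependency would be {0, 1} (illustrative values).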
\n + + * @par Third-party framework compatibility + */ +REG_OP(ConditionCalc) + .DYNAMIC_INPUT(x, TensorType::ALL()) + .OUTPUT(cond, TensorType({DT_INT32})) + .REQUIRED_ATTR(cond_func, String) + .REQUIRED_ATTR(x_dependency, ListInt) + .OP_END_FACTORY_REG(ConditionCalc) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index 497f6a68..5db47345 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -28,7 +28,7 @@ namespace ge { * @brief Outputs a tensor gathering all input tensors. * @par Inputs: * x: A tensor. Must be one of the following types: int8, int16, int32, float16, - float32. + float32, uint8, uint16, uint32, float64. * @par Attributes: * @li rank_size: A required integer identifying the number of ranks participating in the op. @@ -41,8 +41,10 @@ namespace ge { as the name of a world group. */ REG_OP(HcomAllGather) - .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_FLOAT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_FLOAT64})) .REQUIRED_ATTR(rank_size, Int) .REQUIRED_ATTR(group, String) .OP_END_FACTORY_REG(HcomAllGather) @@ -99,8 +101,10 @@ REG_OP(HcomAllReduce) as the name of a world group. */ REG_OP(HcomBroadcast) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) - .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_FLOAT64})) + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_FLOAT64})) .REQUIRED_ATTR(root_rank, Int) .REQUIRED_ATTR(group, String) .ATTR(fusion, Int, 0) @@ -186,7 +190,8 @@ REG_OP(HcomReduceScatter) * @see HcomReceive */ REG_OP(HcomSend) - .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_FLOAT64})) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(sr_tag, Int) .REQUIRED_ATTR(dest_rank, Int) @@ -217,7 +222,8 @@ REG_OP(HcomSend) * @see HcomSend */ REG_OP(HcomReceive) - .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_FLOAT64})) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(sr_tag, Int) .REQUIRED_ATTR(src_rank, Int) @@ -303,12 +309,14 @@ REG_OP(HcomRemoteScatterWrite) using the RDMA. 
*/ REG_OP(HcomAllToAllV) - .INPUT(send_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .INPUT(send_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_FLOAT64})) .INPUT(send_counts, TensorType({DT_INT64})) .INPUT(send_displacements, TensorType({DT_INT64})) .INPUT(recv_counts, TensorType({DT_INT64})) .INPUT(recv_displacements, TensorType({DT_INT64})) - .OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_FLOAT64})) .REQUIRED_ATTR(group, String) .OP_END_FACTORY_REG(HcomAllToAllV) @@ -345,8 +353,10 @@ REG_OP(HcomGatherAllToAllV) .INPUT(addrinfo_count_per_rank, TensorType({DT_INT64})) .INPUT(recv_counts, TensorType({DT_INT64})) .INPUT(recv_displacements, TensorType({DT_INT64})) - .OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) - .OUTPUT(gathered, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_FLOAT64})) + .OUTPUT(gathered, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_FLOAT64})) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(dtype, Type) .REQUIRED_ATTR(addr_length, Int) diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 37f0e2b1..3db3cb84 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -2371,5 +2371,78 @@ REG_OP(ImageProjectiveTransform) .ATTR(fill_mode, String, "CONSTANT") .OUTPUT(transformed_images, TensorType({DT_UINT8, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .OP_END_FACTORY_REG(ImageProjectiveTransform) + +/** +* @brief Applies projective transforms to images. \n + +* @par Inputs: +* @li images: [batch, height, width, channels], 4-D tensor. +* @li transforms: [batch, 8] or [1, 8] matrix, 2-D tensor. +* @li output_shape: [new_height, new_width], 1-D tensor. +* @li fill_value: [scalar], 1-D tensor. + +* @par Attributes: +* @li interpolation: Interpolation method, "NEAREST" or "BILINEAR", 0-D tensor. +* @li fill_mode: Defaults to "CONSTANT". Fill mode, "REFLECT", "WRAP", or "CONSTANT", 0-D tensor. + +* @par Outputs: +* transformed_images: Has the same type as images, 4-D tensor with shape [batch, new_height, new_width, channels]. \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow ImageProjectiveTransformV2 operator. +*/ +REG_OP(ImageProjectiveTransformV2) + .INPUT(images, TensorType({DT_UINT8, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(transforms, TensorType({DT_FLOAT})) + .INPUT(output_shape, TensorType({DT_INT32})) + .OPTIONAL_INPUT(fill_value, TensorType({DT_UINT8, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(interpolation, String) + .ATTR(fill_mode, String, "CONSTANT") + .OUTPUT(transformed_images, TensorType({DT_UINT8, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(ImageProjectiveTransformV2) + +/** +* @brief Extracts a glimpse from the input tensor. \n + +* @par Inputs: +* The input "input" must be a 4-D tensor. 
Inputs include: +* @li input: A 4-D float tensor of shape [batch_size, height, width, channels]. The format must be NHWC. +* @li size: A 1-D tensor of 2 elements containing the size of the glimpses to extract. The glimpse height must be specified first, followed by the glimpse width. +* @li offsets: A 2-D integer tensor of shape [batch_size, 2] containing the y, x locations of the center of each window. \n + +* @par Attributes: +* @li centered: indicates if the offset coordinates are centered relative to the image, in which case the (0, 0) offset is relative to the center of the input images. If false, the (0,0) offset corresponds to the upper left corner of the input images. +* @li normalized: indicates if the offset coordinates are normalized. +* @li uniform_noise: indicates if the noise should be generated using a uniform distribution or a Gaussian distribution. +* @li noise: indicates if the noise should be uniform, gaussian, or zero. The default is uniform, which means the noise type will be decided by uniform_noise. \n + +* @par Outputs: +* glimpse: A tensor representing the glimpses [batch_size, glimpse_height, glimpse_width, channels]. The format must be NHWC. \n + +* @par Third-party framework compatibility +* Compatible with tensorflow ExtractGlimpseV2 operator. +*/ + +REG_OP(ExtractGlimpseV2) + .INPUT(input, TensorType({DT_FLOAT})) + .INPUT(size, TensorType({DT_INT32})) + .INPUT(offsets, TensorType({DT_FLOAT})) + .OUTPUT(glimpse, TensorType({DT_FLOAT})) + .ATTR(centered, Bool, true) + .ATTR(normalized, Bool, true) + .ATTR(uniform_noise, Bool, true) + .ATTR(noise, String, "uniform") + .OP_END_FACTORY_REG(ExtractGlimpseV2) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 8e9ee4db..3d162d3a 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -230,7 +230,7 @@ REG_OP(Bucketize) *input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n *@par Outputs: -*output_y: A tensor with the same type and shape of input_x \n +* output_y: A tensor with the same type and shape as input_x \n *@par Third-party framework compatibility *Compatible with the Pytorch operator Trunc. \n @@ -1008,6 +1008,36 @@ REG_OP(Complex) .ATTR(Tout, Type, DT_COMPLEX64) .OP_END_FACTORY_REG(Complex) +/** +* @brief Counts the number of occurrences of each value in an integer array. \n + +* @par Inputs: +* Five inputs, including: +* indices: A 2D Tensor of type int64. +* values: A 1D Tensor of type int32 or int64. +* dense_shape: A 1D Tensor of type int64. +* size: A non-negative scalar Tensor. +* weights: A Tensor of type int32, int64, float32 or float64, or a length-0 Tensor, in which case it acts as all weights equal to 1. \n + +* @par Attributes: +* binary_output: An optional bool. Defaults to False. \n + +* @par Outputs: +* output: A Tensor. Has the same type as `weights`. \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseBincount. +*/ +REG_OP(SparseBincount) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_INT32, DT_INT64})) + .INPUT(dense_shape, TensorType({DT_INT64})) + .INPUT(size, TensorType({DT_INT32, DT_INT64})) + .INPUT(weights, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE})) + .ATTR(binary_output, Bool, false) + .OUTPUT(output, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(SparseBincount) + /** * @brief deal complex. 
@@ -1098,7 +1128,7 @@ REG_OP(Cross) .OP_END_FACTORY_REG(Cross) /** - *@brief Computes batched the p-norm distance between each pair of + * @brief Computes the batched p-norm distance between each pair of *the two collections of row vectors. \n *@par Inputs: @@ -1445,6 +1475,72 @@ REG_OP(SparseCountSparseOutput) .ATTR(maxlength, Int, -1) .REQUIRED_ATTR(binary_output, Bool) .OP_END_FACTORY_REG(SparseCountSparseOutput) + +/** +* @brief Counts the number of occurrences of each value in an integer array. \n + +* @par Inputs: +* @li splits: A Tensor of type int64. 1D int64 Tensor. +* @li values: A Tensor. Must be one of the following types: int32, int64. 2D int Tensor. +* @li size: A Tensor. Must have the same type as values. A non-negative int scalar Tensor. +* @li weights: A Tensor of type float32, +* with the same shape as the input, +* or a length-0 Tensor, in which case it acts as all weights equal to 1. \n + +* @par Outputs: +* @li output: A Tensor with length "size" for each stride, with the same dtype as "weights". \n + +* @par Attributes: +* binary_output: An optional bool. Defaults to False. +* Whether the kernel should count the appearance or the number of occurrences. \n + +* @attention Constraints: +* The operator will use the interface set_atomic_add(), therefore weights and output should be float32 only. \n + +* @par Third-party framework compatibility +* Compatible with tensorflow RaggedBinCount operator. +*/ + +REG_OP(RaggedBinCount) + .INPUT(splits, TensorType(DT_INT64)) + .INPUT(values, TensorType({DT_INT32, DT_INT64})) + .INPUT(size, TensorType({DT_INT32, DT_INT64})) + .INPUT(weights, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(output, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE})) + .ATTR(binary_output, Bool, false) + .OP_END_FACTORY_REG(RaggedBinCount) + +/** +* @brief Counts the number of occurrences of each value in an integer array. \n + +* @par Inputs: +* @li input: A Tensor of type int32, int64. 1D or 2D int Tensor. +* @li size: A Tensor. Must have the same type as input. A non-negative int scalar Tensor. +* @li weights: A Tensor. Must be one of the following types: int32, int64, float32, float64, +* with the same shape as input, +* or a length-0 Tensor, in which case it acts as all weights equal to 1. \n + +* @par Outputs: +* @li output: A Tensor with length "size" for each stride, with the same dtype as "weights". \n + +* @par Attributes: +* binary_output: An optional bool. Defaults to False. +* Whether the kernel should count the appearance or the number of occurrences. \n + +* @attention Constraints: +* The operator will use the interface set_atomic_add(), therefore weights and output should be float32 only. \n + +* @par Third-party framework compatibility +* Compatible with tensorflow DenseBincount operator. 
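+* +* @par Example: +* Illustrative behavior with binary_output = false: input = [1, 1, 3] and size = 4 with length-0 weights +* yields output = [0, 2, 0, 1]; with weights = [0.5, 0.25, 1.0] it yields output = [0.0, 0.75, 0.0, 1.0].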
+/** +* @brief Counts the number of occurrences of each value in an integer array. \n + +* @par Inputs: +* @li input: A Tensor of type int32, int64. 1D or 2D int Tensor. +* @li size: A Tensor. Must have the same type as input. non-negative int scalar Tensor. +* @li weights: A Tensor. Must be one of the following types: int32, int64, float32, float64. + with the same shape as input, + or a length-0 Tensor, in which case it acts as all weights equal to 1. \n + +* @par Outputs: +* @li output: A Tensor of length "size" for each stride, with the same dtype as weights. \n + +* @par Attributes: +* binary_output: An optional bool. Defaults to False; + whether the kernel should count the appearance or the number of occurrences. \n + +* @attention Constraints: +* The operator will use the interface set_atomic_add(), therefore weights and output should be float32 only. \n + +* @par Third-party framework compatibility +* Compatible with tensorflow DenseBincount operator. +*/ + +REG_OP(DenseBincount) + .INPUT(input, TensorType({DT_INT32, DT_INT64})) + .INPUT(size, TensorType({DT_INT32, DT_INT64})) + .INPUT(weights, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(output, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE})) + .ATTR(binary_output, Bool, false) + .OP_END_FACTORY_REG(DenseBincount) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 15f648f0..272d4021 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -199,27 +199,27 @@ REG_OP(SwinTransformerLnQKV) .OP_END_FACTORY_REG(SwinTransformerLnQKV) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n -*@par Inputs: -*Three inputs, including: +* @brief Multiplies matrix "a" by matrix "b", producing "a * b". \n +* @par Inputs: +* Three inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32, bfloat16. Has format [ND, NHWC]. +* float32, int32. Has format [ND, NHWC]. * @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32, bfloat16. Has format [ND, NHWC]. +* float32, int32. Has format [ND, NHWC]. * @li bias: A optional 1D Tensor. Must be one of the following types: float16, -* float32, int32, bfloat16. Has format [ND, NHWC]. \n +* float32, int32. Has format [ND, NHWC]. \n -*@par Attributes: -*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to +* @par Attributes: +* @li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to * [K, M]. -*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to +* @li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to * [K, M]. \n -*@par Outputs: -*y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32, bfloat16. Has format [ND, NHWC]. \n +* @par Outputs: +* y: The result matrix Tensor. 2D. Must be one of the following types: float16, +* float32, int32. Has format [ND, NHWC]. \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(MatMul) @@ -232,36 +232,36 @@ REG_OP(MatMul) .OP_END_FACTORY_REG(MatMul)
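For the MatMul contract, transpose_x1/transpose_x2 decide whether each operand is read as stored or transposed before the product y = op(x1) * op(x2). A small row-major host-side reference of that contract (the layout is an assumption of this sketch, not the device kernel):

#include <iostream>
#include <vector>

// y[m][n] = sum_k op(x1)[m][k] * op(x2)[k][n], with op() selected by the
// transpose flags; x1 is r1 x c1 and x2 is r2 x c2 as stored, row-major.
std::vector<float> MatMulRef(const std::vector<float> &x1, int r1, int c1, bool t1,
                             const std::vector<float> &x2, int r2, int c2, bool t2) {
  const int m = t1 ? c1 : r1, k = t1 ? r1 : c1, n = t2 ? r2 : c2;
  std::vector<float> y(m * n, 0.0f);
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j)
      for (int p = 0; p < k; ++p) {
        const float a = t1 ? x1[p * c1 + i] : x1[i * c1 + p];
        const float b = t2 ? x2[j * c2 + p] : x2[p * c2 + j];
        y[i * n + j] += a * b;
      }
  return y;
}

int main() {
  // [1 2; 3 4] * [5 6; 7 8] = [19 22; 43 50]
  for (float v : MatMulRef({1, 2, 3, 4}, 2, 2, false, {5, 6, 7, 8}, 2, 2, false))
    std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}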
/** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n -*@par Inputs: -*Four inputs, including: +* @brief Multiplies matrix "a" by matrix "b", producing "a * b". \n +* @par Inputs: +* Four inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: float32, -* float16, int32, int8, int4, bfloat16. Has format [ND, NHWC]. +* float16, int32, int8, int4. Has format [ND, NHWC]. * @li x2: A matrix Tensor. 2D. Must be one of the following types: float32, -* float16, int32, int8, int4, bfloat16. Has format [ND, NHWC]. +* float16, int32, int8, int4. Has format [ND, NHWC]. * @li bias: A 1D Tensor. Must be one of the following types: float32, -* float16, int32 bfloat16. Has format [ND, NHWC]. -* @li offset_w: A Optional 1D Tensor for quantized inference. Type is int8. +* float16, int32. Has format [ND, NHWC]. +* @li offset_w: An optional 1D Tensor for quantized inference. Type is int8, int4. * Reserved. \n -*@par Attributes: +* @par Attributes: * @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to * [M, K]. * @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to -[K, N]. +* [K, N]. * @li offset_x: An optional integer for quantized MatMulV2. * The negative offset added to the input x1 for int8 type. Ensure offset_x * within the effective range of int8 [-128, 127]. Defaults to "0". \n -*@par Outputs: -*y: The result matrix Tensor. 2D. Must be one of the following types: float32, -* float16, int32, bfloat16. Has format [ND, NHWC]. \n +* @par Outputs: +* y: The result matrix Tensor. 2D. Must be one of the following types: float32, +* float16, int32. Has format [ND, NHWC]. \n -*@attention Constraints: +* @attention Constraints: * if performances better in format NZ, please close * "MatmulTransdataFusionPass" in fusion configuration. \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(MatMulV2) @@ -276,9 +276,9 @@ REG_OP(MatMulV2) .OP_END_FACTORY_REG(MatMulV2) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n -*@par Inputs: -*Five inputs, including: +* @brief Multiplies matrix "a" by matrix "b", producing "a * b". \n +* @par Inputs: +* Five inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: int8. * @li x2: A matrix Tensor. 2D. Must be one of the following types: int8. * @li compress_index: A compress index matrix of type int8. @@ -287,20 +287,20 @@ REG_OP(MatMulV2) * @li offset_w: An optional matrix Tensor. 2D. Must be one of the following * types: int8. \n -*@par Attributes: -*@li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to +* @par Attributes: +* @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to * [M, K]. -*@li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to +* @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to * [K, N]. -*@li offset_x: An optional integer for quantized MatMulV2Compress. -*The negative offset added to the input x1 for int8 type. Ensure offset_x +* @li offset_x: An optional integer for quantized MatMulV2Compress. +* The negative offset added to the input x1 for int8 type. Ensure offset_x * within the effective range of int8 [-128, 127]. Defaults to "0". \n -*@par Outputs: -*y: The result matrix Tensor. 2D. Must be one of the following types: int32, +* @par Outputs: +* y: The result matrix Tensor. 2D. Must be one of the following types: int32, * float16. \n -*@attention Constraints: +* @attention Constraints: * if performances better in format NZ, please close * "MatmulTransdataFusionPass" in fusion configuration. @@ -318,23 +318,23 @@ REG_OP(MatMulV2Compress) .OP_END_FACTORY_REG(MatMulV2Compress) /** -*@brief Performs Matrix-to-matrix Multiply, +* @brief Performs Matrix-to-matrix Multiply, * producing y=alpha[0]*a*b+beta[0]*c. \n -*@attention Constraints: +* @attention Constraints: * For better performance, The k-axis must be aligned to 16 (input type * is float16) or 32 (input type is int8). \n -*@par Inputs: -*Five inputs, including: +* @par Inputs: +* Five inputs, including: * @li a: A matrix Tensor. Must be one of the following types:float32, float16, * int8, int32. Has format ND. * @li b: A matrix Tensor. Must be one of the following types:float32, float16, * int8, int32. Has format ND. -*@li c: A matrix Tensor. Must be one of the following types:float32, float16, +* @li c: A matrix Tensor. Must be one of the following types:float32, float16, * int8, int32. Has format ND. * @li alpha: A 1D Tensor.
The shape of alpha is [1].Must be one of the * following types: float16, int32, float32, int8. Has format ND. -*@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following +* @li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following * types: float16, int32, float32, int8. Has format ND.\n * The format of a, b, c has restriction:\n * When type of a is int8 and type of c is int32, the format of a, b, c should @@ -346,16 +346,16 @@ REG_OP(MatMulV2Compress) * When type of a is float16 and type of c is float32, the format of a, b, c * should all be ND. \n -*@par Attributes: -*Two attributes, including: -*@li transpose_a: Optional. A bool. If True, changes the shape of "a" from +* @par Attributes: +* Two attributes, including: +* @li transpose_a: Optional. A bool. If True, changes the shape of "a" from * [M, K] to [K, M]. -*@li transpose_b: Optional. A bool. If True, changes the shape of "b" from +* @li transpose_b: Optional. A bool. If True, changes the shape of "b" from * [K, N] to [N, K]. \n -*@par Outputs: -*y: The result matrix Tensor. Must be one of the following types: float16, -* float32, int32. Has format [ND], the format should be equal to a. +* @par Outputs: +* y: The result matrix Tensor. Must be one of the following types: float16, +* float32, int32, int8. Has format [ND]; the format must be the same as that of "a". */ REG_OP(GEMM) @@ -370,27 +370,27 @@ REG_OP(GEMM) .OP_END_FACTORY_REG(GEMM)
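A host-side sketch of the GEMM formula above, y = alpha[0]*a*b + beta[0]*c, for row-major [M, K] x [K, N] operands with both transpose attributes left false (illustrative only, not the device implementation):

#include <iostream>
#include <vector>

// y = alpha * (a * b) + beta * c, all matrices row-major.
std::vector<float> GemmRef(const std::vector<float> &a, const std::vector<float> &b,
                           const std::vector<float> &c, int M, int K, int N,
                           float alpha, float beta) {
  std::vector<float> y(M * N);
  for (int i = 0; i < M; ++i)
    for (int j = 0; j < N; ++j) {
      float acc = 0.0f;
      for (int p = 0; p < K; ++p) acc += a[i * K + p] * b[p * N + j];
      y[i * N + j] = alpha * acc + beta * c[i * N + j];
    }
  return y;
}

int main() {
  // alpha=2, beta=1, a=b=I2, c = all ones -> y = [3 1; 1 3]
  auto y = GemmRef({1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 1, 1}, 2, 2, 2, 2.0f, 1.0f);
  for (float v : y) std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}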
/** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n -*@par Inputs: -*Two inputs, including: +* @brief Multiplies matrix "a" by matrix "b", producing "a * b". \n +* @par Inputs: +* Two inputs, including: * @li x1: A matrix Tensor. Must be one of the following types: float16, -* float32, int32, bfloat16. 2D or higher. Has format [ND, NHWC]. +* float32, int32. 2D or higher. Has format [ND, NHWC]. * @li x2: A matrix Tensor. Must be one of the following types: float16, -* float32, int32, bfloat16. 2D or higher. Has format [ND, NHWC]. \n +* float32, int32. 2D or higher. Has format [ND, NHWC]. \n -*@par Attributes: -*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] +* @par Attributes: +* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] * to [B, K, M]. -*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] +* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] * to [B, K, M]. \n -*@par Outputs: +* @par Outputs: * y: The result matrix Tensor. 2D or higher. Must be one of the following -* types: float16, bfloat16, +* types: float16, * float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape * length as "x1" and "x2". \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. */ @@ -408,12 +408,12 @@ REG_OP(BatchMatMul) * @par Inputs: * Three inputs, including: * @li x1: A matrix Tensor. Must be one of the following types: float16, -* float32, int32, int8, int4, bfloat16. 2D or higher. Has format [ND, NHWC]. +* float32, int32, int8, int4. 2D or higher. Has format [ND, NHWC]. * @li x2: A matrix Tensor. Must be one of the following types: float16, -* float32, int32, int8, int4, bfloat16. 2D or higher. Has format [ND, NHWC]. +* float32, int32, int8, int4. 2D or higher. Has format [ND, NHWC]. * @li bias: A optional Tensor. Must be one of the following types: * float16, -* float32, int32, int8, int4, bfloat16. Has format [ND, NHWC]. +* float32, int32. Has format [ND, NHWC]. * @li offset_w: A optional Tensor. Must be one of the following types: * int8, int4. Has format [ND, NHWC]. \n @@ -429,7 +429,7 @@ REG_OP(BatchMatMul) * float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape * length as "x1" and "x2". \n -*@attention Constraints: +* @attention Constraints: * if performances better in format NZ, please close * "MatmulTransdataFusionPass" in fusion configuration. \n @@ -449,22 +449,22 @@ REG_OP(BatchMatMulV2) .OP_END_FACTORY_REG(BatchMatMulV2) /** -*@brief Computes half the L2 norm of a tensor without the sqrt . \n +* @brief Computes half the L2 norm of a tensor without the sqrt . \n -*@par Inputs: +* @par Inputs: * x: A Tensor. * TensorType::FloatingDataType() . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x". \n +* @par Outputs: +* y: A Tensor. Has the same type as "x". \n -*@attention Constraints: +* @attention Constraints: * if performances better in format NZ, please close - "MatmulTransdataFusionPass" in fusion configuration. \n +* "MatmulTransdataFusionPass" in fusion configuration. \n -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator L2Loss. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator L2Loss. */ REG_OP(L2Loss) .INPUT(x, TensorType::FloatingDataType()) @@ -472,17 +472,17 @@ .OP_END_FACTORY_REG(L2Loss) /** -*@brief: Returns a batched diagonal tensor with a given batched diagonal values . \n +* @brief: Returns a batched diagonal tensor with given batched diagonal values . \n -*@par Inputs: -*x: A Tensor. Must be one of the following types: +* @par Inputs: +* x: A Tensor. Must be one of the following types: * float16, float32, double, int32, uint8, int16, int8, complex64, int64, * qint8, quint8, qint32, uint16, complex128, uint32, uint64 . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixDiag. */ REG_OP(MatrixDiag) @@ -491,17 +491,17 @@ .OP_END_FACTORY_REG(MatrixDiag) /** -*@brief: Returns a batched diagonal tensor with a given batched diagonal values . \n +* @brief: Returns a batched diagonal tensor with given batched diagonal values . \n -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. -*@li assist: A Tensor of the same type as "x" . \n +* @li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +* @li assist: A Tensor of the same type as "x" . \n -*@par Outputs: +* @par Outputs: *y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixDiag. * * @par Restrictions: @@ -514,17 +514,17 @@ REG_OP(MatrixDiagD) .OP_END_FACTORY_REG(MatrixDiagD) /** -*@brief: Returns the batched diagonal part of a batched tensor . \n +* @brief: Returns the batched diagonal part of a batched tensor . \n -*@par Inputs: -*x: A Tensor.
Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64 . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixDiagPart. */ REG_OP(MatrixDiagPart) @@ -533,17 +533,17 @@ REG_OP(MatrixDiagPart) .OP_END_FACTORY_REG(MatrixDiagPart) /** -*@brief: Returns the batched diagonal part of a batched tensor . \n +* @brief: Returns the batched diagonal part of a batched tensor . \n -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. -*@li assist: A Tensor of the same type as "x" . \n +* @li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +* @li assist: A Tensor of the same type as "x" . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixDiagPart. * * @par Restrictions: @@ -556,19 +556,19 @@ REG_OP(MatrixDiagPartD) .OP_END_FACTORY_REG(MatrixDiagPartD) /** -*@brief: Returns a batched matrix tensor with new batched diagonal values . \n +* @brief: Returns a batched matrix tensor with new batched diagonal values . \n -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: +* @li x: A Tensor. Must be one of the following types: * float16, float32, double, int32, uint8, int16, int8, complex64, int64, * qint8, quint8, qint32, uint16, complex128, uint32, uint64. -*@li diagonal: A Tensor of the same type as "x" . \n +* @li diagonal: A Tensor of the same type as "x" . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixSetDiag. */ REG_OP(MatrixSetDiag) @@ -578,18 +578,18 @@ REG_OP(MatrixSetDiag) .OP_END_FACTORY_REG(MatrixSetDiag) /** -*@brief: Returns a batched matrix tensor with new batched diagonal values . \n +* @brief: Returns a batched matrix tensor with new batched diagonal values . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. -*@li diagonal: A Tensor of the same type as "x". -*@li assist: A Tensor of the same type as "x" . \n +* @li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +* @li diagonal: A Tensor of the same type as "x". +* @li assist: A Tensor of the same type as "x" . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixSetDiag. * * @par Restrictions: @@ -653,29 +653,83 @@ REG_OP(AttentionScore) .OP_END_FACTORY_REG(AttentionScore) /** -*@brief Applies sparse "updates" to individual values or slices in a Variable . \n +* @brief Function AttentionScoreGrad. 
\n -*@par Inputs: +* @par Inputs: +* Seven inputs, including: +* @li attention_score: A matrix Tensor. The type only supports float16. +* @li dx: A matrix Tensor. The type only supports float16. +* @li query: A matrix Tensor. The type only supports float16. +* @li key: A matrix Tensor. The type only supports float16. +* @li value: A matrix Tensor. The type only supports float16. +* @li scale: A scalar. The type only supports float16. +* @li drop_mask: A matrix Tensor. The type only supports uint8. \n + +* @par Attributes: +* @li keep_prob: A mutable Tensor. Must meet all of the following rules: + shape of "keep_prob" should be (1,) or [1,]. +* @li query_transpose: A bool. If True, changes the shape of "query" from [K, M] to + [M, K]. +* @li key_transpose: A bool. If True, changes the shape of "key" from [N, K] to + [K, N]. +* @li value_transpose: A bool. If True, changes the shape of "value" from [K, M] to + [M, K]. +* @li dx_transpose: A bool. If True, changes the shape of "dx" from [N, K] to + [K, N]. +* @li softmax_axes: An int. The dimension softmax would be performed on. Defaults + to "-1" . \n + +* @par Outputs: +* value_dw: The result matrix Tensor. The type only supports float16. +* query_dx: The result matrix Tensor. The type only supports float16. +* key_dw: The result matrix Tensor. The type only supports float16. + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AttentionScoreGrad) + .INPUT(attention_score, TensorType({DT_FLOAT16})) + .INPUT(dx, TensorType({DT_FLOAT16})) + .INPUT(query, TensorType({DT_FLOAT16})) + .INPUT(key, TensorType({DT_FLOAT16})) + .INPUT(value, TensorType({DT_FLOAT16})) + .INPUT(scale, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(drop_mask, TensorType({DT_INT8})) + .OUTPUT(value_dw, TensorType({DT_FLOAT16})) + .OUTPUT(query_dx, TensorType({DT_FLOAT16})) + .OUTPUT(key_dw, TensorType({DT_FLOAT16})) + .ATTR(keep_prob, Float, 1.0) + .ATTR(query_transpose, Bool, false) + .ATTR(key_transpose, Bool, false) + .ATTR(value_transpose, Bool, false) + .ATTR(dx_transpose, Bool, false) + .ATTR(softmax_axes, Int, -1) + .OP_END_FACTORY_REG(AttentionScoreGrad) + +/** +* @brief Applies sparse "updates" to individual values or slices in a Variable . \n + +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor. -*Must be one of the following types: float16, float32, int8, uint8, double, +* @li var: An ND Tensor. +* Must be one of the following types: float16, float32, int8, uint8, double, * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, * uint64 -*@li indices: An ND Tensor. -*Must be one of the following types: int32 or int64 -*@li updates: An ND Tensor. -*Must be one of the following types: float16, float32, int8, uint8, double, +* @li indices: An ND Tensor. +* Must be one of the following types: int32 or int64 +* @li updates: An ND Tensor. +* Must be one of the following types: float16, float32, int8, uint8, double, * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, * uint64 -*@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", * the operation will be protected by a lock . \n -*@par Outputs: -*var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNdUpdate. */ REG_OP(ScatterNdUpdate) @@ -687,28 +741,28 @@ REG_OP(ScatterNdUpdate) .OP_END_FACTORY_REG(ScatterNdUpdate)
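For ScatterNdUpdate, each row of `indices` is a full coordinate into "var", and the matching element of `updates` overwrites that position. A minimal host-side sketch on a 2-D var (invented helper, not the CANN API):

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// In-place nd-update on a rows x cols var stored row-major: each (row, col)
// coordinate in `indices` is overwritten by the matching update.
void ScatterNdUpdateRef(std::vector<float> &var, int cols,
                        const std::vector<std::pair<int64_t, int64_t>> &indices,
                        const std::vector<float> &updates) {
  for (size_t i = 0; i < indices.size(); ++i)
    var[indices[i].first * cols + indices[i].second] = updates[i];
}

int main() {
  std::vector<float> var(2 * 3, 0.0f);  // 2x3 zeros
  ScatterNdUpdateRef(var, 3, {{0, 1}, {1, 2}}, {5.0f, 7.0f});
  for (float v : var) std::cout << v << ' ';  // 0 5 0 0 0 7
  std::cout << '\n';
  return 0;
}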
/** -*@brief Applies sparse addition to individual values or slices in a Variable . \n +* @brief Applies sparse addition to individual values or slices in a Variable . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li x: An ND Tensor. \n +* @li x: An ND Tensor. \n -*Must be one of the following types: float16, float32, bool, int8, uint8 -*@li indices: An ND Tensor. \n +* Must be one of the following types: float16, float32, bool, int8, uint8 +* @li indices: An ND Tensor. \n -*Must be one of the following types: int32 -*@li updates: An ND Tensor. \n +* Must be one of the following types: int32 +* @li updates: An ND Tensor. \n -*Must be one of the following types: float16, float32, bool, int8, uint8 +* Must be one of the following types: float16, float32, bool, int8, uint8 -*@par Outputs: -*y: A Tensor. Has the same type and format as input "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterUpdate. -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterUpdate) .INPUT(x, TensorType::BasicType()) @@ -718,23 +772,24 @@ REG_OP(TensorScatterUpdate) .OP_END_FACTORY_REG(TensorScatterUpdate) /** -*@brief Uses "updates" to update tensor "data" by "indices". \n +* @brief Uses "updates" to update tensor "data" by "indices". \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li data: An ND Tensor . \n -*Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor of type int32 or int64 -*@li updates: An Tensor. Same shape as indices. format:NCHW, NHWC . \n -*Must be one of the following types: float16, float32, int32, int8, uint8 +* @li data: An ND Tensor . \n +* Must be one of the following types: float16, float32, int32, int8, uint8 +* @li indices: An ND Tensor of type int32 or int64 +* @li updates: A Tensor. Same shape as indices. Format: NCHW, NHWC . \n +* Must be one of the following types: float16, float32, int32, int8, uint8 -*@par Attributes: -*@li axis: An optional attribute. Defaults to 0. +* @par Attributes: +* @li axis: An optional attribute. Defaults to 0. +* @li reduction: An optional attribute. Defaults to string "none" and can be "add" or "mul". -*@par Outputs: -*y: A Tensor. Has the same type and format as input "data" . \n +* @par Outputs: +* y: A Tensor. Has the same type and format as input "data" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the ONNX operator ScatterElements. */ REG_OP(ScatterElements) @@ -743,31 +798,59 @@ .INPUT(indices, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(axis, Int, 0) + .ATTR(reduction, String, "none") .OP_END_FACTORY_REG(ScatterElements)
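ScatterElements follows the ONNX semantics: y starts as a copy of "data", and for axis = 0 each updates[i][j] is written to y[indices[i][j]][j], combined according to the new `reduction` attribute ("none" replaces, "add" accumulates, "mul" multiplies). A small host-side sketch under those assumptions:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// 2-D ScatterElements with axis=0; data is rows x cols row-major, updates is
// urows x cols, indices has the same shape as updates.
std::vector<float> ScatterElementsRef(std::vector<float> data, int cols,
                                      const std::vector<int64_t> &indices,
                                      const std::vector<float> &updates,
                                      int urows, const std::string &reduction) {
  for (int i = 0; i < urows; ++i)
    for (int j = 0; j < cols; ++j) {
      float &dst = data[indices[i * cols + j] * cols + j];  // scatter along axis 0
      const float u = updates[i * cols + j];
      if (reduction == "add") dst += u;
      else if (reduction == "mul") dst *= u;
      else dst = u;  // "none": plain replacement
    }
  return data;
}

int main() {
  // 3x2 zeros; one row of updates {9, 8} scattered to rows {2, 0}.
  auto y = ScatterElementsRef(std::vector<float>(6, 0.0f), 2, {2, 0}, {9, 8}, 1, "none");
  for (float v : y) std::cout << v << ' ';  // 0 8 0 0 9 0
  std::cout << '\n';
  return 0;
}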
/** -*@brief Adds sparse "updates" to a variable reference . \n +* @brief Uses "updates" to update tensor "data" by "indices". \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor . +* @li var: A Tensor of type BasicType. +* @li indices: An ND Tensor of type int32 or int64. +* @li updates: A Tensor with the same dtype as 'var'. Same shape as indices. \n + +* @par Attributes: +* @li use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNdMax. +*/ +REG_OP(ScatterNdMax) + .INPUT(var, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType::BasicType()) + .OUTPUT(var, TensorType::BasicType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterNdMax) -*Must be one of the following types: float16, float, int32, int8, uint8 -*@li indices: An ND Tensor . \n +/** +* @brief Adds sparse "updates" to a variable reference . \n -*Must be one of the following types: int32 or int64 -*@li updates: An ND Tensor . +* @par Inputs: +* Three inputs, including: +* @li var: An ND Tensor . -*Must be one of the following types: float16, float, int32, int8, uint8 +* Must be one of the following types: float16, float, int32, int8, uint8 +* @li indices: An ND Tensor . \n -*@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", +* Must be one of the following types: int32 or int64 +* @li updates: An ND Tensor . + +* Must be one of the following types: float16, float, int32, int8, uint8 + +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", * the operation will be protected by a lock . \n -*@par Outputs: -*var: A Tensor. Has the same type and format as input "var" . \n +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterAdd. */ REG_OP(ScatterAdd) @@ -779,25 +862,25 @@ .OP_END_FACTORY_REG(ScatterAdd) /** -*@brief Adds sparse "updates" to a variable reference . \n +* @brief Adds sparse "updates" to a variable reference . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor . -*Must be one of the following types: float16, float32, int32, int8, uint8 +* @li var: An ND Tensor . +* Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor of type int32 or int64 +* @li indices: An ND Tensor of type int32 or int64 -*@li updates: An ND Tensor . -*Must be one of the following types: float16, float32, int32, int8, uint8 +* @li updates: An ND Tensor . +* Must be one of the following types: float16, float32, int32, int8, uint8 -*@par Attributes: +* @par Attributes: * axis: An required int. The axis along which to index. \n -*@par Outputs: -*var: A Tensor. Has the same type and format as input "var" . \n +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the pytorch operator ScatterAdd. */ REG_OP(ScatterAddWithAxis) @@ -809,26 +892,26 @@ .OP_END_FACTORY_REG(ScatterAddWithAxis) /** -*@brief Divides a variable reference by sparse updates .
\n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor. -*Must be one of the following types: float16, float, int32, int8, uint8 +* @li var: An ND Tensor. +* Must be one of the following types: float16, float, int32, int8, uint8 -*@li indices: An ND Tensor. -*Must be one of the following types: int32 or int64 -*@li updates: An ND Tensor. -*Must be one of the following types: float16, float, int32, int8, uint8 +* @li indices: An ND Tensor. +* Must be one of the following types: int32 or int64 +* @li updates: An ND Tensor. +* Must be one of the following types: float16, float, int32, int8, uint8 -*@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", * the operation will be protected by a lock . \n -*@par Outputs: -*var: A Tensor. Has the same type and format as input "var" . \n +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterDiv. */ REG_OP(ScatterDiv) @@ -840,24 +923,24 @@ REG_OP(ScatterDiv) .OP_END_FACTORY_REG(ScatterDiv) /** -*@brief Applies sparse addition to individual values or slices in a Variable . \n +* @brief Applies sparse addition to individual values or slices in a Variable . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor. -*Must be one of the following types: float16, float, int32, int8, uint8 -*@li indices: An ND Tensor. -*Must be one of the following types: int32 or int64 -*@li updates: An ND Tensor. -*Must be one of the following types: float16, float, int32, int8, uint8 -*@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", +* @li var: An ND Tensor. +* Must be one of the following types: float16, float, int32, int8, uint8 +* @li indices: An ND Tensor. +* Must be one of the following types: int32 or int64 +* @li updates: An ND Tensor. +* Must be one of the following types: float16, float, int32, int8, uint8 +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", * the operation will be protected by a lock . \n -*@par Outputs: -*var: A Tensor. Has the same type and format as input "var" . \n +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNdAdd. */ REG_OP(ScatterNdAdd) @@ -869,28 +952,28 @@ REG_OP(ScatterNdAdd) .OP_END_FACTORY_REG(ScatterNdAdd) /** -*@brief Applies sparse addition to individual values or slices in a Variable . \n +* @brief Applies sparse addition to individual values or slices in a Variable . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li x: An ND Tensor. \n +* @li x: An ND Tensor. \n -*Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor. \n +* Must be one of the following types: float16, float32, int32, int8, uint8 +* @li indices: An ND Tensor. \n -*Must be one of the following types: int32 -*@li updates: An ND Tensor. \n +* Must be one of the following types: int32 +* @li updates: An ND Tensor. \n * Must be one of the following types: float16, float32, int32, int8, uint8 -*@par Outputs: -*y: A Tensor. Has the same type and format as input "x" . \n +* @par Outputs: +* y: A Tensor. 
Has the same type and format as input "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterAdd. -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterAdd) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -900,25 +983,25 @@ REG_OP(TensorScatterAdd) .OP_END_FACTORY_REG(TensorScatterAdd) /** -*@brief Applies sparse subtraction to individual values or slices in a Variable . \n +* @brief Applies sparse subtraction to individual values or slices in a Variable . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor. -*Must be one of the following types: float16, float, int32, int8, uint8 -*@li indices: An ND Tensor. -*Must be one of the following types: int32 or int64 -*@li updates: An ND Tensor. -*Must be one of the following types: float16, float, int32, int8, uint8 - -*@par Attributes: +* @li var: An ND Tensor. +* Must be one of the following types: float16, float, int32, int8, uint8 +* @li indices: An ND Tensor. +* Must be one of the following types: int32 or int64 +* @li updates: An ND Tensor. +* Must be one of the following types: float16, float, int32, int8, uint8 + +* @par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", * the operation will be protected by a lock . \n -*@par Outputs: +* @par Outputs: * var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNdSub. */ REG_OP(ScatterNdSub) @@ -930,28 +1013,55 @@ .OP_END_FACTORY_REG(ScatterNdSub) /** -*@brief Applies sparse addition to individual values or slices in a Variable . \n +* @brief Uses "updates" to update tensor "data" by "indices". \n -*@par Inputs: +* @par Inputs: +* Three inputs, including: +* @li var: A Tensor of type BasicType. +* @li indices: An ND Tensor of type int32 or int64. +* @li updates: A Tensor with the same dtype as 'var'. Same shape as indices. \n + +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNdMin. +*/ +REG_OP(ScatterNdMin) + .INPUT(var, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType::BasicType()) + .OUTPUT(var, TensorType::BasicType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterNdMin) + +/** +* @brief Applies sparse addition to individual values or slices in a Variable . \n + +* @par Inputs: * Three inputs, including: -*@li x: An ND Tensor. \n +* @li x: An ND Tensor. \n -*Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor. \n +* Must be one of the following types: float16, float32, int32, int8, uint8 +* @li indices: An ND Tensor. \n -*Must be one of the following types: int32 -*@li updates: An ND Tensor.
\n -*Must be one of the following types: float16, float32, int32, int8, uint8 +* Must be one of the following types: float16, float32, int32, int8, uint8 -*@par Outputs: +* @par Outputs: * y: A Tensor. Has the same type and format as input "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterSub. -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterSub) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -961,24 +1071,24 @@ REG_OP(TensorScatterSub) .OP_END_FACTORY_REG(TensorScatterSub) /** -*@brief Subtracts sparse updates to a variable reference . \n +* @brief Subtracts sparse updates to a variable reference . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor. -*Must be one of the following types: float16, float, int32, int8, uint8 -*@li indices: An ND Tensor. -*Must be one of the following types: int32 or int64 -*@li updates: An ND Tensor. -*Must be one of the following types: float16, float, int32, int8, uint8 -*@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", +* @li var: An ND Tensor. +* Must be one of the following types: float16, float, int32, int8, uint8 +* @li indices: An ND Tensor. +* Must be one of the following types: int32 or int64 +* @li updates: An ND Tensor. +* Must be one of the following types: float16, float, int32, int8, uint8 +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", * the operation will be protected by a lock . \n -*@par Outputs: +* @par Outputs: * var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterSub. */ REG_OP(ScatterSub) @@ -990,17 +1100,17 @@ REG_OP(ScatterSub) .OP_END_FACTORY_REG(ScatterSub) /** -*@brief: Returns the batched diagonal part of a batched tensor with "assist" . \n +* @brief: Returns the batched diagonal part of a batched tensor with "assist" . \n -*@par Inputs: +* @par Inputs: * Two inputs, including: * @li x: A Tensor of type float16, float32, or int32. * @li assist: A Tensor of the same type as "x" . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator DiagPart. * * @par Restrictions: @@ -1013,16 +1123,16 @@ REG_OP(DiagPartD) .OP_END_FACTORY_REG(DiagPartD) /** -*@brief: Returns the batched diagonal part of a batched tensor . \n +* @brief: Returns the batched diagonal part of a batched tensor . \n -*@par Inputs: -*x: A Tensor. Must be one of the following types: +* @par Inputs: +* x: A Tensor. Must be one of the following types: * float16, float32, int32, int64, double, complex64, complex128 . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator DiagPart. 
*/ REG_OP(DiagPart) @@ -1033,35 +1143,35 @@ .OP_END_FACTORY_REG(DiagPart) /** -*@brief Also known as a "fully-connected" layer, computes an inner product +* @brief Also known as a "fully-connected" layer, computes an inner product * with a set of learned weights, and (optionally) adds biases. \n -*@par Inputs: +* @par Inputs: * Four inputs, including: -*@li x: A Tensor of type float16, int8, int4, float32, bfloat16. -*@li w: A weight matrix of type float16, int8, int4, float32, bfloat16. -*@li b: An optional Tensor of type float16, int8, int4, float32, bfloat16. -*@li offset_w: An optional Tensor of type int8, int4. +* @li x: A Tensor of type float16, int8, int4, float32. +* @li w: A weight matrix of type float16, int8, int4, float32. +* @li b: An optional Tensor of type float16, int32, float32. +* @li offset_w: An optional Tensor of type int8, int4. * Reserved. Only None Supported. \n -*@par Attributes: -*@li num_output: Required. An int, output neuron number. Reserved. -*@li transpose: A bool, specifying weight whether to transpose input w, +* @par Attributes: +* @li num_output: Required. An int, output neuron number. Reserved. +* @li transpose: A bool, specifying whether to transpose input w, * either "true" or "false". Defaults to "false". -*@li axis: Optional. An int, 1 or 2, specifying which dimension the input +* @li axis: Optional. An int, 1 or 2, specifying which dimension the input * "K" starts from. Defaults to 1. * The product of the subsequent dimensions starting form first dimension * or the second dimension is "K". -*@li offset_x: An optional integer for quantized FullyConnection. -*The negative offset added to the input image for int8 type. Ensure offset_x +* @li offset_x: An optional integer for quantized FullyConnection. +* The negative offset added to the input image for int8 type. Ensure offset_x * within the effective range of int8 [-128, 127]. Defaults to "0". \n -*@par Outputs: -*y: The result tensor of type float16, int32, float32, bfloat16. \n +* @par Outputs: +* y: The result tensor of type float16, int32, float32. \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the Caffe operator InnerProduct. \n -*@par Quantization supported or not +* @par Quantization supported or not * Yes */ REG_OP(FullyConnection) @@ -1077,35 +1187,35 @@ REG_OP(FullyConnection) .OP_END_FACTORY_REG(FullyConnection)
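FullyConnection flattens everything from `axis` onward into K and computes one inner product per output neuron, y = x * w + b. A hedged host-side sketch of that contract (the [num_output, K] weight layout and the helper name are assumptions of this illustration, not the kernel's documented layout):

#include <iostream>
#include <vector>

// x is m x k row-major (k = product of dims from `axis` onward),
// w is num_output x k, b has num_output entries (or is empty).
std::vector<float> FullyConnectionRef(const std::vector<float> &x, int m, int k,
                                      const std::vector<float> &w,
                                      const std::vector<float> &b, int num_output) {
  std::vector<float> y(m * num_output, 0.0f);
  for (int i = 0; i < m; ++i)
    for (int o = 0; o < num_output; ++o) {
      float acc = b.empty() ? 0.0f : b[o];  // optional bias
      for (int p = 0; p < k; ++p) acc += x[i * k + p] * w[o * k + p];
      y[i * num_output + o] = acc;
    }
  return y;
}

int main() {
  // One sample flattened to K=4, two output neurons -> {5.5, 10}
  auto y = FullyConnectionRef({1, 2, 3, 4}, 1, 4,
                              {1, 0, 0, 1, 1, 1, 1, 1}, {0.5f, 0.0f}, 2);
  for (float v : y) std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}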
/** -*@brief Also known as a "fully-connected-compress" layer, computes an inner +* @brief Also known as a "fully-connected-compress" layer, computes an inner * product with a set of learned weights, and (optionally) adds biases. \n -*@par Inputs: +* @par Inputs: * Five inputs, including: -*@li x: A Tensor of type uint8, int8. -*@li w: A weight matrix of type int8. -*@li compress_index: A compress index matrix of type int8. -*@li b: A optional Tensor of type int32. -*@li offset_w: A optional Tensor of type int8. - -*@par Attributes: -*@li num_output: A int, specifying the number of outputs. -*@li transpose: A bool, specifying whether to transpose input w, either "true" +* @li x: A Tensor of type uint8, int8. +* @li w: A weight matrix of type int8. +* @li compress_index: A compress index matrix of type int8. +* @li b: An optional Tensor of type int32. +* @li offset_w: An optional Tensor of type int8. + +* @par Attributes: +* @li num_output: An int, specifying the number of outputs. +* @li transpose: A bool, specifying whether to transpose input w, either "true" * or "false". Defaults to "false". -*@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" +* @li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" * starts from. Defaults to "1". -*The product of the subsequent dimensions starting form first dimension or the +* The product of the subsequent dimensions starting from the first dimension or the * second dimension is "K". -*@li offset_x: An optional integer for quantized FullyConnectionCompress. -*The negative offset added to the input image for int8 type. Ensure offset_x +* @li offset_x: An optional integer for quantized FullyConnectionCompress. +* The negative offset added to the input image for int8 type. Ensure offset_x * within the effective range of int8 [-128, 127]. Defaults to "0". \n -*@par Outputs: -*y: The result tensor of type int32. \n +* @par Outputs: +* y: The result tensor of type int32. \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the Caffe operator InnerProduct. \n -*@par Quantization supported or not +* @par Quantization supported or not * Yes */ REG_OP(FullyConnectionCompress) @@ -1122,33 +1232,33 @@ .OP_END_FACTORY_REG(FullyConnectionCompress) /** -*@brief Computes the confusion matrix from predictions and labels . \n +* @brief Computes the confusion matrix from predictions and labels . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li labels: A Tensor. Must be one of the following types: float16, float32, +* @li labels: A Tensor. Must be one of the following types: float16, float32, * int32, int8, uint8. -*@li predictions: A Tensor. Must be one of the following types: float16, +* @li predictions: A Tensor. Must be one of the following types: float16, * float32, int32, int8, uint8. -*@li weights: A Tensor. Must be one of the following types: float16, float32, +* @li weights: A Tensor. Must be one of the following types: float16, float32, * int32, int8, uint8 . \n -*@par Attributes: -*@li num_classes: An integer for the shape of the output matrix. +* @par Attributes: +* @li num_classes: An integer for the shape of the output matrix. * No default value. -*@li dtype: Data type of the confusion matrix. No default value . \n +* @li dtype: Data type of the confusion matrix. No default value . \n -*@par Outputs: -*y: A Tensor. Has the same type and format as input "labels" +* @par Outputs: +* y: A Tensor. Has the same type and format as input "labels" -*@attention Constraints: -*@li "weights", "labels", and "predictions" are 1D tensors. -*@li The output is with shape (num_classes, num_classes), +* @attention Constraints: +* @li "weights", "labels", and "predictions" are 1D tensors. +* @li The output is with shape (num_classes, num_classes), * where, 1 <= num_classes <= 4096 . \n -*@see Region() +* @see Region() -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ConfusionMatrix. */ REG_OP(ConfusionMatrix) @@ -1161,26 +1271,26 @@ .OP_END_FACTORY_REG(ConfusionMatrix) /** -*@brief Multiplies sparse updates into a variable reference . \n +* @brief Multiplies sparse updates into a variable reference . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor.
+* Must be one of the following types: float16, float, int32, int8, uint8 +* @li indices: An ND Tensor. +* Must be one of the following types: int32 or int64 +* @li updates: An ND Tensor . \n -*Must be one of the following types: float16, float, int32, int8, uint8 +* Must be one of the following types: float16, float, int32, int8, uint8 -*@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", the operation * will be protected by a lock . \n -*@par Outputs: -*var: A Tensor. Has the same type and format as input "var" . \n +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterMul. */ REG_OP(ScatterMul) @@ -1192,28 +1302,28 @@ REG_OP(ScatterMul) .OP_END_FACTORY_REG(ScatterMul) /** -*@brief Reduces sparse updates into a variable reference using - * the "min" operation . \n +* @brief Reduces sparse updates into a variable reference using +* the "min" operation . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor. -*Must be one of the following types: float16, float, int32, int8, uint8 +* @li var: An ND Tensor. +* Must be one of the following types: float16, float, int32, int8, uint8 -*@li indices: An ND Tensor. -*Must be one of the following types: int32 or int64 +* @li indices: An ND Tensor. +* Must be one of the following types: int32 or int64 -*@li updates: An ND Tensor. -*Must be one of the following types: float16, float, int32, int8, uint8 +* @li updates: An ND Tensor. +* Must be one of the following types: float16, float, int32, int8, uint8 -*@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", the operation * will be protected by a lock . \n -*@par Outputs: -*var: A Tensor. Has the same type and format as input "var" . \n +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterMin. */ REG_OP(ScatterMin) @@ -1225,28 +1335,28 @@ REG_OP(ScatterMin) .OP_END_FACTORY_REG(ScatterMin) /** -*@brief Reduces sparse updates into a variable reference using the "max" operation . \n +* @brief Reduces sparse updates into a variable reference using the "max" operation . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor . +* @li var: An ND Tensor . -*Must be one of the following types: float16, float, int32, int8, uint8 -*@li indices: An NCHW, NHWC, or ND Tensor . \n +* Must be one of the following types: float16, float, int32, int8, uint8 +* @li indices: An NCHW, NHWC, or ND Tensor . \n -*Must be one of the following types: int32 or int64 -*@li updates: An NCHW, NHWC, or ND Tensor . +* Must be one of the following types: int32 or int64 +* @li updates: An NCHW, NHWC, or ND Tensor . -*Must be one of the following types: float16, float, int32, int8, uint8 +* Must be one of the following types: float16, float, int32, int8, uint8 -*@par Attributes: -*use_locking: An optional bool. Defaults to "False". +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". * If "True", the operation will be protected by a lock . \n -*@par Outputs: -*var: A Tensor. 
Has the same type and format as input "var" . \n +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterMax. */ REG_OP(ScatterMax) @@ -1258,28 +1368,28 @@ REG_OP(ScatterMax) .OP_END_FACTORY_REG(ScatterMax) /** -*@brief Applies sparse updates to a variable reference . \n +* @brief Applies sparse updates to a variable reference . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li var: An ND Tensor . +* @li var: An ND Tensor . -*Must be one of the following types: float16, float, int32, int8, uint8 -*@li indices: An ND Tensor . \n +* Must be one of the following types: float16, float, int32, int8, uint8 +* @li indices: An ND Tensor . \n -*Must be one of the following types: int32 or int64 -*@li updates: An ND Tensor . +* Must be one of the following types: int32 or int64 +* @li updates: An ND Tensor . -*Must be one of the following types: float16, float, int32, int8, uint8 +* Must be one of the following types: float16, float, int32, int8, uint8 -*@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", * the operation will be protected by a lock . \n -*@par Outputs: -*var: A Tensor. Has the same type and format as input "var" . \n +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterUpdate. */ REG_OP(ScatterUpdate) @@ -1291,24 +1401,24 @@ REG_OP(ScatterUpdate) .OP_END_FACTORY_REG(ScatterUpdate) /** -*@brief Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched `input` . \n +* @brief Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched `input` . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li input: Rank `r` tensor where `r >= 2`. \n +* @li input: Rank `r` tensor where `r >= 2`. \n -*@li k: \n -*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n -*diagonal, and negative value means subdiagonals. `k` can be a single integer \n -*(for a single diagonal) or a pair of integers specifying the low and high ends \n -*of a matrix band. `k[0]` must not be larger than `k[1]`. \n +* @li k: \n +* Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n +* diagonal, and negative value means subdiagonals. `k` can be a single integer \n +* (for a single diagonal) or a pair of integers specifying the low and high ends \n +* of a matrix band. `k[0]` must not be larger than `k[1]`. \n -*@li padding_value: The value to fill the area outside the specified diagonal band with. \n +* @li padding_value: The value to fill the area outside the specified diagonal band with. \n -*@par Outputs: -*diagonal: The extracted diagonal(s) . \n +* @par Outputs: +* diagonal: The extracted diagonal(s) . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterUpdate. */ REG_OP(MatrixDiagPartV2) @@ -1319,24 +1429,24 @@ REG_OP(MatrixDiagPartV2) .OP_END_FACTORY_REG(MatrixDiagPartV2) /** -*@brief Returns a batched matrix tensor with new batched diagonal values . \n +* @brief Returns a batched matrix tensor with new batched diagonal values . 
\n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li input: "Rank `r+1`, where `r >= 1`. \n +* @li input: Rank `r+1`, where `r >= 1`. \n -*@li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n +* @li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n -*@li k: -*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n -*diagonal, and negative value means subdiagonals. `k` can be a single integer \n -*(for a single diagonal) or a pair of integers specifying the low and high ends \n -*of a matrix band. `k[0]` must not be larger than `k[1]`. \n +* @li k: +* Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n +* diagonal, and negative value means subdiagonals. `k` can be a single integer \n +* (for a single diagonal) or a pair of integers specifying the low and high ends \n +* of a matrix band. `k[0]` must not be larger than `k[1]`. \n -*@par Outputs: -*output: Rank `r+1`, with `output.shape = input.shape` . \n +* @par Outputs: +* output: Rank `r+1`, with `output.shape = input.shape` . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterUpdate. */ REG_OP(MatrixSetDiagV2) @@ -1347,29 +1457,29 @@ .OP_END_FACTORY_REG(MatrixSetDiagV2) /** -*@brief Returns a batched matrix tensor with new batched diagonal values . \n +* @brief Returns a batched matrix tensor with new batched diagonal values . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li input: "Rank `r+1`, where `r >= 1`. \n +* @li input: Rank `r+1`, where `r >= 1`. \n -*@li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n +* @li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n -*@li k: -*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n -*diagonal, and negative value means subdiagonals. `k` can be a single integer \n -*(for a single diagonal) or a pair of integers specifying the low and high ends \n -*of a matrix band. `k[0]` must not be larger than `k[1]`. \n +* @li k: +* Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n +* diagonal, and negative value means subdiagonals. `k` can be a single integer \n +* (for a single diagonal) or a pair of integers specifying the low and high ends \n +* of a matrix band. `k[0]` must not be larger than `k[1]`. \n -*@par Attributes: -*@li align: An optional string. Defaults to RIGHT_LEFT. It is a string specifying \n -*how superdiagonals and subdiagonals should be aligned, respectively. \n -*other optional: LEFT_RIGHT, LEFT_LEFT, and RIGHT_RIGHT.\n +* @par Attributes: +* @li align: An optional string. Defaults to RIGHT_LEFT. It is a string specifying \n +* how superdiagonals and subdiagonals should be aligned, respectively. \n +* Other options: LEFT_RIGHT, LEFT_LEFT, and RIGHT_RIGHT.\n -*@par Outputs: -*output: Rank `r+1`, with `output.shape = input.shape` . \n +* @par Outputs: +* output: Rank `r+1`, with `output.shape = input.shape` . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterUpdate. */ REG_OP(MatrixSetDiagV3) @@ -1381,34 +1491,34 @@ .OP_END_FACTORY_REG(MatrixSetDiagV3) /** -*@brief Returns a batched diagonal tensor with given batched diagonal values .
\n +* @brief Returns a batched diagonal tensor with given batched diagonal values . \n -*@par Inputs: +* @par Inputs: * Five inputs, including: -*@li diagonal: Rank `r`, where `r >= 1` \n +* @li diagonal: Rank `r`, where `r >= 1` \n -*@li k: -*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n -*diagonal, and negative value means subdiagonals. `k` can be a single integer \n -*(for a single diagonal) or a pair of integers specifying the low and high ends \n -*of a matrix band. `k[0]` must not be larger than `k[1]`. \n +* @li k: +* Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n +* diagonal, and negative value means subdiagonals. `k` can be a single integer \n +* (for a single diagonal) or a pair of integers specifying the low and high ends \n +* of a matrix band. `k[0]` must not be larger than `k[1]`. \n -*@li num_rows: -*The number of rows of the output matrix. If it is not provided, the op assumes \n -*the output matrix is a square matrix and infers the matrix size from k and the \n -*innermost dimension of `diagonal`. \n +* @li num_rows: +* The number of rows of the output matrix. If it is not provided, the op assumes \n +* the output matrix is a square matrix and infers the matrix size from k and the \n +* innermost dimension of `diagonal`. \n -*@li num_cols: An NCHW, NHWC, or ND Tensor. -*The number of columns of the output matrix. If it is not provided, the op \n -*assumes the output matrix is a square matrix and infers the matrix size from \n -*k and the innermost dimension of `diagonal`. \n +* @li num_cols: An NCHW, NHWC, or ND Tensor. +* The number of columns of the output matrix. If it is not provided, the op \n +* assumes the output matrix is a square matrix and infers the matrix size from \n +* k and the innermost dimension of `diagonal`. \n -*@li padding_value: The number to fill the area outside the specified diagonal band with. \n +* @li padding_value: The number to fill the area outside the specified diagonal band with. \n -*@par Outputs: -*output: Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise . \n +* @par Outputs: +* output: Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterUpdate. */ REG_OP(MatrixDiagV2) @@ -1452,13 +1562,13 @@ REG_OP(IndexAdd) /** * @brief According to the index number of indexes, replace the value -*corresponding to X1 with the value in x2. +* corresponding to X1 with the value in x2. * @par Inputs: * Three inputs, including: * @li x1: A Tensor. Must be one of the following types: -*float16, float32, double, int32, uint8, int16, int8, complex64, int64, -*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n * @li x2: A Tensor of the same type as "x1". * @li indices: A Tensor of the indices, @@ -1484,20 +1594,20 @@ REG_OP(IndexPut) .OP_END_FACTORY_REG(IndexPut) /** -*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n +* @brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n -*@par Inputs: -*x: A Tensor. Must be one of the following types: -*float16, float32, double, int32, uint8, int16, int8, complex64, int64, -*qint8, quint8, qint32, uint16, complex128, uint32, uint64. 
\n
+* @par Inputs:
+* x: A Tensor. Must be one of the following types:
+* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
+* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n

-*@par Attributes:
-*diagonal: An optional attribute indicates the diagonal to consider. \n
+* @par Attributes:
+* diagonal: An optional attribute indicating the diagonal to consider. \n

-*@par Outputs:
-*y: A Tensor. Has the same type as "x" . \n
+* @par Outputs:
+* y: A Tensor. Has the same type as "x" . \n

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
* Compatible with the Pytorch operator Triu.
*/
REG_OP(Triu)
@@ -1507,20 +1617,20 @@ REG_OP(Triu)
 .OP_END_FACTORY_REG(Triu)

/**
-*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n
+* @brief: Returns the lower triangular part of a matrix (2-D tensor) or batch of matrices input \n

*@par Inputs:
-*x: A Tensor. Must be one of the following types:
-*float16, float32, double, int32, uint8, int16, int8, complex64, int64,
-*qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n
+* x: A Tensor. Must be one of the following types:
+* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
+* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n

-*@par Attributes:
-*diagonal: An optional attribute indicates the diagonal to consider. \n
+* @par Attributes:
+* diagonal: An optional attribute indicating the diagonal to consider. \n

-*@par Outputs:
-*y: A Tensor. Has the same type as "x" . \n
+* @par Outputs:
+* y: A Tensor. Has the same type as "x" . \n

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
* Compatible with the Pytorch operator Tril.
*/
REG_OP(Tril)
@@ -1529,26 +1639,26 @@ REG_OP(Tril)
 .OUTPUT(y, TensorType::BasicType())
 .OP_END_FACTORY_REG(Tril)

/**
-*@brief Concatenates a list of N tensors along the first dimension.
-*@par Inputs:
+* @brief Sums the product of the elements of the input operands along dimensions
+* specified using a notation based on the Einstein summation convention.
+* @par Inputs:
* @li x: A list of Tensors. Must be one of the following types: int32,
* float16, float32. Tensors to be concatenated. All must have size 1 in
* the first dimension and same shape.It's a dynamic input. \n

-*@par Attributes:
+* @par Attributes:
* @li equation: The subscripts for the Einstein summation. \n
* @li N: tensor size of input. \n

-*@par Outputs:
-*@li y: Sums the product of the elements of the input operands along
+* @par Outputs:
+* @li y: Sums the product of the elements of the input operands along
* dimensions specified
* using a notation based on the Einstein summation convention. \n

-*@attention Constraints:
-*Input N must be Int. \n
+* @attention Constraints:
+* Input N must be Int. \n

-*@par Third-party framework compatibility
-*Compatible with Pytorch einsum operator.
+* @par Third-party framework compatibility
+* Compatible with the PyTorch einsum operator.
*/
REG_OP(Einsum)
 .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
@@ -1558,22 +1668,22 @@ REG_OP(Einsum)
 .OP_END_FACTORY_REG(Einsum)

/**
-*@brief Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. \n
+* @brief Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. \n

-*@par Inputs:
-*No inputs
+* @par Inputs:
+* No inputs

-*@par Attributes:
-*@li num_rows: An required int. \n
-*@li num_columns: An optional int.Defaults to 0. \n
-*@li batch_shape: An optional ListInt.Defaults to []. \n
-*@li dtype: An optional int.Defaults to 0.
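Editor's note: a minimal C++ sketch of the Triu/Tril semantics documented above for a row-major rows x cols matrix, where `diagonal` plays the role of the attribute of the same name. Illustrative only, not the Ascend kernels.

#include <cstddef>
#include <vector>

// Triu keeps elements with col - row >= diagonal, zeroing the rest.
void Triu(std::vector<float>& m, std::size_t rows, std::size_t cols, long diagonal) {
  for (std::size_t r = 0; r < rows; ++r)
    for (std::size_t c = 0; c < cols; ++c)
      if (static_cast<long>(c) - static_cast<long>(r) < diagonal) m[r * cols + c] = 0.0f;
}

// Tril keeps elements with col - row <= diagonal, zeroing the rest.
void Tril(std::vector<float>& m, std::size_t rows, std::size_t cols, long diagonal) {
  for (std::size_t r = 0; r < rows; ++r)
    for (std::size_t c = 0; c < cols; ++c)
      if (static_cast<long>(c) - static_cast<long>(r) > diagonal) m[r * cols + c] = 0.0f;
}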
\n
+* @par Attributes:
+* @li num_rows: A required int. \n
+* @li num_columns: An optional int. Defaults to 0. \n
+* @li batch_shape: An optional ListInt. Defaults to []. \n
+* @li dtype: An optional int. Defaults to 0. \n

-*@par Outputs:
-*y: A Tensor with targeted type and shape. \n
+* @par Outputs:
+* y: A Tensor with targeted type and shape. \n

-*@par Third-party framework compatibility
-*Compatible with the Pytorch operator Eye. \n
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator Eye. \n
*/
REG_OP(Eye)
 .OUTPUT(y, TensorType::BasicType())    /* "Result, has targeted element type" */
@@ -1584,20 +1694,20 @@ REG_OP(Eye)
 .OP_END_FACTORY_REG(Eye)

/**
-*@brief: Fill diagonal of at least 2 dimension tensors with value . \n
+* @brief: Fills the diagonal of a tensor that has at least 2 dimensions with value . \n

-*@par Inputs:
-*x: A Tensor. Must be one of the following types:
+* @par Inputs:
+* x: A Tensor. Must be one of the following types:
* float32, int32, int64 . \n

-*@par Outputs:
+* @par Outputs:
*y: A Tensor. Has the same type as "x" . \n

-*@par Attributes:
-*fill_value:The value to fill in
-*wrap: An optional bool. Defaults to "False". If "True", Use recursive fill. \n
+* @par Attributes:
+* fill_value: The value to fill in.
+* wrap: An optional bool. Defaults to "False". If "True", use recursive fill. \n

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
* Compatible with the Pytorch operator FillDiagonal.
*/
REG_OP(FillDiagonal)
@@ -1608,16 +1718,16 @@ REG_OP(FillDiagonal)
 .OP_END_FACTORY_REG(FillDiagonal)

/**
-*@brief: Returns the sum of the elements of the diagonal of the input 2-D matrix. \n
+* @brief: Returns the sum of the elements of the diagonal of the input 2-D matrix. \n

-*@par Inputs:
-*x: A Tensor. Must be one of the following types:
+* @par Inputs:
+* x: A Tensor. Must be one of the following types:
* float16, float. \n

-*@par Outputs:
-*y: A Tensor. Has the same type as "x" . \n
+* @par Outputs:
+* y: A Tensor. Has the same type as "x" . \n

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
* Compatible with the Pytorch operator Trace.
*/
@@ -1627,16 +1737,16 @@ REG_OP(Trace)
 .OP_END_FACTORY_REG(Trace)

/**
-*@brief Computes the generalized inverse of any matrix. \n
+* @brief Computes the generalized inverse of any matrix. \n

-*@par Inputs:
+* @par Inputs:
* @li x: input matrix. Must be one of the following types:
* double, float. \n

-*@par Attributes:
+* @par Attributes:
* @li rcond: An optional float >= 0 or inf. Defaults to 1e-15. \n

-*@par Outputs:
+* @par Outputs:
* y: A Tensor with the same type and shape of x's transpose. \n
*/
@@ -1755,12 +1865,12 @@ REG_OP(TensorScatterMin)
* Compatible with the TensorFlow operator ScatterUpdate.
*/
REG_OP(MatrixDiagV3)
-    .INPUT(x, TensorType::BasicType())
+    .INPUT(x, TensorType({BasicType(), DT_BOOL}))
 .INPUT(k, TensorType({DT_INT32}))
 .INPUT(num_rows, TensorType({DT_INT32}))
 .INPUT(num_cols, TensorType({DT_INT32}))
-    .INPUT(padding_value, TensorType::BasicType())
-    .OUTPUT(y, TensorType::BasicType())
+    .INPUT(padding_value, TensorType({BasicType(), DT_BOOL}))
+    .OUTPUT(y, TensorType({BasicType(), DT_BOOL}))
 .ATTR(align, String, "RIGHT_LEFT")
 .OP_END_FACTORY_REG(MatrixDiagV3)

@@ -1817,58 +1927,31 @@ REG_OP(SwinAttentionScore)
 .OP_END_FACTORY_REG(SwinAttentionScore)

/**
-* @brief Uses "updates" to update tensor "data" by "indices". \n
-
-* @par Inputs:
-* Three inputs, including:
-* @li var: A Tensor of type BasicType.
-* @li indices: An ND Tensor of type int32 or int64.
-* @li updates: An Tensor with the same dtype as 'var'. Same shape as indices. \n
-
-* @par Attributes:
-* @li use_locking: An optional bool. Defaults to "False". If "True",
-* the operation will be protected by a lock . \n
-
-* @par Outputs:
-* var: A Tensor. Has the same type and format as input "var" . \n
-
-* @par Third-party framework compatibility
-* Compatible with the TensorFlow operator ScatterNdMax.
-*/
-REG_OP(ScatterNdMax)
-    .INPUT(var, TensorType::BasicType())
-    .INPUT(indices, TensorType::IndexNumberType())
-    .INPUT(updates, TensorType::BasicType())
-    .OUTPUT(var, TensorType::BasicType())
-    .ATTR(use_locking, Bool, false)
-    .OP_END_FACTORY_REG(ScatterNdMax)
-
-/**
-* @brief Uses "updates" to update tensor "data" by "indices". \n
-
+* @brief
+* A swin_transformer model specific structure. This operator only supports swin_transformer. \n
* @par Inputs:
* Three inputs, including:
-* @li var: A Tensor of type BasicType.
-* @li indices: A ND Tensor of type int32 or int64.
-* @li updates: A Tensor with the same dtype as 'var'. Same shape as indices. \n
+* @li x1: A Tensor. Must be one of the following types: float16.
+* @li x2: A Tensor. Must be one of the following types: float16.
+* @li bias: A Tensor. Must be one of the following types: float16. \n

* @par Attributes:
-* use_locking: An optional bool. Defaults to "False". If "True",
-* the operation will be protected by a lock . \n
+* @li shifts: An optional attribute of type list int. Defaults to (). \n

* @par Outputs:
-* var: A Tensor. Has the same type and format as input "var" . \n
+* One output, including:
+* @li y: A Tensor. Must be one of the following types: float16. \n

-* @par Third-party framework compatibility
-* Compatible with the TensorFlow operator ScatterNdMin.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
*/
-REG_OP(ScatterNdMin)
-    .INPUT(var, TensorType::BasicType())
-    .INPUT(indices, TensorType::IndexNumberType())
-    .INPUT(updates, TensorType::BasicType())
-    .OUTPUT(var, TensorType::BasicType())
-    .ATTR(use_locking, Bool, false)
-    .OP_END_FACTORY_REG(ScatterNdMin)
+REG_OP(SwinAttentionFFN)
+    .INPUT(x1, TensorType({DT_FLOAT16}))
+    .INPUT(x2, TensorType({DT_FLOAT16}))
+    .INPUT(bias, TensorType({DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .ATTR(shifts, ListInt, {})
+    .OP_END_FACTORY_REG(SwinAttentionFFN)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
index 4c55eac0..c6244a81 100644
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -28,7 +28,7 @@ namespace ge {
* @brief Computes the gradients of depthwise convolution with respect to
* the filter. \n
* @par Inputs:
-* Three inputs include:
+* Three inputs include:
* @li input: 4D origin shape of input tensor [N, C, H, W] or [N, H, W, C],
* support float16.
* @li filter_size: A 4D tensor of type int32, int64, with shape [H, W, C, K]
@@ -52,8 +52,7 @@ namespace ge {
* @par Outputs:
* filter_grad: Gradient of the deep convolution relative to the filter with
-* shape [H, W, C, K]. Must be one of the following types: float16, float32,
-* double . \n
+* shape [H, W, C, K]. Must be one of the following types: float16.
\n * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but @@ -350,7 +349,7 @@ REG_OP(BiasAddGrad) /** *@brief Computes the gradients of convolution with respect to the input. -*@par Inputs: +* @par Inputs: * Three inputs: * @li input_size: A const Tensor of type int32. Currently does not support * data tensor. An integer vector representing the shape of input, where @@ -454,7 +453,7 @@ REG_OP(Conv2DBackpropInput) /** *@brief Computes the gradients of convolution with respect to the input. -*@par Inputs: +* @par Inputs: * Two inputs: * @li filter: A Tensor. Types is float16. * 4-D with shape [filter_height, filter_width, in_channels, out_channels] @@ -501,7 +500,7 @@ REG_OP(Conv2DBackpropInputD) /** *@brief Computes the Deconvolution with respect to the input. -*@par Inputs: +* @par Inputs: * Two required inputs: * @li x: A Tensor of type float16 or int8. 4D with shape * [batch, out_channels, out_height, out_width]. Gradients with respect @@ -1518,7 +1517,7 @@ REG_OP(Conv2DTranspose) /** *@brief Computes the transpose of convolution 2d with respect to the input. -*@par Inputs: +* @par Inputs: * Four inputs: * @li x: A Tensor of type float16, int8. * @li filter: A Tensor of type float16, int8. Must have the same type as "x". @@ -1566,7 +1565,7 @@ REG_OP(Conv2DTransposeD) /** *@brief Computes the deformed convolution output with the expected input -*@par Inputs: +* @par Inputs: * Two inputs: * @li x: A Tensor of type float16,float32 * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. @@ -1599,7 +1598,7 @@ REG_OP(DeformableOffsets) /** *@brief Computes the gradients of DeformableOffsets with respect to input and offsets -*@par Inputs: +* @par Inputs: * Three inputs: * @li grad: A Tensor of type float16,float32. gradients with respect to DeformableOffsets output * @li x: A Tensor of type float16,float32. @@ -1636,7 +1635,7 @@ REG_OP(DeformableOffsetsGrad) /** *@brief Computes the deformed dilation output with the expected input -*@par Inputs: +* @par Inputs: * One inputs: * x: A Tensor of type int8, float16, float32 *@par Attributes: @@ -1656,7 +1655,7 @@ REG_OP(Dilation) /** *@brief Computes the post-cube processing output with the expected input -*@par Inputs: +* @par Inputs: * Ten inputs: * x1: A Tensor of type float16, bfloat16, float32, int32 * x2: A Tensor of type float16, int8, int4 diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index f34de163..16ec4357 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -75,37 +75,37 @@ REG_OP(Pooling) .OP_END_FACTORY_REG(Pooling) /** -*@brief Performs average pooling on the input. \n -*@par Inputs: -*x: A tensor of type float16, float32, double. \n +* @brief Performs average pooling on the input. \n +* @par Inputs: +* x: A tensor of type float16, float32, double. \n -*@par Attributes: -*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) +* @par Attributes: +* @li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) * of the sliding window, where N = C = 1, and H and W are positive integers * within the range [1, 255]. * @li strides: A required list of 4 ints, specifying the stride of the * sliding window. The strides of the N and C dimensions are 1. The strides of * the H and W dimensions are positive integers within the range [1, 63]. 
-*@li padding: A required string, specifying the padding algorithm, +* @li padding: A required string, specifying the padding algorithm, * either "VALID" or "SAME". With "SAME" means that the outputs will have the * same spatial dimensions as its inputs. With "VALID" means no padding. -*@li data_format: An optional string, specifying the data format of "ksize" +* @li data_format: An optional string, specifying the data format of "ksize" * and "strides", either "NCHW", or "NHWC" (default). \n -*@par Outputs: +* @par Outputs: * y: The average pooled output tensor. Has the same type and format * as input "x". \n * @attention Constraints: * @li This operator applies only to a TensorFlow network. * @li Only single input and single output are supported. -*@li Global pooling is supported. +* @li Global pooling is supported. * @li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. * ksize_H * ksize_W < 256 -*@li Due to instruction restrictions, +* @li Due to instruction restrictions, * the values of "strides_h" and "strides_w" are positive integers within * the range [1, 63]. -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool. */ REG_OP(AvgPool) @@ -118,37 +118,44 @@ REG_OP(AvgPool) .OP_END_FACTORY_REG(AvgPool) /** -*@brief Performs average pooling on the input. -*@par Inputs: -*x: A tensor of type float16, float32, double. +* @brief Performs average pooling on the input. +* @par Inputs: +* x: A tensor of type float16, float32, double. -*@par Attributes: -*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, - * where N = C = 1, and H and W are positive integers within the range [1, 255]. -*@li strides: A required list of 4 ints, specifying the stride of the sliding window. - * The strides of the N and C dimensions are 1. - * The strides of the H and W dimensions are positive integers within the range [1, 63]. -*@li padding_mode: A required string, specifying the padding algorithm, +* @par Attributes: +* @li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) +* of the sliding window, where N = C = 1, + * and H and W are positive integers within the range [1, 255]. +* @li strides: A required list of 4 ints, specifying the stride of the + * sliding window. The strides of the N and C dimensions are 1. + * The strides of the H and W dimensions are positive integers within + * the range [1, 63]. +* @li padding_mode: A required string, specifying the padding algorithm, * either "VALID", "SAME" and "CALCULATED". - * With "SAME" means that the outputs will have the same spatial dimensions as its inputs. - * With "VALID" means no padding. + * With "SAME" means that the outputs will have the same spatial dimensions + * as its inputs. With "VALID" means no padding. * @li pads: Pad value when padding_mode is "CALCULATED". -* @li data_format: An optional string, specifying the data format of "ksize" and "strides", - * either "NCHW", or "NHWC" (default). -* @li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] -* @li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +* @li data_format: An optional string, specifying the data format of "ksize" + * and "strides", either "NCHW", or "NHWC" (default). +* @li global_pooling: Global or not. If true, pads will change to {0,0,0,0} +* and ksize will change to [input_h, input_w]. 
+* @li ceil_mode: Use ceil or floor to calculate the output size when +* padding_mode is "CALCULATED". * @li exclusive: Ignore padding area or not when calculating average. * @par Outputs: -* y: The average pooled output tensor. Has the same type and format as input "x". +* y: The average pooled output tensor. Has the same type and format as +* input "x". -*@attention Constraints: -*@li Only single input and single output are supported. +* @attention Constraints: +* @li Only single input and single output are supported. * @li Global pooling is supported. -*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 -*@li Due to instruction restrictions, - * the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. -*@par Third-party framework compatibility +* @li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. +* ksize_H * ksize_W < 256 +* @li Due to instruction restrictions, + * the values of "strides_h" and "strides_w" are positive integers within + * the range [1, 63]. +* @par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPoolV2. */ REG_OP(AvgPoolV2) @@ -173,7 +180,7 @@ REG_OP(AvgPoolV2) * @par Attributes: * @li ksize: List of ints that has length 1, 3 or 5. The size of the window * for each dimension of the input tensor. -*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding +* @li strides:List of ints that has length 1, 3 or 5. The stride of the sliding * window for each dimension of the input tensor. * @li pads: List of ints, implicit zero paddings on both sides of the input. * @li ceil_mode: When true, will use ceil instead of floor in the formula to @@ -184,13 +191,13 @@ REG_OP(AvgPoolV2) * size of the pooling region will be used. * @li data_format: A string, format of input data. \n -*@par Outputs: -*y: The average pooled output tensor. \n +* @par Outputs: +* y: The average pooled output tensor. \n -*@attention Constraints: +* @attention Constraints: * @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]. -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool3D. */ REG_OP(AvgPool3D) @@ -211,24 +218,24 @@ REG_OP(AvgPool3D) * @par Inputs: * @li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. * @li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. -*@li multiplier: An optional tensor of float16, float32, double. +* @li multiplier: An optional tensor of float16, float32, double. * @par Attributes: -*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. +* @li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. * @li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. -*@li pads: List of ints, implicit zero paddings on both sides of the input. -*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. +* @li pads: List of ints, implicit zero paddings on both sides of the input. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. * @li count_include_pad: When true, will include the zero-padding in the averaging calculation. 
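Editor's note: a C++ sketch of how one output element of a 2D average pool is computed under the `exclusive` / count_include_pad distinction described above. Single channel, row-major layout; all names are illustrative, not the operator's real implementation.

#include <algorithm>
#include <vector>

// Averages the kh x kw window at output position (oh, ow) over an h x w map.
// When `exclusive` is true the padded area is ignored in the divisor;
// otherwise the full window size is used (zero padding counted in).
float AvgPoolAt(const std::vector<float>& x, int h, int w,
                int oh, int ow, int kh, int kw, int sh, int sw,
                int pad_top, int pad_left, bool exclusive) {
  float sum = 0.0f;
  int valid = 0;
  for (int i = 0; i < kh; ++i) {
    for (int j = 0; j < kw; ++j) {
      const int r = oh * sh - pad_top + i;
      const int c = ow * sw - pad_left + j;
      if (r >= 0 && r < h && c >= 0 && c < w) {
        sum += x[r * w + c];
        ++valid;
      }
    }
  }
  const int divisor = exclusive ? std::max(valid, 1) : kh * kw;
  return sum / static_cast<float>(divisor);
}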
-*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. -*@li data_format: A string, format of input data . \n +* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. +* @li data_format: A string, format of input data . \n -*@par Outputs: -*y: The average pooled output tensor . \n +* @par Outputs: +* y: The average pooled output tensor . \n -*@attention Constraints: -*"ksize" is in the range [1, 255]. "strides" is in the range [1, 63] +* @attention Constraints: +* "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool3D. */ REG_OP(AvgPool3DD) @@ -266,7 +273,7 @@ REG_OP(AvgPool3DD) * @li data_format: A string, format of input data. \n * @par Outputs: -* @li output: A mutable tensor with the same shape and type as "grads". +* output: A mutable tensor with the same shape and type as "orig_input_shape". * @attention Constraints: * @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]. \n @@ -336,31 +343,31 @@ REG_OP(AvgPool3DGradD) .OP_END_FACTORY_REG(AvgPool3DGradD) /** -*@brief Performs max_pool_ext2 on the input . \n +* @brief Performs max_pool_ext2 on the input . \n -*@par Inputs: +* @par Inputs: * One input: -*x: A Tensor of type float16. +* x: A Tensor of type float16. -*@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, - * specifying the size of the window for each dimension of the input tensor. No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, - * specifying the stride of the sliding window for each dimension of the input tensor. No default value. -*@li padding: A required string. No default value. -*@li data_format: An optional string . \n +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, +* specifying the size of the window for each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, +* specifying the stride of the sliding window for each dimension of the input tensor. No default value. +* @li padding: A required string. No default value. +* @li data_format: An optional string . \n -*@par Outputs: -*y: A Tensor. Has the same type and format as input "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x" . \n -*@attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. -*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -*@li "padding" is either "SAME" or "VALID" . \n +* @li "padding" is either "SAME" or "VALID" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPoolV2. */ REG_OP(MaxPoolExt2) @@ -377,35 +384,35 @@ REG_OP(MaxPoolExt2) .OP_END_FACTORY_REG(MaxPoolExt2) /** -*@brief Performs max pooling on the input . \n +* @brief Performs max pooling on the input . \n * @par Inputs: * One input: * x: A Tensor. 
Supported type:float16, float32, double, int8, int16, * int32, int64, uint8, uint16, qint8 -*@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, * specifying the size of the window for each dimension of the input tensor. * No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, +* @li strides: A required list of int8, int16, int32, or int64 values, * specifying the stride of the sliding window for each dimension of * the input tensor. No default value. -*@li padding: A required string. No default value. -*@li data_format: An optional string. Defaults to "NHWC" . \n +* @li padding: A required string. No default value. +* @li data_format: An optional string. Defaults to "NHWC" . \n -*@par Outputs: -*y: A Tensor. Has the same type and format as input "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x" . \n -*@attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, * ksize[1] * ksize[2] <= 255. -*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, +* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -*@li "padding" is either "SAME" or "VALID". +* @li "padding" is either "SAME" or "VALID". -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPool. */ REG_OP(MaxPool) @@ -423,33 +430,33 @@ REG_OP(MaxPool) /** * @brief Performs max 3d pooling on the input . \n -*@par Inputs: +* @par Inputs: * x: A Tensor. Supported type float16, float32, double . \n -*@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, +* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. -*@li padding: A required string type of float16. -*@li pads: A list type of int32. Default value {0,0,0,0,0,0}. -*@li dilation: A list type of int32. Default value {1,1,1,1,1,1}. -*@li ceil_mode: A ceil mode number of int32 . Default value 0. -*@li data_format: An optional string. Defaults to "NDHWC" . \n +* @li padding: A required string type of float16. +* @li pads: A list type of int32. Default value {0,0,0,0,0,0}. +* @li dilation: A list type of int32. Default value {1,1,1,1,1,1}. +* @li ceil_mode: A ceil mode number of int32 . Default value 0. +* @li data_format: An optional string. Defaults to "NDHWC" . \n -*@par Outputs: -*y: A Tensor. Has the same type and format as input "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x" . \n -*@attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, * ksize[1] * ksize[2] <= 255. 
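Editor's note: the "SAME"/"VALID" strings used throughout these pooling docs follow the usual TensorFlow-style output-size rule; a small C++ sketch, assuming positive stride and ksize <= in for "VALID":

#include <string>

// For "SAME" the input is padded so that out = ceil(in / stride);
// for "VALID" no padding is added and the window must fit entirely inside.
int PooledDim(int in, int ksize, int stride, const std::string& padding) {
  if (padding == "SAME") {
    return (in + stride - 1) / stride;  // ceil(in / stride)
  }
  return (in - ksize) / stride + 1;     // "VALID"
}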
-*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, +* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -*@li "padding" is either "SAME" or "VALID" . \n +* @li "padding" is either "SAME" or "VALID" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPool3D. */ REG_OP(MaxPool3D) @@ -466,7 +473,7 @@ REG_OP(MaxPool3D) /** * @brief Performs max pooling3d on both max values and indices. -* +* * @par Inputs: * One input: * x: An 6D tensor. Supported type: float16. Format as NDC1HWC0. @@ -484,7 +491,7 @@ REG_OP(MaxPool3D) * @li data_format: the format of torch input, default value is "NCDHW". * @li argmax_type: the function of this field is to determine the type of * output argmax, "bitmask" is the default value, the argmax will return -* a img2col bitmask. "index_int32" and "index_int64" represent the torch +* a img2col bitmask. "index_int32" and "index_int64" represent the torch * output indices. * @par Outputs: * y: An 6D tensor. the maxpool3d output(max value), format as NDoC1HoWoC0. @@ -505,8 +512,8 @@ REG_OP(MaxPool3DWithArgmax) .OP_END_FACTORY_REG(MaxPool3DWithArgmax) /** -*@brief Applies a 2D adaptive max pooling over an input signal conposed of several input planes. \n -* The output is of size H x W, for any input size. +* @brief Applies a 2D adaptive max pooling over an input signal conposed of several input planes. \n +* The output is of size H x W, for any input size. * @par Inputs: * One input, including: @@ -698,31 +705,31 @@ REG_OP(MaxPoolV2) * @brief Performs max pooling on the input and outputs both max values and * indices . \n -*@par Inputs: +* @par Inputs: * One input: * x: An 4D Tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. * Must set the format, supported format list ["NCHW, NHWC"]. \n -*@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, * specifying the size of the window for each dimension of the input tensor. * No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, +* @li strides: A required list of int8, int16, int32, or int64 values, * specifying the stride of the sliding window for each dimension of * the input tensor. No default value. -*@li padding: A required string. No default value . -*@li Targmax:An optional int with default value 7 . \n +* @li padding: A required string. No default value . +* @li Targmax:An optional int with default value 7 . \n -*@par Outputs: -*@li y: A Tensor. Has the same type and format as input "x". -*@li argmax: A Tensor. Has the same type and format as input "x". +* @par Outputs: +* @li y: A Tensor. Has the same type and format as input "x". +* @li argmax: A Tensor. Has the same type and format as input "x". * @attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, * ksize[1] * ksize[2] <= 255. -*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, +* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -*@li "padding" is either "SAME" or "VALID" . 
+* @li "padding" is either "SAME" or "VALID" . *@par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPoolWithArgmax. @@ -760,7 +767,7 @@ REG_OP(MaxPoolWithArgmax) * @li padding: A required string. No default value . \n * @par Outputs: -*y: A Tensor. Has the same type and format as input "x" . \n +* y: A Tensor. Has the same type and format as input "x" . \n * @attention Constraints: * @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, @@ -873,7 +880,7 @@ REG_OP(MaxPoolGradGradWithArgmax) * @li data_format: An optional string. Defaults to "NHWC". \n * @par Outputs: -* out_grad: A mutable tensor with the same shape and type as "input_grad". \n +* out_grad: A mutable tensor with the same shape and type as "orig_input_shape". \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator AvgPoolGrad. @@ -965,7 +972,7 @@ REG_OP(AvgPoolV2Grad) /** * @brief Computes gradients of averagev2 pooling function. * @par Inputs: -*input_grad: An NHWC tensor of type float16, float32, or double. +* input_grad: An NHWC tensor of type float16, float32, or double. * @par Attributes: * @li orig_input_shape: A required tuple or list of type int32. @@ -983,10 +990,10 @@ REG_OP(AvgPoolV2Grad) * @li data_format: An optional string. Defaults to "NHWC". * @par Outputs: -*out_grad: A mutable tensor with the same shape and type as "orig_input". +* out_grad: A mutable tensor with the same shape and type as "orig_input". * @par Third-party framework compatibility -*Compatible with the TensorFlow operator AvgPoolGrad. +* Compatible with the TensorFlow operator AvgPoolGrad. */ REG_OP(AvgPoolV2GradD) .INPUT(input_grad, TensorType({DT_FLOAT16})) @@ -1005,16 +1012,16 @@ REG_OP(AvgPoolV2GradD) .OP_END_FACTORY_REG(AvgPoolV2GradD) /** -*@brief upsample the layer, similar to the nearest-neighbor difference scaling algorithm. +* @brief upsample the layer, similar to the nearest-neighbor difference scaling algorithm. -*@par Inputs: +* @par Inputs: * one input, including: * x: A tensor of type float16 or float32. -*@par Attributes: -*@li scale: A optional float32, scale factor of x. Defaults to "1.0". -*@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2". -*@li stride_w: An optional int32, broadcast the axis of w. Defaults to "2". -*@par Outputs: +* @par Attributes: +* @li scale: A optional float32, scale factor of x. Defaults to "1.0". +* @li stride_h: An optional int32, broadcast the axis of h. Defaults to "2". +* @li stride_w: An optional int32, broadcast the axis of w. Defaults to "2". +* @par Outputs: *y: A tensor of type float16 or float32. */ REG_OP(Upsample) @@ -1026,10 +1033,10 @@ REG_OP(Upsample) .OP_END_FACTORY_REG(Upsample) /** -*@brief Computes gradient of the FractionalMaxPool function . \n +* @brief Computes gradient of the FractionalMaxPool function . \n -*@par Inputs: -*Inputs include: +* @par Inputs: +* Inputs include: * @li orig_input: A Tensor. Must be one of the following types: float32, float64, int32, int64. * @li orig_output: A Tensor. Must have the same type as orig_input. * @li out_backprop: A Tensor. Must have the same type as orig_input. @@ -1037,17 +1044,17 @@ REG_OP(Upsample) * @li row_pooling_sequence: A Tensor of type int64. * @li col_pooling_sequence: A Tensor of type int64 . \n -*@par Attributes: -*overlapping: An optional bool. Defaults to False . \n +* @par Attributes: +* overlapping: An optional bool. Defaults to False . \n -*@par Outputs: -*y: A Tensor. Has the same type as orig_input . 
\n +* @par Outputs: +* y: A Tensor. Has the same type as orig_input . \n -*@attention Constraints: -*The implementation for FractionalMaxPoolGrad on Ascend uses AICPU, with bad performance. +* @attention Constraints: +* The implementation for FractionalMaxPoolGrad on Ascend uses AICPU, with bad performance. -*@par Third-party framework compatibility -*@li compatible with tensorflow FractionalMaxPoolGrad operator. +* @par Third-party framework compatibility +* @li compatible with tensorflow FractionalMaxPoolGrad operator. */ REG_OP(FractionalMaxPoolGrad) .INPUT(orig_input, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) @@ -1330,7 +1337,7 @@ REG_OP(AvgPool1DD) * @par Inputs: * One input: -* x: An 5hd Tensor of type float16. +* x: An 5hd Tensor of type float16. * Must set the format, supported format list ["NC1HWC0"]. * @par Attributes: * @li ksize: A required list of int8, int16, int32, or int64 values, @@ -1376,19 +1383,19 @@ REG_OP(MaxPoolWithArgmaxV2) * @par Inputs: * Three inputs, including: -* @li x: An 5hd tensor of type float16. +* @li x: An 5hd tensor of type float16. * Must set the format, supported format list ["NC1HWC0"] -* @li grad: An 5hd tensor of type float16. +* @li grad: An 5hd tensor of type float16. * Must set the format, supported format list ["NC1HWC0"] -* @li argmax: An 5hd tensor of type uint16 or int64. +* @li argmax: An 5hd tensor of type uint16 or int64. * Must set the format, supported format list ["NC1HWC0"] \n * @par Attributes: -* @li ksize: A required list of int8, int16, int32, or int64 values, +* @li ksize: A required list of int8, int16, int32, or int64 values, * specifying the size of the window for each dimension of the input tensor. No default value. -* @li strides: A required list of int8, int16, int32, or int64 values, +* @li strides: A required list of int8, int16, int32, or int64 values, * specifying the stride of the sliding window for each dimension of the input tensor. No default value. -* @li pads: A required list of int8, int16, int32, or int64 values, +* @li pads: A required list of int8, int16, int32, or int64 values, * specifying the pad of the input feature map. No default value. \n * @li dtype: A optional int. default value is 3. * @li dilation: A optional list of int8, int16, int32, or int64 values. @@ -1398,11 +1405,11 @@ REG_OP(MaxPoolWithArgmaxV2) * y: A Tensor. Has the same type and format as input "x". \n * @attention Constraints: -* @li ksize: a list that has length 4: +* @li ksize: a list that has length 4: * ksize[0] = 1, ksize[1] = 1, ksize[2] * ksize[3] <= (ub_size-8)*1024//7//2//16. -* @li strides: a list that has length 4: +* @li strides: a list that has length 4: * strides[0] = 1, strides[1] = 1, 1 <= strides[2] <= 2048, 1 <= strides[3] <= 2048. -* @li pads: a list that has length 4: +* @li pads: a list that has length 4: * pads[0] = 1, pads[1] = 1, 1 <= pads[2] <= (ksize[2]//2), 1 <= pads[3] <= (ksize[3]//2). * @li dilation: a list that has length 4. * @li ceil_mode: is a bool, default is false. \n @@ -1532,7 +1539,7 @@ REG_OP(MaxPoolV3Grad) *@brief Performs Dilation2D on the input . \n *@par Inputs: -*@li x: A tensor of shape is 4d, format is support NHWC. +* @li x: A tensor of shape is 4d, format is support NHWC. *@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n *@par Attributes: @@ -1566,12 +1573,12 @@ REG_OP(Dilation2D) *@par Inputs: *@li x: A tensor of shape is 4d, format is support NHWC. 
-*@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. +* @li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. *@li out_backprop: Has the same type and format as input x and the c dimension is same with x. \n *@par Attributes *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. -*@li rates: A required list of 4 ints, the rates of the N and C dimensions are 1. +* @li rates: A required list of 4 ints, the rates of the N and C dimensions are 1. *@li padding_mode: A optional string. Defaults to "SAME", it support SAME and VALID. *@li pads: A optional list of 4 ints. *@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". @@ -1605,7 +1612,7 @@ REG_OP(Dilation2DBackpropFilter) *@par Inputs: *@li x: A tensor of shape is 4d, format is support NHWC. -*@li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. +* @li filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. *@li out_backprop: Has the same type and format as input x and the c dimension is same with x. \n *@par Attributes @@ -1640,7 +1647,7 @@ REG_OP(Dilation2DBackpropInput) .OP_END_FACTORY_REG(Dilation2DBackpropInput) /** -* @brief Applies a 2D adaptive average pooling over +* @brief Applies a 2D adaptive average pooling over * an input signal composed of several input planes. \n * @par Inputs: @@ -1696,11 +1703,11 @@ REG_OP(AdaptiveAvgPool2dGrad) * @li argmax: A tensor of type uint16 or int64. \n * @par Attributes: -* @li ksize: A required list of int8, int16, int32, or int64 values, +* @li ksize: A required list of int8, int16, int32, or int64 values, * specifying the size of the window for each dimension of the input tensor. No default value. -* @li strides: A required list of int8, int16, int32, or int64 values, +* @li strides: A required list of int8, int16, int32, or int64 values, * specifying the stride of the sliding window for each dimension of the input tensor. No default value. -* @li pads: A required list of int8, int16, int32, or int64 values, +* @li pads: A required list of int8, int16, int32, or int64 values, * specifying the pad of the input feature map. No default value. \n * @par Outputs: @@ -1708,11 +1715,11 @@ REG_OP(AdaptiveAvgPool2dGrad) * @attention Constraints: * @li The MaxPoolGradWithArgmaxV2 operator has the same function, and it is recommended to use the V2 operator. -* @li ksize: a list that has length 4: +* @li ksize: a list that has length 4: * ksize[0] = 1, ksize[3] = 1, ksize[1] * ksize[2] <= (ub_size-8)*1024//7//2//16. -* @li strides: a list that has length 4: +* @li strides: a list that has length 4: * strides[0] = 1, strides[3] = 1, 1 <= strides[1] <= 2048, 1 <= strides[2] <= 2048. -* @li pads: a list that has length 4: +* @li pads: a list that has length 4: * pads[0] = 1, pads[3] = 1, 1 <= pads[2] <= (ksize[1]//2), 1 <= pads[2] <= (ksize[3]//2). 
* @li ceil_mode: defaults to False.\n
@@ -1778,7 +1785,7 @@ REG_OP(MaxPoolWithArgmaxV1)
 .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1)

/**
-*@brief Randomly sample a subset of positive and negative examples,and overwrite
+* @brief Randomly sample a subset of positive and negative examples, and overwrite
the label vector to the ignore value (-1) for all elements that are not
included in the sample.\n
@@ -1790,14 +1797,14 @@ included in the sample.\n
* @li batch_size_per_images: A require attribute of type int.
* @li positive_fraction: A require attribute of type float.

-*@par Outputs:
-*y: The result of subSample. \n
+* @par Outputs:
+* y: The result of subSample. \n

-*@par Third-party framework compatibility
-*Compatible with the Pytorch operator SubSample.
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator SubSample.

-*@attention Constraints:
-*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
+* @attention Constraints:
+* Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
*/
REG_OP(SubSample)
 .INPUT(labels, TensorType({DT_INT32}))
@@ -1807,7 +1814,7 @@ REG_OP(SubSample)
 .OP_END_FACTORY_REG(SubSample)

/**
-*@brief Randomly sample a subset of positive and negative examples,and overwrite
+* @brief Randomly sample a subset of positive and negative examples, and overwrite
the label vector to the ignore value (-1) for all elements that are not
included in the sample.\n
@@ -1820,14 +1827,14 @@ included in the sample.\n
* @li batch_size_per_images: A require attribute of type int.
* @li positive_fraction: A require attribute of type float.

-*@par Outputs:
-*y: The result of subSample. \n
+* @par Outputs:
+* y: The result of subSample. \n

-*@par Third-party framework compatibility
-*Compatible with the Pytorch operator SubSampleLabels.
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator SubSampleLabels.

-*@attention Constraints:
-*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
+* @attention Constraints:
+* Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
*/
REG_OP(SubSampleLabels)
 .INPUT(labels, TensorType({DT_INT32}))
@@ -1838,22 +1845,22 @@ REG_OP(SubSampleLabels)
 .OP_END_FACTORY_REG(SubSampleLabels)

/**
-*@brief Computes GlobalLpPool, GlobalLpPool consumes an input tensor X and applies lp pool pooling across the
+* @brief Computes GlobalLpPool. GlobalLpPool consumes an input tensor X and applies Lp pooling across the
values in the same channel. \n

-*@par Inputs:
+* @par Inputs:
* x: A Tensor of type float16 or float32 . \n

-*@par Attributes:
-*@li p: Optional. Must be one of the following types: float32. Defaults to 2.0. \n
+* @par Attributes:
+* @li p: Optional. Must be one of the following types: float32. Defaults to 2.0. \n

-*@par Outputs:
+* @par Outputs:
* y: A Tensor. Has the same type as "x", when shape of x is [N,C,H,W], shape of y is [N,C,1,1].

-*@par Third-party framework compatibility
+* @par Third-party framework compatibility
* Compatible with the onnx operator GlobalLpPool.

-*@par Restrictions:
-*Warning: THIS FUNCTION IS DEPRECATED.
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED.
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ REG_OP(GlobalLpPool) diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index e4d7936c..d8c7b7cf 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -25,17 +25,17 @@ namespace ge { /** -*@brief The GELU activation function is x*Φ(x), +* @brief The GELU activation function is x*Φ(x), * where Φ(x) the standard Gaussian cumulative distribution function. -*@par Inputs: -*One input, including: \n -*x: A Tensor. Must be one of the following types: float16, float32. \n +* @par Inputs: +* One input, including: \n +* x: A Tensor. Must be one of the following types: float16, float32. \n -*@par Outputs: -*y: A Tensor. Has the same type as "x". \n +* @par Outputs: +* y: A Tensor. Has the same type as "x". \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator Gelu. */ REG_OP(Gelu) @@ -125,17 +125,17 @@ REG_OP(SwishGrad) .OP_END_FACTORY_REG(SwishGrad) /** -*@brief Computes the gradient for the gelu of "x" . +* @brief Computes the gradient for the gelu of "x" . -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li dy: A Tensor. Must be one of the following types: float16, float32. -*@li x: A Tensor of the same type as "dy". -*@li y: A Tensor of the same type as "dy" . \n +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor of the same type as "dy". +* @li y: A Tensor of the same type as "dy" . \n -*@par Outputs: -*z: A Tensor. Has the same type as "dy". -*@par Third-party framework compatibility +* @par Outputs: +* z: A Tensor. Has the same type as "dy". +* @par Third-party framework compatibility * Compatible with the TensorFlow operator GeluGrad. */ REG_OP(GeluGrad) @@ -799,20 +799,20 @@ REG_OP(LeakyReluGrad) .OP_END_FACTORY_REG(LeakyReluGrad) /** -*@brief Thresholds grad each element of the input Tensor . +* @brief Thresholds grad each element of the input Tensor . -*@par Inputs: +* @par Inputs: * @li gradients: A Tensor shape and dtype of input gradients. Support float16, int32. * @li features: A Tensor shape and dtype of input features. Support float16, int32 . \n -*@par Attributes: -*threshold: A float32 scale value to threshold at . \n +* @par Attributes: +* threshold: A float32 scale value to threshold at . \n -*@par Outputs: -*backprops: A Tensor of shape and dtype of output backprops, should be same shape and type as inputs . \n +* @par Outputs: +* backprops: A Tensor of shape and dtype of output backprops, should be same shape and type as inputs . \n -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ThresholdGradV2D) .INPUT(gradients, TensorType({DT_INT32, DT_FLOAT16})) @@ -822,20 +822,20 @@ REG_OP(ThresholdGradV2D) .OP_END_FACTORY_REG(ThresholdGradV2D) /** -*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value . +* @brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value . -*@par Inputs: -*x: A Tensor dtype of real number . \n +* @par Inputs: +* x: A Tensor dtype of real number . \n -*@par Attributes: -*@li threshold: A float32 scale value to threshold at. -*@li value: A float32 scale value to replace with . \n +* @par Attributes: +* @li threshold: A float32 scale value to threshold at. +* @li value: A float32 scale value to replace with . 
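Editor's note: a minimal C++ sketch of the element-wise rule the ThresholdV2D entry above describes, y = (x > threshold) ? x : value. The function name is illustrative, not the Ascend kernel.

// Values above `threshold` pass through unchanged; all others become `value`.
inline float ThresholdV2(float x, float threshold, float value) {
  return (x > threshold) ? x : value;
}

For example, with threshold = 0.5 and value = 0.0, the inputs {0.2, 0.7} map to {0.0, 0.7}.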
\n
-*@par Outputs:
-*y: A Tensor of shape and dtype of output, should be same shape and type as input . \n
+* @par Outputs:
+* y: A Tensor with the same shape and type as the input . \n

-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ThresholdV2D)
 .INPUT(x, TensorType::RealNumberType())
diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h
index 57631d14..e976047d 100644
--- a/third_party/fwkacllib/inc/ops/random_ops.h
+++ b/third_party/fwkacllib/inc/ops/random_ops.h
@@ -528,33 +528,57 @@ REG_OP(DropOutGenMaskV3)

/**
-*@brief Generate stateless random bit mask for dropout . \n
+* @brief Generate stateless random bit mask for dropout . \n

-*@par Inputs:
+* @par Inputs:
include:
-*@li shape:The shape of the output tensor.
-*@li prob:0-D. Number of bit 1 . \n
-*@li seed:If either seed or seed2 are set to be non-zero, the random number
-*generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
-*@li seed2:A second seed to avoid seed collision . \n
+* @li shape: The shape of the output tensor.
+* @li prob: 0-D. Probability of a bit being 1 . \n
+* @li seed: First seed to avoid seed collision.
+* @li seed1: Second seed to avoid seed collision . \n
+* @li offset: Initial offset of the random number . \n

-*@par Outputs:
+* @par Outputs:
*y:Output (1-D) random number using uint data format . \n

-*@attention Constraints:
+* @attention Constraints:
*The output is aligned with 128 bits

-*@see StatelessDropOutGenMask()
+* @see StatelessDropOutGenMask()
*/
REG_OP(StatelessDropOutGenMask)
 .INPUT(shape, TensorType({ DT_INT32, DT_INT64 }))
 .INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT }))
 .INPUT(seed, TensorType({ DT_INT32, DT_INT64 }))
 .INPUT(seed1, TensorType({ DT_INT32, DT_INT64 }))
+    .OPTIONAL_INPUT(offset, TensorType({ DT_INT64 }))
 .OUTPUT(y, TensorType({ DT_UINT8 }))
 .OP_END_FACTORY_REG(StatelessDropOutGenMask)

/**
+* @brief Generate Bernoulli distribution for tensor input . \n
+
+* @par Inputs:
+include:
+* @li shape: The shape of the output tensor. A Tensor of type int32, int64.
+* @li prob: 0-D. Probability of a bit being 1 . \n
+* @li seed: If seed is set to -1 and offset is set to 0, the random number
+* generator is seeded by a random seed. Otherwise, it is seeded by the given seed.
+* @li offset: To avoid seed collision . \n
+
+* @par Outputs:
+* y: A Tensor of type int8, uint8, int16, uint16,
+* int32, uint32, int64, uint64, bool, float16, float, double, bf16. \n
+*/
+REG_OP(StatelessBernoulli)
+    .INPUT(shape, TensorType({ DT_INT32, DT_INT64}))
+    .INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
+    .INPUT(seed, TensorType({ DT_INT64 }))
+    .INPUT(offset, TensorType({ DT_INT64 }))
+    .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32,
+        DT_INT64, DT_UINT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BF16}))
+    .OP_END_FACTORY_REG(StatelessBernoulli)
+/**
*@brief Generates values in an interval . \n

*@par Inputs:
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index 810d024b..d6791af1 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -24,10 +24,10 @@ namespace ge {
/**
-*@brief Creates a sequence of numbers . \n
+* @brief Creates a sequence of numbers .
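Editor's note: the StatelessDropOutGenMask/StatelessBernoulli entries above are "stateless" in the sense that the same (seed, offset) pair reproduces the same mask. A host-side C++ illustration under that assumption follows; the actual on-device generator is not specified by these headers, so the RNG choice here is an assumption.

#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

// Fills a mask of n bytes with Bernoulli(prob) samples, deterministically
// derived from (seed, offset) so repeated calls reproduce the same mask.
std::vector<std::uint8_t> StatelessBernoulliMask(std::size_t n, double prob,
                                                 std::int64_t seed,
                                                 std::int64_t offset) {
  std::seed_seq seq{static_cast<std::uint64_t>(seed),
                    static_cast<std::uint64_t>(offset)};
  std::mt19937_64 gen(seq);
  std::bernoulli_distribution bern(prob);
  std::vector<std::uint8_t> mask(n);
  for (auto& m : mask) m = bern(gen) ? 1 : 0;
  return mask;
}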
\n -*@par Inputs: -*Three inputs, including: +* @par Inputs: +* Three inputs, including: * @li start: A 0D Tensor (scalar). Acts as first entry in the range if "limit" * is not "None"; otherwise, acts as range limit and first entry defaults to "0". * The supported types are: float32, int32, double, int64. @@ -37,11 +37,11 @@ namespace ge { * @li delta: A 0D Tensor (scalar). Number that increments "start". * Defaults to "1". The supported types are: float32, int32, double, int64 . \n -*@par Outputs: -*y: A 1D Tensor . \n +* @par Outputs: +* y: A 1D Tensor . \n -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator Range. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Range. */ REG_OP(Range) .INPUT(start, TensorType({DT_FLOAT,DT_INT32,DT_DOUBLE,DT_INT64})) @@ -51,10 +51,10 @@ REG_OP(Range) .OP_END_FACTORY_REG(Range) /** -*@brief: Creates a sequence of numbers . \n +* @brief: Creates a sequence of numbers . \n -*@par Inputs: -*Four inputs, including: +* @par Inputs: +* Four inputs, including: * @li x: A 1D Tensor of type float32 or int32. The assistant data. * @li start: A 0D Tensor (scalar) of type float32 or int32. Acts as first entry in the range if "limit" * is not "None"; otherwise, acts as range limit and first entry defaults to "0". @@ -65,20 +65,20 @@ REG_OP(Range) * @li delta: A 0D Tensor (scalar) of type float32 or int32. * Number that increments "start". Defaults to "1" . \n -*@par Outputs: -*y: A 1D Tensor . \n +* @par Outputs: +* y: A 1D Tensor . \n -*@par Quantization supported or not -*Not supported +* @par Quantization supported or not +* Not supported -*@par Quantized inference supported or not -*Not supported +* @par Quantized inference supported or not +* Not supported -*@par Multiple batches supported or not -*Supported +* @par Multiple batches supported or not +* Supported -*@see Range() -*@since V100R001C33 +* @see Range() +* @since V100R001C33 * * @par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use Range instead. @@ -92,23 +92,23 @@ REG_OP(RangeD) .OP_END_FACTORY_REG(RangeD) /** -*@brief Constructs a tensor by tiling a given tensor . \n +* @brief Constructs a tensor by tiling a given tensor . \n -*@par Inputs: -*Two inputs, including: +* @par Inputs: +* Two inputs, including: * @li x: A Tensor. * Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. * @li multiples: A 1D Tensor of type int32 or int64. * The length must be the same as the number of dimensions in "input" -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@see TileD() +* @see TileD() -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator Tile. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Tile. */ REG_OP(Tile) .INPUT(x, TensorType::BasicType()) @@ -117,24 +117,24 @@ REG_OP(Tile) .OP_END_FACTORY_REG(Tile) /** -*@brief Constructs a tensor by tiling a given tensor . \n +* @brief Constructs a tensor by tiling a given tensor . \n -*@par Inputs: -*x: A Tensor. Must be one of the following types: float32, float16, int32 . \n +* @par Inputs: +* x: A Tensor. Must be one of the following types: float32, float16, int32 . \n -*@par Attributes: -*multiples: A required Tensor of type int32 or int64. 
+* @par Attributes: +* multiples: A required Tensor of type int32 or int64. * Number of replication times . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@see Tile() +* @see Tile() -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator Tile. -*@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. Please use Tile instead. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Tile. +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Tile instead. */ REG_OP(TileD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -178,10 +178,10 @@ REG_OP(GatherNd) .OP_END_FACTORY_REG(GatherNd) /** -*@brief Gather slices from "x" according to "indices" by corresponding axis . +* @brief Gather slices from "x" according to "indices" by corresponding axis . -*@par Inputs: -*Three inputs, including: +* @par Inputs: +* Three inputs, including: * @li x: A Tensor. Must be one of the following types: float32, float64, int32, * uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16, * uint16, complex128, float16, uint32, uint64, complex64, complex128. @@ -189,16 +189,16 @@ REG_OP(GatherNd) * @li axis: A Tensor of type as int32 or int64, * Must be in the range [-rank(input_tensor), rank(input_tensor)) . -*@par Attributes: +* @par Attributes: * batch_dims: An optional int. Defaults to 0. -*@par Outputs: -*y: A Tensor. Has the same type as "x" . +* @par Outputs: +* y: A Tensor. Has the same type as "x" . -*@attention Constraints: -*Value in indices must be in range [0, x.shape[axis]) +* @attention Constraints: +* Value in indices must be in range [0, x.shape[axis]) -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator GatherV2 . */ @@ -211,24 +211,24 @@ REG_OP(GatherV2) .OP_END_FACTORY_REG(GatherV2) /** -*@brief Gather slices from "x" according to "indices" by corresponding axis . \n +* @brief Gather slices from "x" according to "indices" by corresponding axis . \n -*@par Inputs: -*Two inputs, including: +* @par Inputs: +* Two inputs, including: * @li x: A Tensor. Must be one of the following types: float32, float16, int32, uint32, int8, uint8, * int16, uint16, int64, uint64. * @li indices: A Tensor of type int32 or int64 . \n -*@par Attributes: -*axis: A int32 specifying the axis to gather from . \n +* @par Attributes: +* axis: A int32 specifying the axis to gather from . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@attention Constraints: +* @attention Constraints: -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator GatherV2. * * @par Restrictions: @@ -244,20 +244,20 @@ REG_OP(GatherV2D) .OP_END_FACTORY_REG(GatherV2D) /** -*@Gathers values along an axis specified by dim . \n +* @Gathers values along an axis specified by dim . \n -*@par Inputs: -*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64. -*@li index: A Tensor. Must be one of the following types: int64 . \n +* @par Inputs: +* @li x: A Tensor. Must be one of the following types: float16, float32, int32, int64. +* @li index: A Tensor. Must be one of the following types: int64 . \n -*@par Attributes: +* @par Attributes: * dim: the axis along which to index . \n -*@par Outputs: +* @par Outputs: * y: A Tensor. 
Has the same type as "x" . \n -*@par Third-party framework compatibility -*Compatible with the PyTorch operator Gather. +* @par Third-party framework compatibility +* Compatible with the PyTorch operator Gather. */ REG_OP(GatherElements) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32, @@ -269,20 +269,20 @@ REG_OP(GatherElements) .OP_END_FACTORY_REG(GatherElements) /** -*@Gathers values along an axis specified by dim . \n +* @brief Gathers values along an axis specified by dim . \n -*@par Inputs: -*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* @par Inputs: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * int64, uint16, float16, uint32, uint64, bool. -*@li dim: A Tensor. Must be one of the following types: int32, int64. -*@li index: A Tensor. Must be one of the following types: int32, int64 . \n +* @li dim: A Tensor. Must be one of the following types: int32, int64. +* @li index: A Tensor. Must be one of the following types: int32, int64 . \n -*@par Outputs: +* @par Outputs: * y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility -*Compatible with the PyTorch operator Gather. +* @par Third-party framework compatibility +* Compatible with the PyTorch operator Gather. */ REG_OP(GatherD) .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32 @@ -294,13 +294,13 @@ REG_OP(GatherD) .OP_END_FACTORY_REG(GatherD) /** -*@brief Extracts a strided slice of a tensor. Roughly speaking, this op +* @brief Extracts a strided slice of a tensor. Roughly speaking, this op extracts a slice of size (end-begin)/stride from the given input tensor. Starting at the location specified by begin the slice continues by adding stride to the index until all dimensions are not less than end. -*@par Inputs: -*Four inputs, including: +* @par Inputs: +* Four inputs, including: * @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, * complex128, float16, uint32, uint64, complex64, complex128. @@ -310,7 +310,7 @@ REG_OP(GatherD) * @li strides: A Tensor of type int32 or int64, for the increment . \n -*@par Attributes: +* @par Attributes: * @li begin_mask: A Tensor of type int32. A bitmask where a bit "i" being "1" means to ignore the begin value and instead use the largest interval possible. @@ -326,10 +326,10 @@ REG_OP(GatherD) A bitmask where bit "i" implies that the "i"th specification should shrink the dimensionality . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSlice. */ REG_OP(StridedSlice) @@ -346,17 +346,17 @@ REG_OP(StridedSlice) .OP_END_FACTORY_REG(StridedSlice) /** -*@brief Extracts a strided slice of a tensor. Roughly speaking, this op +* @brief Extracts a strided slice of a tensor. Roughly speaking, this op extracts a slice of size "(end-begin)/stride" from the given input tensor. Starting at the location specified by "begin" the slice continues by adding "stride" to the index until all dimensions are not less than "end" . \n -*@par Inputs: -*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* @par Inputs: +* x: A Tensor.
Must be one of the following types: float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, * complex128, float16, uint32, uint64, complex64, complex128 . \n -*@par Attributes: +* @par Attributes: * @li begin: A Tensor of type int32 or int64. The index of the first value to select. * @li end: A Tensor of type int32 or int64. @@ -376,10 +376,10 @@ REG_OP(StridedSlice) A bitmask where bit "i" implies that the "i"th specification should shrink the dimensionality . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSlice. * @par Restrictions: @@ -401,16 +401,16 @@ REG_OP(StridedSliceD) .OP_END_FACTORY_REG(StridedSliceD) /** -*@brief Since StridedSlice cuts out pieces of its "input" which is size "dy", +* @brief Since StridedSlice cuts out pieces of its "input" which is size "dy", its gradient will have the same shape (which is passed here as "shape"). The gradient will be zero in any element that the slice does not select . \n -*@par Inputs: -*dy: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* @par Inputs: +* dy: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, * complex128, float16, uint32, uint64, complex64, complex128 . \n -*@par Attributes: +* @par Attributes: * @li shape: A Tensor of type int32 or int64. * @li begin: A Tensor of type int32 or int64. The index of the first value to select. @@ -432,10 +432,10 @@ REG_OP(StridedSliceD) A bitmask where bit "i" implies that the "i"th specification should shrink the dimensionality . \n -*@par Outputs: -*output: A Tensor. Has the same type as "dy" . \n +* @par Outputs: +* output: A Tensor. Has the same type as "dy" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSliceGradD. * @par Restrictions: @@ -456,12 +456,12 @@ REG_OP(StridedSliceGradD) .OP_END_FACTORY_REG(StridedSliceGradD) /** -*@brief Since StridedSlice cuts out pieces of its "input" which is size "dy", +* @brief Since StridedSlice cuts out pieces of its "input" which is size "dy", its gradient will have the same shape (which is passed here as "shape"). The gradient will be zero in any element that the slice does not select . \n -*@par Inputs: -*Five inputs, including: +* @par Inputs: +* Five inputs, including: * @li shape: A Tensor of type int32 or int64. * @li begin: A Tensor of type int32 or int64. The index of the first value to select. @@ -473,7 +473,7 @@ REG_OP(StridedSliceGradD) * complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, * complex128, float16, uint32, uint64, complex64, complex128 . \n -*@par Attributes: +* @par Attributes: * @li begin_mask: A Tensor of type int32. A bitmask where a bit "i" being "1" means to ignore the begin value and instead use the largest interval possible. @@ -489,10 +489,10 @@ REG_OP(StridedSliceGradD) A bitmask where bit "i" implies that the "i"th specification should shrink the dimensionality . \n -*@par Outputs: -*output: A Tensor has the same type as "dy" . \n +* @par Outputs: +* output: A Tensor has the same type as "dy" . 
\n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSliceGrad. */ REG_OP(StridedSliceGrad) @@ -510,19 +510,19 @@ REG_OP(StridedSliceGrad) .OP_END_FACTORY_REG(StridedSliceGrad) /** -*@brief Computes the sum along segments of a tensor . \n +* @brief Computes the sum along segments of a tensor . \n -*@par Inputs: -*Three inputs, including: +* @par Inputs: +* Three inputs, including: * @li x: A Tensor of type NumberType. * @li segment_ids: A Tensor of type IndexNumberType, whose shape is a prefix * of "x.shape". * @li num_segments: A Tensor of type IndexNumberType . \n -*@par Outputs: -*y: A Tensor of type NumberType . \n +* @par Outputs: +* y: A Tensor of type NumberType . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator UnsortedSegmentSum. */ REG_OP(UnsortedSegmentSum) @@ -533,11 +533,11 @@ REG_OP(UnsortedSegmentSum) .OP_END_FACTORY_REG(UnsortedSegmentSum) /** -*@brief Creates a one-dimensional tensor of size steps whose values are evenly spaced from start to +* @brief Creates a one-dimensional tensor of size steps whose values are evenly spaced from start to * end, inclusive, on a logarithmic scale with base base. \n -*@par Inputs: -*One inputs, including: +* @par Inputs: +* One input, including: * assist: A tensor. Must be one of the following types: * float16, float32. \n @@ -548,11 +548,11 @@ REG_OP(UnsortedSegmentSum) * @li base: An optional float. Defaults to 10.0. \n * @li dtype: An optional int. Defaults to 1. \n -*@par Outputs: -*y: A Tensor with the same type and shape of input_x's. \n +* @par Outputs: +* y: A Tensor with the same type and shape as "assist". \n -*@par Third-party framework compatibility -*Compatible with the Pytorch operator logspaced. \n +* @par Third-party framework compatibility +* Compatible with the PyTorch operator logspace. \n */ REG_OP(LogSpaceD) .INPUT(assist, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -565,21 +565,21 @@ REG_OP(LogSpaceD) .OP_END_FACTORY_REG(LogSpaceD) /** -*@brief Computes the sum along segments of a tensor . \n +* @brief Computes the sum along segments of a tensor . \n -*@par Inputs: -*Two inputs, including: +* @par Inputs: +* Two inputs, including: * @li x: A Tensor of type float16, float32, int32, int8, uint8. * @li segment_ids: A Tensor of type int32, whose shape is a prefix * of "x.shape" . \n -*@par Attributes: -*num_segments: An int32, specifying the number of distinct segment IDs . \n +* @par Attributes: +* num_segments: An int32, specifying the number of distinct segment IDs . \n -*@par Outputs: -*y: A Tensor with same type as "x" . \n +* @par Outputs: +* y: A Tensor with same type as "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator UnsortedSegmentSum. * @par Restrictions: @@ -593,22 +593,23 @@ REG_OP(UnsortedSegmentSumD) .OP_END_FACTORY_REG(UnsortedSegmentSumD) /** -*@brief Reverses specific dimensions of a tensor . \n +* @brief Reverses specific dimensions of a tensor . \n -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: An ND Tensor (up to 8D). -*Must be one of the following types: int8, uint8, int16, uint16, int32, int64, bool, float16, float32, double, complex64, complex128, string. -*@li axis: A 1D Tensor. -*Must be one of the following types: int32, int64 +* @li x: An ND Tensor (up to 8D).
+* Must be one of the following types: int8, uint8, int16, uint16, int32, int64, bool, float16, float32, +* double, complex64, complex128, string. +* @li axis: A 1D Tensor. +* Must be one of the following types: int32, int64 -*@par Outputs: -*y: A Tensor. Has the same type and format as "x" +* @par Outputs: +* y: A Tensor. Has the same type and format as "x" -*@attention Constraints: +* @attention Constraints: "axis" must be within the rank of "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ReverseV2. */ REG_OP(ReverseV2) @@ -622,27 +623,27 @@ REG_OP(ReverseV2) .OP_END_FACTORY_REG(ReverseV2) /** -*@brief Reverses specific dimensions of a tensor . \n +* @brief Reverses specific dimensions of a tensor . \n -*@par Inputs: +* @par Inputs: * One input: -*@li x: An ND Tensor (up to 8D). +* @li x: An ND Tensor (up to 8D). * Must be one of the following types: int8, uint8, int16, uint16, int32, * int64, bool, float16, float, double, complex64, complex128, string . \n -*@par Attributes: -*axis: The indices of the dimensions to reverse. Support type: listInt . \n +* @par Attributes: +* axis: The indices of the dimensions to reverse. Support type: listInt . \n -*@par Outputs: -*y: A Tensor. Has the same type and format as "x" +* @par Outputs: +* y: A Tensor. Has the same type and format as "x" -*@attention Constraints: +* @attention Constraints: "axis" must be within the rank of "x" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ReverseV2. -*@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. Please use ReverseV2 instead. +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReverseV2 instead. */ REG_OP(ReverseV2D) .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, @@ -655,9 +656,9 @@ REG_OP(ReverseV2D) .OP_END_FACTORY_REG(ReverseV2D) /** -*@brief: Selects elements from "x1" or "x2", depending on "condition" . \n +* @brief Selects elements from "x1" or "x2", depending on "condition" . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: * @li condition: A Tensor of type bool. * @li x1: A Tensor. Must be one of the following types: float16, float32, @@ -666,10 +667,10 @@ REG_OP(ReverseV2D) * format:ND * @li x2: A Tensor of the same type as "x1".format:ND -*@par Outputs: -*y: A Tensor. Has the same type as "x1". format:ND +* @par Outputs: +* y: A Tensor. Has the same type as "x1". format:ND -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator Select. */ REG_OP(Select) @@ -680,18 +681,18 @@ REG_OP(Select) .OP_END_FACTORY_REG(Select) /** -*@brief: SelectV2s elements from "then" or "else", depending on "condition" . \n +* @brief Selects elements from "then" or "else", depending on "condition" . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: * @li condition: A Tensor of type bool. * @li then: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. * @li else: A Tensor of the same type as "then" . \n -*@par Outputs: -*result: A Tensor. Has the same type as "then" . \n +* @par Outputs: +* result: A Tensor. Has the same type as "then" . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator SelectV2.
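+* +* For example (an illustrative case derived from the semantics above, assuming matching elementwise shapes): with condition = [true, false, true], +* then = [1, 2, 3] and else = [10, 20, 30], the result is [1, 20, 3].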
*/ REG_OP(SelectV2) @@ -703,22 +704,22 @@ /** -*@brief: Computes the maximum along segments of a tensor. -*Computes a tensor such that output[i]=(data[i]) where max is over j such that segment_ids[j] == i. -*If the max is empty for a given segment ID i, output[i] = 0 +* @brief Computes the maximum along segments of a tensor. +* Computes a tensor such that output[i] = max(data[j]) where the max is over all j such that segment_ids[j] == i. +* If the max is empty for a given segment ID i, output[i] = 0 -*@par Inputs: -*Two inputs, include: +* @par Inputs: +* Two inputs, including: * @li x: A Tensor of type float16, float32, int32, int8, uint8. * @li segment_ids: should be the size of the first dimension, must be sorted, and need not cover all values in the full range of valid values; values must be positive integers -*@par Outputs: -*y:A Tensor with same type as "x" . \n +* @par Outputs: +* y: A Tensor with same type as "x" . \n -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator SegmentMax. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SegmentMax. */ REG_OP(SegmentMax) .INPUT(x, TensorType::RealNumberType()) @@ -727,18 +728,18 @@ REG_OP(SegmentMax) .OP_END_FACTORY_REG(SegmentMax) /** -*@brief Computes the sum along segments of a tensor . \n +* @brief Computes the sum along segments of a tensor . \n -*@par Inputs: -*Two inputs, including: +* @par Inputs: +* Two inputs, including: * @li x: A Tensor of type NumberType. * @li segment_ids: A Tensor of type IndexNumberType, whose shape is a prefix * of "x.shape". -*@par Outputs: -*y: A Tensor of type NumberType . \n +* @par Outputs: +* y: A Tensor of type NumberType . \n -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator SegmentSum. */ REG_OP(SegmentSum) @@ -748,28 +749,28 @@ REG_OP(SegmentSum) .OP_END_FACTORY_REG(SegmentSum) /** -*@brief: Computes the maximum along segments of a tensor. -*Computes a tensor such that output[i]=(data[i]) where max is over j +* @brief Computes the maximum along segments of a tensor. +* Computes a tensor such that output[i] = max(data[j]) where the max is over all j * such that segment_ids[j] == i. -*If the max is empty for a given segment ID i, output[i] = 0 +* If the max is empty for a given segment ID i, output[i] = 0 -*@par Inputs: -*One inputs, include: +* @par Inputs: +* One input, including: * @li x: A Tensor of type float16, float, int32. format:ND -*@par Attributes: +* @par Attributes: * @li segment_ids: should be the size of the first dimension, must be sorted, and need not cover all values in the full range of valid values; values must be positive integers -*@par Outputs: -*y:A Tensor with same type as "x". format:ND +* @par Outputs: +* y: A Tensor with same type as "x". format:ND -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator SegmentMax. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SegmentMax. -*@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. Please use SegmentMax instead. +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SegmentMax instead. */ REG_OP(SegmentMaxD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) @@ -778,11 +779,11 @@ REG_OP(SegmentMaxD) .OP_END_FACTORY_REG(SegmentMaxD) /** -*@brief Returns a one-hot tensor. The locations represented by index in "x" take value "on_value", +* @brief Returns a one-hot tensor.
The locations represented by index in "x" take value "on_value", * while all other locations take value "off_value" . \n -*@par Inputs: -*Four inputs, including: +* @par Inputs: +* Four inputs, including: * @li x: A Tensor of indices. Must be one of the following types: int32, uint8, int64. * @li depth: A scalar of type int32. The depth of the one hot dimension. * @li on_value: A scalar. The value to fill in output when indices[j] = i, @@ -790,13 +791,13 @@ REG_OP(SegmentMaxD) * @li off_value: A scalar. The value to fill in output when indices[j] != i, * Has the same type as "on_value" . \n -*@par Attributes: -*axis: An int. The axis to fill. Defaults to "-1" . \n +* @par Attributes: +* axis: An int. The axis to fill. Defaults to "-1" . \n -*@par Outputs: -*y: A Tensor. Has the same type as "on_value" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "on_value" . \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator OneHot. */ REG_OP(OneHot) @@ -809,25 +810,25 @@ REG_OP(OneHot) .OP_END_FACTORY_REG(OneHot) /** -*@brief Returns a one-hot tensor. The locations represented by index in "x" take value "on_value", +* @brief Returns a one-hot tensor. The locations represented by index in "x" take value "on_value", * while all other locations take value "off_value" . \n -*@par Inputs: -*Three inputs, including: -*@li x: A Tensor of indices. Must be one of the following types: int32, uint8, int64. -*@li on_value: A scalar. The value to fill in output when indices[j] = i, +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor of indices. Must be one of the following types: int32, uint8, int64. +* @li on_value: A scalar. The value to fill in output when indices[j] = i, * Must be one of the following types: float16, float32, int32, int8, uint8. -*@li off_value: A scalar. The value to fill in output when indices[j] != i, +* @li off_value: A scalar. The value to fill in output when indices[j] != i, * Has the same type as "on_value" . \n -*@par Attributes: -*@li depth: A scalar of type int32. The depth of the one hot dimension. -*@li axis: An int. The axis to fill. Defaults to "-1" . \n +* @par Attributes: +* @li depth: A scalar of type int32. The depth of the one hot dimension. +* @li axis: An int. The axis to fill. Defaults to "-1" . \n -*@par Outputs: -*y: A Tensor. Has the same type as "on_value" . \n +* @par Outputs: +* y: A Tensor. Has the same type as "on_value" . \n -*@par Third-party framework compatibility: +* @par Third-party framework compatibility: * Compatible with the TensorFlow operator OneHot. * * @par Restrictions: @@ -845,22 +846,22 @@ REG_OP(OneHotD) .OP_END_FACTORY_REG(OneHotD) /** -*@brief Extracts a slice from a tensor. +* @brief Extracts a slice from a tensor. * This operation extracts a slice of size "size" from a tensor "x" * starting at the location specified by "begin" . \n -*@par Inputs: -*@li x: A Tensor. Must be one of the following types: +* @par Inputs: +* @li x: A Tensor. Must be one of the following types: * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. -*@li offsets: A Tensor of type int32 or int64. The starting location for the slice. -*@li size: A Tensor of type int32 or int64. The tensor shape . \n +* @li offsets: A Tensor of type int32 or int64. The starting location for the slice. +* @li size: A Tensor of type int32 or int64. The tensor shape . 
\n -*@par Outputs: -*y: A Tensor. Has the same type as "x". The slice extracted from the tensor . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x". The slice extracted from the tensor . \n -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator Slice. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Slice. */ REG_OP(Slice) .INPUT(x, TensorType::BasicType()) @@ -870,23 +871,23 @@ REG_OP(Slice) .OP_END_FACTORY_REG(Slice) /** -*@brief Extracts a slice from a tensor. +* @brief Extracts a slice from a tensor. * This operation extracts a slice of size "size" from a tensor "x" * starting at the location specified by "begin" . \n -*@par Inputs: -*@li x: A Tensor. Must be one of the following types: +* @par Inputs: +* @li x: A Tensor. Must be one of the following types: * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n -*@par Attributes: -*@li offsets: The starting location for the slice. -*@li size: The tensor shape . \n +* @par Attributes: +* @li offsets: The starting location for the slice. +* @li size: The tensor shape . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x". The slice extracted from the tensor. -*@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. +* @par Outputs: +* y: A Tensor. Has the same type as "x". The slice extracted from the tensor. +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. */ REG_OP(SliceD) .INPUT(x, TensorType::BasicType()) @@ -896,25 +897,25 @@ REG_OP(SliceD) .OP_END_FACTORY_REG(SliceD) /** -*@brief Extracts a slice from a tensor. +* @brief Extracts a slice from a tensor. * This operation extracts a slice of size "size" from a tensor "x" * starting at the location specified by "begin" . \n -*@par Inputs: -*@li x: A Tensor. Must be one of the following types: +* @par Inputs: +* @li x: A Tensor. Must be one of the following types: * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n -*@par Inputs: -*@li offsets: The starting location for the slice. +* @li offsets: The starting location for the slice. -*@par Attributes: -*@li size: The tensor shape . \n +* @par Attributes: +* @li size: The tensor shape . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x". The slice extracted from the tensor. -*@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. +* @par Outputs: +* y: A Tensor. Has the same type as "x". The slice extracted from the tensor. +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. */ REG_OP(SliceDV2) .INPUT(x, TensorType::BasicType()) @@ -1100,20 +1101,22 @@ REG_OP(TopK) .ATTR(dim, Int, -1) .OP_END_FACTORY_REG(TopK) /** -*@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices" . \n +* @brief Creates a new tensor by applying sparse "updates" to individual values or +* slices within a tensor (initially zero for numeric, +* empty for string) of the given "shape" according to "indices" . \n -*@par Inputs: -*Inputs including: +* @par Inputs: +* Inputs including: * @li indices: A required index tensor. Must be one of the following types: int32 or int64.
* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8... * @li shape: A required list of int32 or int64, specifying the output shape. -*@par Outputs: -*y:A output Tensor with same datatype as "updates" . \n +* @par Outputs: +* y: An output Tensor with the same datatype as "x" . \n -*@attention Constraints: -*@li "y" has the same shape as "shape". -*@li "y" has the same type as "x". -*@par Third-party framework compatibility +* @attention Constraints: +* @li "y" has the same shape as "shape". +* @li "y" has the same type as "x". +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNd. */ REG_OP(ScatterNd) @@ -1123,25 +1126,25 @@ REG_OP(ScatterNd) .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(ScatterNd) /** -*@brief Creates a new tensor by applying sparse "updates" to individual values +* @brief Creates a new tensor by applying sparse "updates" to individual values * or slices within a tensor (initially zero for numeric, empty for string) of * the given "shape" according to "indices" . \n -*@par Inputs: -*Inputs including: +* @par Inputs: +* Inputs including: * @li indices: A required index tensor. Must be one of the following types: * int32 or int64. format:ND. * @li x: A required slice tensor. Must be one of the following types: * float16, float, int32, int8, uint8. format:ND. -*@par Attributes: +* @par Attributes: * @li shape: A required list of int32 or int64, specifying the output shape. -*@par Outputs: -*y: A Tensor. Has the same type as "x". format:ND . \n +* @par Outputs: +* y: A Tensor. Has the same type as "x". format:ND . \n -*@attention Constraints: -*@li "y" has the same shape as "shape". -*@li "y" has the same type as "x". -*@par Third-party framework compatibility +* @attention Constraints: +* @li "y" has the same shape as "shape". +* @li "y" has the same type as "x". +* @par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNd. * @par Restrictions: @@ -1177,8 +1180,8 @@ REG_OP(ScatterNdD) * @par Third-party framework compatibility * Compatible with the TensorFlow operator InTopK. * -*@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. Please use InTopK instead. +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use InTopK instead. */ REG_OP(InTopKD) .INPUT(x1, TensorType({DT_FLOAT})) @@ -1312,28 +1315,28 @@ REG_OP(StridedSliceAssignD) .OP_END_FACTORY_REG(StridedSliceAssignD) /** -*@brief Gather slices from "params" according to "indices"."indices" must be +* @brief Gather slices from "params" according to "indices". "indices" must be an integer tensor of any dimension(usually 0-D or 1-D). Produces an output tensor with shape "indices.shape + params.shape[1:]" . \n -*@par Inputs: -*Two inputs, including: +* @par Inputs: +* Two inputs, including: * @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * int64, qint8, quint8, qint32, qint16, quint16, uint16, * float16, uint32, uint64, complex64, complex128. * @li indices: A Tensor of type int32 or int64 . -*@par Attributes: +* @par Attributes: * @li validate_indices: A bool specifying whether to verify the argument of "indices" . * @li batch_dims: An optional int. Defaults to 0. -*@par Outputs: -*y: A Tensor. Has the same type as "x" . +* @par Outputs: +* y: A Tensor. Has the same type as "x" . -*@attention Constraints: +* @attention Constraints: * "indices" is in the range [0, x.shape[0]) .
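+* +* For example (an illustrative 1-D case derived from the semantics above): with x = [10, 20, 30, 40] and +* indices = [3, 0], the output is [40, 10].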
-*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with the TensorFlow operator Gather . */ @@ -1346,22 +1349,22 @@ REG_OP(Gather) .OP_END_FACTORY_REG(Gather) /** -*@brief Computes the cumulative product of the tensor "x" along "axis" . \n +* @brief Computes the cumulative product of the tensor "x" along "axis" . \n -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 -*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". +* @li axis: A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". * -*@par Attributes: -*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input +* @par Attributes: +* @li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input * is identical to the first element of the output. If "True", performs exclusive cumprod. -*@li reverse: A bool. Defaults to "False". +* @li reverse: A bool. Defaults to "False". * -*@par Outputs: -*y: A Tensor. Has the same type as "x". -*@par Third-party framework compatibility +* @par Outputs: +* y: A Tensor. Has the same type as "x". +* @par Third-party framework compatibility * Compatible with the TensorFlow operator Cumprod. */ REG_OP(Cumprod) @@ -1373,22 +1376,22 @@ REG_OP(Cumprod) .OP_END_FACTORY_REG(Cumprod) /** -*@brief Computes the cumulative product of the tensor "x" along "axis" . \n +* @brief Computes the cumulative product of the tensor "x" along "axis" . \n -*@par Inputs: +* @par Inputs: * One input: -*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 * -*@par Attributes: -*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". -*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input +* @par Attributes: +* @li axis: A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". +* @li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input * is identical to the first element of the output. If "True", performs exclusive cumprod. -*@li reverse: A bool. Defaults to "False". +* @li reverse: A bool. Defaults to "False". * -*@par Outputs: -*y: A Tensor. Has the same type as "x". -*@par Third-party framework compatibility +* @par Outputs: +* y: A Tensor. Has the same type as "x". +* @par Third-party framework compatibility * Compatible with the TensorFlow operator Cumprod. * @par Restrictions: @@ -1403,21 +1406,21 @@ REG_OP(CumprodD) .OP_END_FACTORY_REG(CumprodD) /** -*@brief Computes the cumulative sum of the tensor "x" along "axis" . \n +* @brief Computes the cumulative sum of the tensor "x" along "axis" . \n -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float32, int32, uint8, int8, float16. -*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". +* @li x: A Tensor.
Must be one of the following types: float32, int32, uint8, int8, float16. +* @li axis: A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". * -*@par Attributes: -*@li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is +* @par Attributes: +* @li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is * identical to the first element of the output. If "True", performs exclusive cumsum. -*@li reverse: A bool. Defaults to "False". +* @li reverse: A bool. Defaults to "False". * -*@par Outputs: -*@li y: A Tensor. Has the same type as "x". -*@par Third-party framework compatibility +* @par Outputs: +* @li y: A Tensor. Has the same type as "x". +* @par Third-party framework compatibility * Compatible with the TensorFlow operator Cumsum. */ REG_OP(Cumsum) @@ -1429,21 +1432,21 @@ REG_OP(Cumsum) .OP_END_FACTORY_REG(Cumsum) /** -*@brief Computes the cumulative sum of the tensor "x" along "axis". +* @brief Computes the cumulative sum of the tensor "x" along "axis". * -*@par Inputs: +* @par Inputs: * One input: -*x: A Tensor. Must be one of the following types: float32, int32, uint8, int8, float16. +* x: A Tensor. Must be one of the following types: float32, int32, uint8, int8, float16. * -*@par Attributes: -*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". -*@li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is +* @par Attributes: +* @li axis: A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". +* @li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is * identical to the first element of the output. If "True", performs exclusive cumsum. -*@li reverse: A bool. Defaults to "False". +* @li reverse: A bool. Defaults to "False". * -*@par Outputs: -*y: A Tensor. Has the same type as "x". -*@par Third-party framework compatibility +* @par Outputs: +* y: A Tensor. Has the same type as "x". +* @par Third-party framework compatibility * Compatible with the TensorFlow operator Cumsum. * @par Restrictions: @@ -1458,10 +1461,10 @@ REG_OP(CumsumD) .OP_END_FACTORY_REG(CumsumD) /** -*@brief Updates specified rows with values in v. -*Computes x[i, :] = v; return x. -*@par Inputs: -*Three inputs, including: +* @brief Updates specified rows with values in v. +* Computes x[i, :] = v; return x. +* @par Inputs: +* Three inputs, including: * @li x: A Tensor. * TensorType::NumberType(). * @li indices: A vector of type int32. @@ -1470,11 +1473,11 @@ REG_OP(CumsumD) * Same dimension sizes as x except the first dimension, * which must be the same as the size of "indices" . \n -*@par Outputs: -*y: A Tensor of the same type as "x". +* @par Outputs: +* y: A Tensor of the same type as "x". * An alias of "x". The content of "y" is undefined if there are duplicates in indices. -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator InplaceUpdate. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator InplaceUpdate. */ REG_OP(InplaceUpdate) .INPUT(x, TensorType::BasicType()) @@ -1484,23 +1487,23 @@ REG_OP(InplaceUpdate) .OP_END_FACTORY_REG(InplaceUpdate) /** -*@brief Updates specified rows with values in v. -*Computes x[i, :] = v; return x. -*@par Inputs: -*Two inputs, including: +* @brief Updates specified rows with values in v. +* Computes x[i, :] = v; return x.
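+* For example (an illustrative case derived from the formula above): with x = [[1, 2], [3, 4], [5, 6]], +* indices = [0, 2] and v = [[7, 8], [9, 10]], the output is +* [[7, 8], [3, 4], [9, 10]].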
+* @par Inputs: +* Two inputs, including: * @li x: A Tensor of type int32, float16, float32. * @li v: A Tensor of the same type as "x". * Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n -*@par Attributes: -*indices: A required list of ints. Indices into the left-most dimension of "x" . \n +* @par Attributes: +* indices: A required list of ints. Indices into the left-most dimension of "x" . \n -*@par Outputs: -*y: A Tensor of the same type as "x". +* @par Outputs: +* y: A Tensor of the same type as "x". * An alias of "x". The content of "y" is undefined if there are duplicates in indices . \n -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator InplaceUpdate. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator InplaceUpdate. * * @par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceUpdate instead. @@ -1513,10 +1516,10 @@ REG_OP(InplaceUpdateD) .OP_END_FACTORY_REG(InplaceUpdateD) /** -*@brief Adds "v" into specified rows of "x". -*Computes y = x; y[i, :] += v. -*@par Inputs: -*Three inputs, including: +* @brief Adds "v" into specified rows of "x". +* Computes y = x; y[i, :] += v. +* @par Inputs: +* Three inputs, including: * @li x: A Tensor. * TensorType::NumberType(). * @li indices: A vector of type int32. @@ -1525,11 +1528,11 @@ REG_OP(InplaceUpdateD) * Same dimension sizes as x except the first dimension, * which must be the same as the size of "indices" . \n -*@par Outputs: -*y: A Tensor of the same type as "x". +* @par Outputs: +* y: A Tensor of the same type as "x". * An alias of "x". The content of "y" is undefined if there are duplicates in indices. -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator InplaceAdd. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator InplaceAdd. */ REG_OP(InplaceAdd) .INPUT(x, TensorType::BasicType()) @@ -1539,23 +1542,23 @@ REG_OP(InplaceAdd) .OP_END_FACTORY_REG(InplaceAdd) /** -*@brief Adds "v" into specified rows of "x". -*Computes y = x; y[i, :] += v. -*@par Inputs: -*Two inputs, including: +* @brief Adds "v" into specified rows of "x". +* Computes y = x; y[i, :] += v. +* @par Inputs: +* Two inputs, including: * @li x: A Tensor of type int32, float16, float32. * @li v: A Tensor of the same type as "x". * Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n -*@par Attributes: -*indices: A required list of ints. Indices into the left-most dimension of "x" . \n +* @par Attributes: +* indices: A required list of ints. Indices into the left-most dimension of "x" . \n -*@par Outputs: -*y: A Tensor of the same type as "x". +* @par Outputs: +* y: A Tensor of the same type as "x". * An alias of "x". The content of "y" is undefined if there are duplicates in indices . \n -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator InplaceAdd. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator InplaceAdd. * * @par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceAdd instead. @@ -1568,21 +1571,21 @@ REG_OP(InplaceAddD) .OP_END_FACTORY_REG(InplaceAddD) /** -*@brief Subtracts "v" into specified rows of "x". -*Computes y = x; y[i, :] -= v; return y. -*@par Inputs: -**Three inputs, including: +* @brief Subtracts "v" from specified rows of "x". +* Computes y = x; y[i, :] -= v; return y.
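+* For example (an illustrative case derived from the formula above): with x = [[1, 2], [3, 4], [5, 6]], +* indices = [1] and v = [[1, 1]], the output is [[1, 2], [2, 3], [5, 6]].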
+* @par Inputs: +* Three inputs, including: * @li x: A Tensor. TensorType::NumberType(). * @li indices: A vector of type int32. Indices into the left-most dimension of x. * @li v: A Tensor of the same type as "x". * Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x". +* @par Outputs: +* y: A Tensor. Has the same type as "x". * An alias of "x". The content of "y" is undefined if there are duplicates in indices . \n -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator InplaceSub. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator InplaceSub. */ REG_OP(InplaceSub) .INPUT(x, TensorType::BasicType()) @@ -1592,24 +1595,24 @@ REG_OP(InplaceSub) .OP_END_FACTORY_REG(InplaceSub) /** -*@brief Subtracts "v" into specified rows of "x". -*Computes y = x; y[i, :] -= v . \n +* @brief Subtracts "v" from specified rows of "x". +* Computes y = x; y[i, :] -= v . \n -*@par Inputs: -**Two inputs, including: +* @par Inputs: +* Two inputs, including: * @li x: A Tensor of type int32, float16, float32. * @li v: A Tensor of the same type as "x". * Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n -*@par Attributes: -*indices: A required list of ints. Indices into the left-most dimension of "x" . \n +* @par Attributes: +* indices: A required list of ints. Indices into the left-most dimension of "x" . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x". +* @par Outputs: +* y: A Tensor. Has the same type as "x". * An alias of x. The content of y is undefined if there are duplicates in indices . \n -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator InplaceSub. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator InplaceSub. * * @par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceSub instead. @@ -1825,28 +1828,39 @@ REG_OP(UnsortedSegmentProdD) .OP_END_FACTORY_REG(UnsortedSegmentProdD) /** -*@brief Performs object detection . \n - -*@par Inputs: -*@li cls_prob: An NCHW tensor of type float16 or float32, specifying the probability of the proposal is the background class. -*@li bbox_delta: An NCHW tensor of type float16 or float32, specifying the coordinates of the proposals bounding boxes. -*@li im_info: An ND tensor of type float16 or float32, specifying the Image information . \n - -*@par Attributes: -*@li feat_stride: A optional float32, specifying the stride of the sliding window. Must be greater than "0".Defaults to "16". -*@li base_size: A optional float32, specifying the size of the generated base box. Must be greater than "0". Defaults to "16". -*@li min_size: A optional float32, specifying the minimum edge length of a proposal. A box with any edge less than this value is removed. Must be greater than "0". Defaults to "16". -*@li ratio: A optional list of floats, specifying the aspect ratio of the generated base box. Defaults to [0.5, 1, 2]. -*@li scale: A optional list of floats, specifying the ratio of the size of the generated base box to "base_size". Defaults to [8, 16, 32]. -*@li pre_nms_topn: A required int, specifying top K boxes before NMS. For float16 input, pre_nms_topn <= 6000. For float32 input, pre_nms_topn <= 3000. Defaults to "3000". -*@li post_nms_topn: A required int, specifying the number of boxes to be output after NMS. The value is a multiple of 16.
For float16 input, post_nms_topn <= 6000. For float32 input, post_nms_topn <= 3000 (the maximum multiple of 16 is 2992 within the range). Defaults to "304". -*@li iou_threshold: A required float32, specifying the NMS threshold. The value range is (0,1]. Defaults to "0.7". -*@li output_actual_rois_num: An optional bool. Defaults to "false" . \n - -*@par Outputs: -*@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16 or float32, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". -*@li actual_rois_num: A Tensor with shape [batch, 8], of type int32, specifying the number of BBoxes output per batch. -*@par Third-party framework compatibility +* @brief Performs object detection . \n + +* @par Inputs: +* @li cls_prob: An NCHW tensor of type float16 or float32, +* specifying the probability that the proposal is the background class. +* @li bbox_delta: An NCHW tensor of type float16 or float32, specifying the coordinates of the proposals bounding boxes. +* @li im_info: An ND tensor of type float16 or float32, specifying the Image information . \n + +* @par Attributes: +* @li feat_stride: An optional float32, specifying the stride of the sliding window. +* Must be greater than "0". Defaults to "16". +* @li base_size: An optional float32, specifying the size of the generated base box. +* Must be greater than "0". Defaults to "16". +* @li min_size: An optional float32, specifying the minimum edge length of a proposal. +* A box with any edge less than this value is removed. Must be greater than "0". Defaults to "16". +* @li ratio: An optional list of floats, specifying the aspect ratio of the generated base box. Defaults to [0.5, 1, 2]. +* @li scale: An optional list of floats, specifying the ratio of the size of the generated base box to "base_size". +* Defaults to [8, 16, 32]. +* @li pre_nms_topn: A required int, specifying top K boxes before NMS. +* For float16 input, pre_nms_topn <= 6000. For float32 input, pre_nms_topn <= 3000. Defaults to "3000". +* @li post_nms_topn: A required int, specifying the number of boxes to be output after NMS. +* The value is a multiple of 16. For float16 input, post_nms_topn <= 6000. For float32 input, +* post_nms_topn <= 3000 (the maximum multiple of 16 is 2992 within the range). Defaults to "304". +* @li iou_threshold: A required float32, specifying the NMS threshold. The value range is (0,1]. Defaults to "0.7". +* @li output_actual_rois_num: An optional bool. Defaults to "false" . \n + +* @par Outputs: +* @li rois: A Tensor with shape [batch, 5, post_nms_topn], +* of type float16 or float32, specifying the output box information. +* "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). +* The number of BBoxes output per batch is determined by "actual_rois_num". +* @li actual_rois_num: A Tensor with shape [batch, 8], of type int32, specifying the number of BBoxes output per batch. +* @par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(Proposal) @@ -1867,32 +1881,44 @@ REG_OP(UnsortedSegmentProdD) .OP_END_FACTORY_REG(Proposal) /** -*@brief Performs object detection. Different from Proposal, this is an internal API called after FE fusion and has an additional "rpn_bbox" attribute. The suffix "D" in the API name will be removed from the generated model .
\n - -*@par Inputs: -*@li cls_prob: An NCHW tensor of type float16, specifying the probability of the proposal is the background class. -*@li bbox_delta: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes. -*@li im_info: An ND tensor of type float16 or float32, specifying the Image information. -*@li rpn_bbox: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes . \n - -*@par Attributes: -*@li feat_stride: A required float32, specifying the stride of the sliding window. Must be greater than "0".Defaults to "16". -*@li base_size: A required float32, specifying the size of the generated base box. Must be greater than "0". Defaults to "16". -*@li min_size: A required float32, specifying the minimum edge length of a proposal. A box with any edge less than this value is removed. Must be greater than "0". Defaults to "16". -*@li ratio: A required list of floats, specifying the aspect ratio of the generated base box. Defaults to [0.5, 1, 2]. -*@li scale: A required list of floats, specifying the ratio of the size of the generated base box to "base_size". Defaults to [8, 16, 32]. -*@li pre_nms_topn: A required int, specifying top K boxes before NMS. For float16 input, pre_nms_topn <= 6000. For float32 input, pre_nms_topn <= 3000. Defaults to "3000". -*@li post_nms_topn: A required int, specifying the number of boxes to be output after NMS. The value is a multiple of 16. For float16 input, post_nms_topn <= 6000. For float32 input, post_nms_topn <= 3000 (the maximum multiple of 16 is 2992 within the range). Defaults to "304". -*@li iou_threshold: A required float32, specifying the NMS threshold. The value range is (0,1]. Defaults to 0.7. -*@li output_actual_rois_num: An optional bool. Defaults to "false" . \n - -*@par Outputs: -*@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16 or float32, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". -*@li actual_rois_num: A Tensor with shape [batch, 8], of type int32, specifying the number of BBoxes output per batch. -*@par Third-party framework compatibility +* @brief Performs object detection. Different from Proposal, +* this is an internal API called after FE fusion and has an additional "rpn_bbox" attribute. +* The suffix "D" in the API name will be removed from the generated model . \n + +* @par Inputs: +* @li cls_prob: An NCHW tensor of type float16, specifying the probability that the proposal is the background class. +* @li bbox_delta: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes. +* @li im_info: An ND tensor of type float16 or float32, specifying the Image information. +* @li rpn_bbox: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes . \n + +* @par Attributes: +* @li feat_stride: A required float32, specifying the stride of the sliding window. +* Must be greater than "0". Defaults to "16". +* @li base_size: A required float32, specifying the size of the generated base box. +* Must be greater than "0". Defaults to "16". +* @li min_size: A required float32, specifying the minimum edge length of a proposal. +* A box with any edge less than this value is removed. Must be greater than "0". Defaults to "16". +* @li ratio: A required list of floats, specifying the aspect ratio of the generated base box.
Defaults to [0.5, 1, 2]. +* @li scale: A required list of floats, specifying the ratio of the size of the generated base box to "base_size". +* Defaults to [8, 16, 32]. +* @li pre_nms_topn: A required int, specifying top K boxes before NMS. +* For float16 input, pre_nms_topn <= 6000. For float32 input, pre_nms_topn <= 3000. Defaults to "3000". +* @li post_nms_topn: A required int, specifying the number of boxes to be output after NMS. +* The value is a multiple of 16. For float16 input, post_nms_topn <= 6000. For float32 input, +* post_nms_topn <= 3000 (the maximum multiple of 16 is 2992 within the range). Defaults to "304". +* @li iou_threshold: A required float32, specifying the NMS threshold. The value range is (0,1]. Defaults to 0.7. +* @li output_actual_rois_num: An optional bool. Defaults to "false" . \n + +* @par Outputs: +* @li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16 or float32, +* specifying the output box information. "post_nms_topn" must be a multiple of 16. +* The dimension "5" indicates (batchID, x1, y1, x2, y2). +* The number of BBoxes output per batch is determined by "actual_rois_num". +* @li actual_rois_num: A Tensor with shape [batch, 8], of type int32, specifying the number of BBoxes output per batch. +* @par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. -*@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. Please use Proposal instead. +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Proposal instead. */ REG_OP(ProposalD) .INPUT(cls_prob, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1913,23 +1939,26 @@ REG_OP(ProposalD) .OP_END_FACTORY_REG(ProposalD) /** -*@brief Performs plane or channel conversion on YoloV2. -* If reverse=true: (N, H, W, C)->(N, H*stride, W*stride, C/(stride*stride)) -* If reverse=false: (N, H, W, C)->(N, H/stride, W/stride, C*(stride*stride)) +* @brief Performs plane or channel conversion on YoloV2. +* If reverse=true: (N, H, W, C)->(N, H*stride, W*stride, C/(stride*stride)) +* If reverse=false: (N, H, W, C)->(N, H/stride, W/stride, C*(stride*stride)) -*@par Inputs: -*x: An (N, H, W, C) tensor. Type is float16, float32, int8, uint8, int16, uint16, int32, uint32, int64 or uint64. . \n +* @par Inputs: +* x: An (N, H, W, C) tensor. Type is float16, float32, int8, uint8, int16, uint16, int32, uint32, int64 or uint64. \n -*@par Attributes: -*@li stride: An optional int32, specifying the plane or channel scaling factor. Defaults to "2". -*@li reverse: An optional bool, specifying the conversion mode. If "true", depth to space conversion is performed. If "false", space to depth conversion is performed. Defaults to "false" . \n +* @par Attributes: +* @li stride: An optional int32, specifying the plane or channel scaling factor. Defaults to "2". +* @li reverse: An optional bool, specifying the conversion mode. If "true", +* depth to space conversion is performed. If "false", space to depth conversion is performed. Defaults to "false" . \n + +* @par Outputs: +* y: An (N, H, W, C) tensor. Has same type as "x" . \n -*@par Outputs: -*y: An (N, H, W, C) tensor. Has same type as "x" . \n +* @attention Constraints: +* @li If reverse=true: C/(stride*stride) yields an integer result. +* @li If reverse=false: W/stride and H/stride yield integer results. -*@attention Constraints: -*@li If reverse=true: C/(stride*stride) yields an integer result. If reverse=false: W/stride and H/stride yield integer results.
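+* +* For example (an illustrative case derived from the shape formulas above): with stride = 2 and reverse = false, +* an input of shape (1, 4, 6, 3) yields an output of shape (1, 2, 3, 12).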
-*@par Third-party framework compatibility +* @par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(PassThrough) @@ -1940,29 +1969,31 @@ REG_OP(PassThrough) .OP_END_FACTORY_REG(PassThrough) /** -*@brief Crops the input tensor x to the shape of size. For example: +* @brief Crops the input tensor x to the shape of size. For example: *(1) x: bottom to be cropped, with shape (20, 50, 512, 512); *(2) size: reference input for cropping, with shape (20, 10, 256, 256); *(3) axis = 1; *(4) offset = (25, 128, 128); *(5) y = x[:, 25:25 + size.shape[1], 128:128 + size.shape[2], 128:128 + size.shape[3]] . \n -*@par Inputs: -*Inputs include: +* @par Inputs: +* Inputs include: * @li x: A required Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32,int64, uint64. * @li size: A required Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. -*@par Attributes: -*@li axis: A required int32, specifying the first dimension to crop. Defaults to "2". -*@li offset: A required array, specifying the shift for all/each dimension to align the cropped bottom with the reference bottom. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. -*@par Outputs: -*y: A required Tensor. Has the same type and shape as "size" . \n - -*@attention Constraints: -*@li "y" must have the same type and shape as "size". "x" must have the same type as "size". -*@li "axis" must be less than the rank of "x". -*@li The "offset" for each dimension must not exceed the maximum value of the corresponding dimension of "x". -*@li The array length of "offset" plus the value of "axis" equals to the rank of "y". -*@par Third-party framework compatibility +* @par Attributes: +* @li axis: A required int32, specifying the first dimension to crop. Defaults to "2". +* @li offset: A required array, +* specifying the shift for all/each dimension to align the cropped bottom with the reference bottom. +* Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. +* @par Outputs: +* y: A required Tensor. Has the same type and shape as "size" . \n + +* @attention Constraints: +* @li "y" must have the same type and shape as "size". "x" must have the same type as "size". +* @li "axis" must be less than the rank of "x". +* @li The "offset" for each dimension must not exceed the maximum value of the corresponding dimension of "x". +* @li The array length of "offset" plus the value of "axis" equals to the rank of "y". +* @par Third-party framework compatibility * Compatible with the Caffe operator Crop. */ REG_OP(Crop) @@ -1974,24 +2005,24 @@ REG_OP(Crop) .OP_END_FACTORY_REG(Crop) /** -*@brief Returns a namedtuple (values, indices) where values is the cumulative +* @brief Returns a namedtuple (values, indices) where values is the cumulative * the cumulative minimum of elements of input in the dimension dim. * And indices is the index location of each maximum value found in the dimension dim. \n -*@par Inputs: -*One inputs, including: +* @par Inputs: +* One inputs, including: * x: A tensor . Must be one of the following types: * float16, float32, int32, uint32, int8, uint8. \n -*@par Attributes: +* @par Attributes: * axis: Axis along which to cummin. \n -*@par Outputs: +* @par Outputs: * @li y: A Tensor with the same type and shape of x's. 
* @li indices: A Tensor with the int32 type and the same shape of x's. \n -*@par Third-party framework compatibility -*Compatible with the Pytorch operator Cummin. \n +* @par Third-party framework compatibility +* Compatible with the PyTorch operator Cummin. \n */ REG_OP(Cummin) .INPUT(x, TensorType::BasicType()) @@ -2001,24 +2032,24 @@ REG_OP(Cummin) .OP_END_FACTORY_REG(Cummin) /** -*@brief Returns a namedtuple (values, indices) where values is the cumulative +* @brief Returns a namedtuple (values, indices) where values is * the cumulative maximum of elements of input in the dimension dim. * And indices is the index location of each maximum value found in the dimension dim. \n -*@par Inputs: -*One inputs, including: +* @par Inputs: +* One input, including: * x: A tensor . Must be one of the following types: * float16, float32, int32, uint32, int8, uint8. \n -*@par Attributes: +* @par Attributes: * dim: Axis along which to cummax. \n -*@par Outputs: +* @par Outputs: * @li y: A Tensor with the same type and shape of x's. * @li indices: A Tensor with the int32/int64 type and the same shape of x's. \n -*@par Third-party framework compatibility -*Compatible with the Pytorch operator Cummax. \n +* @par Third-party framework compatibility +* Compatible with the PyTorch operator Cummax. \n */ REG_OP(Cummax) .INPUT(x, TensorType::BasicType()) @@ -2028,7 +2059,7 @@ REG_OP(Cummax) .OP_END_FACTORY_REG(Cummax) /** -*@brief Extends the input with copies of data along a specified dimension. For example: +* @brief Extends the input with copies of data along a specified dimension. For example: *(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); \n *(2) axis = 1; \n *(3) tiles = 2; \n @@ -2036,15 +2067,16 @@ REG_OP(Cummax) * [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]], * with shape (2, 6, 2) . \n -*@par Inputs: +* @par Inputs: * One input: -*input_x: A Tensor with any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n +* input_x: A Tensor with any format. Must be one of the following types: +* float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n * @par Attributes: * @li axis: An optional int32, specifying the axis to tile. Defaults to 1. * @li tiles: A required int32, specifying the number of copies (tiles) to output . \n -*@par Outputs: +* @par Outputs: * output_y: A Tensor of any format. Must be one of the following types: * float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n @@ -2076,8 +2108,8 @@ REG_OP(TileWithAxis) * @par Outputs: * y: A Tensor of the same type as "x". -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ReadSelect) .INPUT(x, TensorType::ALL()) @@ -2091,11 +2123,11 @@ REG_OP(ReadSelect) * @par Inputs: * x: A Tensor. Must be one of the following types: int32, float32, float16, int8 . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x". +* @par Outputs: +* y: A Tensor. Has the same type as "x". -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(WriteSelect) .INPUT(x, TensorType::ALL()) @@ -2112,8 +2144,8 @@ REG_OP(WriteSelect) * @li axis: A required int32, specifying the index of axis to read by stride.
* @li stride: A required int32, specifying the value of reading stride. \n

-*@par Outputs:
-*y: A Tensor of the same type as "x".
+* @par Outputs:
+* y: A Tensor of the same type as "x".
*/
REG_OP(StridedRead)
    .INPUT(x, TensorType::ALL())
@@ -2132,8 +2164,8 @@ REG_OP(StridedRead)
* @li axis: A required int32, specifying the index of axis to write by stride.
* @li stride: A required int32, specifying the value of writing stride. \n

-*@par Outputs:
-*y: A Tensor. Has the same type as "x".
+* @par Outputs:
+* y: A Tensor. Has the same type as "x".
*/
REG_OP(StridedWrite)
    .INPUT(x, TensorType::ALL())
@@ -2150,9 +2182,11 @@ REG_OP(StridedWrite)
* @li x: A Tensor. Must be one of the following types: float32, float16.
* @li axis A Tensor of type int32 or int16. Defaults to "0".
*
-*@par Attributes:
-*@li exclusive: If "False", performs inclusive CumulativeLogsumexp, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive CumulativeLogsumexp.
-*@li reverse: A bool. Defaults to "False".
+* @par Attributes:
+* @li exclusive: If "False", performs inclusive CumulativeLogsumexp,
+* which means that the first element of the input is identical to the first element of the output.
+* If "True", performs exclusive CumulativeLogsumexp.
+* @li reverse: A bool. Defaults to "False".
*
* @par Outputs:
* y: A Tensor. Has the same type as "x".
@@ -2168,20 +2202,22 @@ REG_OP(CumulativeLogsumexp)
    .OP_END_FACTORY_REG(CumulativeLogsumexp)

/**
-*@brief Computes the cumulative log sum exp of the tensor "x" along "axis".
+* @brief Computes the cumulative log sum exp of the tensor "x" along "axis".
*
-*@par Inputs:
+* @par Inputs:
* One input:
* x: A Tensor. Must be one of the following types: float32, float16.
*
-*@par Attributes:
-*@li axis A Tensor of type int32 or int16. Defaults to "0".
-*@li exclusive: If "False", performs inclusive cumulativeLogsumexp, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive CumulativeLogsumexp.
-*@li reverse: A bool. Defaults to "False".
+* @par Attributes:
+* @li axis: A Tensor of type int32 or int16. Defaults to "0".
+* @li exclusive: If "False", performs inclusive CumulativeLogsumexp,
+* which means that the first element of the input is identical to the first element of the output.
+* If "True", performs exclusive CumulativeLogsumexp.
+* @li reverse: A bool. Defaults to "False".
*
-*@par Outputs:
-*y: A Tensor. Has the same type as "x".
-*@par Third-party framework compatibility
+* @par Outputs:
+* y: A Tensor. Has the same type as "x".
+* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Cumsum.
*
* @par Restrictions:
@@ -2201,7 +2237,7 @@ REG_OP(CumulativeLogsumexpD)
* @par Inputs:
* Three inputs, including:
* @li var: A Tensor. Must be one of the following types:
-* float16, float32, int16, int32, int8, uint8.
+* double, float16, float32, int16, int32, int8, uint8.
* @li indices: A Tensor of the indices, type should be int32.
* @li updates: A Tensor of the same type as "var". \n
@@ -2216,12 +2252,12 @@ REG_OP(CumulativeLogsumexpD)
*/
REG_OP(InplaceIndexAdd)
    .INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8,
-                           DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+                           DT_UINT8, DT_FLOAT32, DT_FLOAT16, DT_DOUBLE}))
    .INPUT(indices, TensorType({DT_INT32}))
    .INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8,
-                               DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+                               DT_UINT8, DT_FLOAT32, DT_FLOAT16, DT_DOUBLE}))
    .OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8,
-                            DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+                            DT_UINT8, DT_FLOAT32, DT_FLOAT16, DT_DOUBLE}))
    .REQUIRED_ATTR(axis, Int)
    .OP_END_FACTORY_REG(InplaceIndexAdd)
@@ -2395,7 +2431,7 @@ REG_OP(IndexFillD)
    .OP_END_FACTORY_REG(IndexFillD)

/**
-* @brief For each row r of this and for each column c, do (*this)(r, c) += src(j, c), \n
+* @brief For each row r of this and for each column c, do (* this)(r, c) += src(j, c), \n
* where j ranges from indexes[r].first through indexes[r].second - 1. \n
* In general indexes must be >= 0 and < src.NumRows(); \n
* but to represent an empty range you may use the pair (-1, -1) or any pair of numbers (i, j) such that i >= j. \n
@@ -2443,7 +2479,7 @@ REG_OP(AddRowRanges)
* @par Outputs:
* y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D)

-*@attention Constraints:
+* @attention Constraints:
* Warning: input shape's length must not be bigger than 1024 * 1024 * 1024.
*/
REG_OP(MaskedFillRange)
@@ -2461,7 +2497,7 @@ REG_OP(MaskedFillRange)
* @par Inputs:
* Six inputs, including:
* @li topk_pq_distance: A sorted Tensor, Will be updated after calculation.
-* Must be one of the following types: float32, float16. 
+* Must be one of the following types: float32, float16.
* @li topk_pq_index: A Tensor of type int32, index corresponding to topk_pq_distance.
* @li topk_pq_ivf: A Tensor of type int32 , the bucket number corresponding to topk_pq_distance.
* @li pq_distance: A Tensor of type float32 or float16,
@@ -2493,7 +2529,7 @@ REG_OP(InplaceTopKDistance)
* @li pq_ivf: A Tensor of type int32, index corresponding to sorted_distance.
* @li pq_index: A Tensor of type int32 , the bucket number corresponding to sorted_distance. \n
*
-*@par Outputs:
+* @par Outputs:
* @li topk_distance: A Tensor of type float16, the new data set will be reordered with sorted_distance and updated to topk_distance.
* @li topk_ivf: A Tensor of type int32, index corresponding to topk_distance.
* @li topk_index: A scalar of type int32 , the bucket number corresponding to topk_distance. \n
@@ -2515,13 +2551,13 @@ REG_OP(TopKPQDistanceMerge)
    .OP_END_FACTORY_REG(TopKPQDistanceMerge)

/**
-*@brief Extracts a strided slice of a tensor. Roughly speaking, this op
+* @brief Extracts a strided slice of a tensor. Roughly speaking, this op
extracts a slice of size (end-begin)/stride from the given input tensor.
Starting at the location specified by begin the slice continues by adding
stride to the index until all dimensions are not less than end.

-*@par Inputs:
-*Four inputs, including:
+* @par Inputs:
+* Four inputs, including:
* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8,
* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
* complex128, float16, uint32, uint64.
@@ -2533,8 +2569,8 @@ REG_OP(TopKPQDistanceMerge)
* @li axes: A Tensor of type int32 or int64, for the increment . \n

-*@par Outputs:
-*y: A Tensor. Has the same type as "x" . \n
+* @par Outputs:
+* y: A Tensor. Has the same type as "x" . \n

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
@@ -2558,7 +2594,7 @@ REG_OP(StridedSliceV3)
* @li energy: A Tensor. Must be one of the following types: float32, float16.
* @li offset: A Tensor of type int32. \n

-*@par Outputs:
+* @par Outputs:
* y: A Tensor with same type as "alpha". \n
*
* @par Attributes:
@@ -2646,6 +2682,39 @@ REG_OP(NonMaxSuppressionBucketize)
    .OUTPUT(output_nmsed_score, TensorType({DT_FLOAT}))
    .OUTPUT(output_nmsed_class, TensorType({DT_FLOAT}))
    .OP_END_FACTORY_REG(NonMaxSuppressionBucketize)
+
+/**
+* @brief Inserts the values into the sorted sequence and returns the index. \n
+
+* @par Inputs:
+* @li sorted_sequence: A Tensor of {DT_FLOAT16,DT_FLOAT,DT_INT16,DT_INT8,DT_UINT8,DT_INT32,DT_INT64},
+  the values of the last dim are sorted in ascending order.
+* @li values: The inserted Tensor. Must have the same type as input. Only the last dim can be different from
+  the sorted_sequence. \n
+
+* @par Outputs:
+* @li out: Output tensor of the op, which has the same shape as input "values". Dtype is int32 or int64. \n
+
+* @par Attributes:
+* @li dtype: An optional type. Default value is DT_INT64, only supports DT_INT64/DT_INT32.
+
+* @li right: An optional bool. Default value is false. False means the inserted position aligns to the left side
+  when the sequence contains equal values and the position candidates are not unique, while true means aligning
+  to the right side in such a situation. \n
+
+* @par Third-party framework compatibility
+* Compatible with the PyTorch 1.8.1 searchsorted operator.
+*/
+
+REG_OP(SearchSorted)
+    .INPUT(sorted_sequence, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8,
+                                        DT_UINT8, DT_INT32, DT_INT64}))
+    .INPUT(values, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8,
+                               DT_UINT8, DT_INT32, DT_INT64}))
+    .OUTPUT(out, TensorType({DT_INT32, DT_INT64}))
+    .ATTR(dtype, Type, DT_INT64)
+    .ATTR(right, Bool, false)
+    .OP_END_FACTORY_REG(SearchSorted)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h
index bf0f670a..21051d8f 100644
--- a/third_party/fwkacllib/inc/ops/sparse_ops.h
+++ b/third_party/fwkacllib/inc/ops/sparse_ops.h
@@ -1041,4 +1041,4 @@ REG_OP(DeserializeManySparse)
    .OP_END_FACTORY_REG(DeserializeManySparse)
} // namespace ge
-#endif // OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_
+#endif // OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h
index ab9e1dec..6b51beea 100644
--- a/third_party/fwkacllib/inc/ops/spectral_ops.h
+++ b/third_party/fwkacllib/inc/ops/spectral_ops.h
@@ -143,6 +143,42 @@ REG_OP(IFFT2D)
    .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
    .OP_END_FACTORY_REG(IFFT2D)
+
+/**
+* @brief Computes the Fourier transform of short overlapping windows of the input. \n
+
+* @par Inputs:
+* @li x: A 1-D or 2-D tensor.
+* @li window: An optional tensor. The optional window function. Default: None (treated as a window of all 1s). \n
+
+* @par Attributes:
+* @li n_fft: A required int. Size of the Fourier transform.
+* @li hop_length: An optional int. The distance between neighboring sliding window frames.
+* Default: None (treated as equal to floor(n_fft/4))
+* @li win_length: An optional int. The size of window frame and STFT filter.
+* Default: None (treated as equal to n_fft)
+* @li normalized: An optional bool. Controls whether to return the normalized STFT results. Default: False
+* @li onesided: An optional bool. Controls whether to return half of results to avoid redundancy for real inputs.
+* Default: True for real input and window, False otherwise.
+* @li return_complex: An optional bool. Whether to return a complex tensor, or a real tensor
+* with an extra last dimension for the real and imaginary components. \n
+
+* @par Outputs:
+* y: A tensor containing the STFT result with the shape described above. \n
+
+* @par Third-party framework compatibility
+* Compatible with the PyTorch STFT operator.
+*/
+REG_OP(STFT)
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OPTIONAL_INPUT(window, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .ATTR(hop_length, Int, 0)
+    .ATTR(win_length, Int, 0)
+    .ATTR(normalized, Bool, false)
+    .ATTR(onesided, Bool, true)
+    .ATTR(return_complex, Bool, true)
+    .REQUIRED_ATTR(n_fft, Int)
+    .OP_END_FACTORY_REG(STFT)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h
index f39574b3..2081ac97 100644
--- a/third_party/fwkacllib/inc/runtime/config.h
+++ b/third_party/fwkacllib/inc/runtime/config.h
@@ -79,7 +79,11 @@ typedef enum tagRtPlatformType {
    PLATFORM_LHISI_SD3403 = 7,
    PLATFORM_MINI_V3 = 8,
    PLATFORM_MINI_5612 = 9,
-    PLATFORM_END = 10,
+    PLATFORM_CLOUD_V2_910B1 = 10,
+    PLATFORM_CLOUD_V2_910B2 = 11,
+    PLATFORM_CLOUD_V2_910B3 = 12,
+    PLATFORM_CLOUD_V2_910B4 = 13,
+    PLATFORM_END = 14,
} rtPlatformType_t;

typedef enum tagRtCubeFracMKNFp16 {
diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h
index 85c2d832..5d3110d4 100644
--- a/third_party/fwkacllib/inc/runtime/kernel.h
+++ b/third_party/fwkacllib/inc/runtime/kernel.h
@@ -166,6 +166,19 @@ typedef enum tagRtDumpKind {
    RT_DATA_DUMP_KIND_RESERVED = 1,
} rtDumpKind_t;

+/**
+ * @ingroup rt_kernel
+ * @brief rt kernel type
+ */
+typedef enum rtKernelType {
+    KERNEL_TYPE_CCE = 0,
+    KERNEL_TYPE_FWK = 1,
+    KERNEL_TYPE_AICPU = 2,
+    KERNEL_TYPE_AICPU_CUSTOM = 4,
+    KERNEL_TYPE_HWTS = 10,
+    KERNEL_TYPE_RESERVED = 99,
+} rtKernelType_t;
+
/**
 * @ingroup rt_kernel
 * @brief report callback
@@ -523,6 +536,23 @@ RTS_API rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launc
                                              const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
                                              uint32_t flags);

+/**
+ * @ingroup rtAicpuKernelLaunchEx
+ * @brief launch cpu kernel to device with dump identifier and kernelType
+ * @param [in] kernelType aicpu kernel type
+ * @param [in] launchNames names address for kernel launch
+ * @param [in] blockDim block dimensions
+ * @param [in] argsInfo arguments address for kernel function
+ * @param [in] smDesc shared memory description
+ * @param [in] stm associated stream
+ * @param [in] flags dump flag or other function flags
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtAicpuKernelLaunchEx(uint32_t kernelType, const rtKernelLaunchNames_t *launchNames,
+                                        uint32_t blockDim, const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc,
+                                        rtStream_t stm, uint32_t flags);
+
/**
 * @ingroup rt_kernel
 * @brief L1 fusion dump addr transfered to device
diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h
index 368c6201..1c33e1b3 100644
--- a/third_party/fwkacllib/inc/runtime/mem.h
+++ b/third_party/fwkacllib/inc/runtime/mem.h
@@ -31,6 +31,7 @@ extern "C" {
"C" { #define RT_MEMORY_TS (0x40U) // Used for Ts memory #define RT_MEMORY_TS_4G (0x40U) // Used for Ts memory(only 1951) #define RT_MEMORY_HOST (0x81U) // Memory on host +#define RT_MEMORY_SVM (0x90U) // Memory for SVM #define RT_MEMORY_RESERVED (0x100U) #define RT_MEMORY_L1 (0x1U << 16U) diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index c3d4bbd1..0c01789c 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -49,6 +49,7 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_BARRIER, RT_MODEL_TASK_NPU_GET_FLOAT_STATUS, RT_MODEL_TASK_NPU_CLEAR_FLOAT_STATUS, + RT_MODEL_TASK_DVPP, } rtModelTaskType_t; typedef enum tagModelStreamType { diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h index 47d33a9e..8d3522a4 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_callback.h +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -1,13 +1,20 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. - * Description: handle perf data - * Author: xp - * Create: 2019-10-13 +/** + * @file prof_callback.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2022. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * */ #ifndef MSPROFILER_PROF_CALLBACK_H_ #define MSPROFILER_PROF_CALLBACK_H_ +#include +#include + #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -18,9 +25,6 @@ extern "C" { #define MSVP_PROF_API __attribute__((visibility("default"))) #endif -#include "stddef.h" -#include "stdint.h" - /** * @name MsprofErrorCode * @brief error code @@ -170,7 +174,7 @@ MSVP_PROF_API int32_t MsprofInit(uint32_t moduleId, void *data, uint32_t dataLen * @param moduleId [IN] module Id * @param handle [IN] the pointer of callback */ -MSVP_PROF_API int32_t MsprofRegisterCallback(uint32_t moduleId, ProfCommandHandle callback); +MSVP_PROF_API int32_t MsprofRegisterCallback(uint32_t moduleId, ProfCommandHandle handle); /* * @name profReportData * @brief start reporter/stop reporter/report date diff --git a/third_party/fwkacllib/inc/toolchain/prof_common.h b/third_party/fwkacllib/inc/toolchain/prof_common.h index e2eb5b69..411d7a29 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_common.h +++ b/third_party/fwkacllib/inc/toolchain/prof_common.h @@ -1,23 +1,27 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. - * Description: handle perf data - * Author: Huawei Technologies Co., Ltd. - * Create: 2019-10-13 +/** + * @file prof_common.h + * + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2022. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ *
 */

#ifndef MSPROFILER_PROF_COMMON_H_
#define MSPROFILER_PROF_COMMON_H_

+#include <stdint.h>
+
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

-#include <stdint.h>
-
#define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a

enum MsprofDataTag {
-    MSPROF_ACL_DATA_TAG = 0,            //acl data tag, range: 0~19
-    MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, //ge data tag, range: 20~39
+    MSPROF_ACL_DATA_TAG = 0,             // acl data tag, range: 0~19
+    MSPROF_GE_DATA_TAG_MODEL_LOAD = 20,  // ge data tag, range: 20~39
    MSPROF_GE_DATA_TAG_FUSION = 21,
    MSPROF_GE_DATA_TAG_INFER = 22,
    MSPROF_GE_DATA_TAG_TASK = 23,
@@ -25,14 +29,14 @@ enum MsprofDataTag {
    MSPROF_GE_DATA_TAG_STEP = 25,
    MSPROF_GE_DATA_TAG_ID_MAP = 26,
    MSPROF_GE_DATA_TAG_HOST_SCH = 27,
-    MSPROF_RUNTIME_DATA_TAG_API = 40,   //runtime data tag, range: 40~59
+    MSPROF_RUNTIME_DATA_TAG_API = 40,    // runtime data tag, range: 40~59
    MSPROF_RUNTIME_DATA_TAG_TRACK = 41,
-    MSPROF_AICPU_DATA_TAG = 60,         //aicpu data tag, range: 60~79
+    MSPROF_AICPU_DATA_TAG = 60,          // aicpu data tag, range: 60~79
    MSPROF_AICPU_MODEL_TAG = 61,
-    MSPROF_HCCL_DATA_TAG = 80,          //hccl data tag, range: 80~99
-    MSPROF_DP_DATA_TAG = 100,           //dp data tag, range: 100~119
-    MSPROF_MSPROFTX_DATA_TAG = 120,     //hccl data tag, range: 120~139
-    MSPROF_DATA_TAG_MAX = 65536,        //data tag value type is uint16_t
+    MSPROF_HCCL_DATA_TAG = 80,           // hccl data tag, range: 80~99
+    MSPROF_DP_DATA_TAG = 100,            // dp data tag, range: 100~119
+    MSPROF_MSPROFTX_DATA_TAG = 120,      // msproftx data tag, range: 120~139
+    MSPROF_DATA_TAG_MAX = 65536,         // data tag value type is uint16_t
};

/**
@@ -154,6 +158,8 @@ enum MsprofGeTaskType {
    MSPROF_GE_TASK_TYPE_AI_CORE = 0,
    MSPROF_GE_TASK_TYPE_AI_CPU,
    MSPROF_GE_TASK_TYPE_AIV,
+    MSPROF_GE_TASK_TYPE_WRITE_BACK,
+    MSPROF_GE_TASK_TYPE_INVALID
};
enum MsprofGeShapeType {
    MSPROF_GE_SHAPE_TYPE_STATIC = 0,
@@ -368,11 +374,11 @@ struct MsprofHcclProfReduce {
    uint64_t src;
    uint64_t dst;
    uint64_t size;
-    uint32_t op;            // {0: sum, 1: mul, 2: max, 3: min}
-    uint32_t dataType;      // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64}
-    uint32_t linkType;      // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
+    uint32_t op;        // {0: sum, 1: mul, 2: max, 3: min}
+    uint32_t dataType;  // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64}
+    uint32_t linkType;  // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
    uint32_t remoteRank;
-    uint32_t transportType; // transport type {0: SDMA, 1: RDMA, 2:LOCAL}
+    uint32_t transportType;  // transport type {0: SDMA, 1: RDMA, 2:LOCAL}
    uint32_t role; // role {0: dst, 1:src}
    double durationEstimated;
};
@@ -383,9 +389,9 @@ struct MsprofHcclProfRDMA {
    uint64_t dst;
    uint64_t size;
    uint64_t notifyID;
-    uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
+    uint32_t linkType;  // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
    uint32_t remoteRank;
-    uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL}
+    uint32_t transportType;  // transport type {0: RDMA, 1:SDMA, 2:LOCAL}
    uint32_t role; // role {0: dst, 1:src}
    uint32_t type; // RDMA type {0: RDMASendNotify, 1:RDMASendPayload}
    double durationEstimated;
};
@@ -397,7 +403,7 @@ struct MsprofHcclProfMemcpy {
    uint64_t dst;
    uint64_t size;
    uint64_t notifyID;
-    uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
+    uint32_t linkType;  // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
    uint32_t remoteRank;
    uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL}
    uint32_t role; // role {0: dst, 1:src}
@@ -448,18 +454,17 @@ struct MsprofStampInfo {
    uint16_t dataTag;
    uint32_t processId;
    uint32_t threadId;
-    uint32_t category;  //marker category
+    uint32_t category;  // marker category
    uint32_t eventType;
    int32_t payloadType;
-    union PayloadValue  //payload info for marker
-    {
+    union PayloadValue {
        uint64_t ullValue;
        int64_t llValue;
        double dValue;
        uint32_t uiValue[2];
        int32_t iValue[2];
        float fValue[2];
-    } payload;
+    } payload;  // payload info for marker
    uint64_t startTime;
    uint64_t endTime;
    int32_t messageType;
diff --git a/third_party/fwkacllib/inc/toolchain/prof_engine.h b/third_party/fwkacllib/inc/toolchain/prof_engine.h
index 0e757dcf..f5276653 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_engine.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_engine.h
@@ -1,17 +1,12 @@
/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * @file prof_engine.h
 *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2022. All rights reserved.
 *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
 */

#ifndef MSPROF_ENGINE_PROF_ENGINE_H_
@@ -35,7 +30,7 @@ namespace Engine {
 * record config info
 */
struct ModuleJobConfig {
-  std::map<std::string, std::string> switches; /**< key is the config name, value is the config value(on or off) */
+    std::map<std::string, std::string> switches; /**< key is the config name, value is the config value(on or off) */
};

/**
@@ -48,59 +43,59 @@ struct ModuleJobConfig {
 * @brief class PluginIntf
 */
class MSVP_PROF_API PluginIntf {
- public:
-  virtual ~PluginIntf() {}
-
- public:
-  /**
-   * @ingroup PluginIntf
-   * @name : Init
-   * @brief : API of user plugin, libmsporf call this API to send a Reporter to user plugin
-   * @par description :
-   * API of user plugin, libmsporf call this API to send a Reporter to user plugin.
-   * @param reporter [IN] const Reporter* the Reporter from libmsprof
-   * @retval PROFILING_SUCCESS 0 (success)
-   * @retval PROFILING_FAILED -1 (failed)
-   *
-   * @par depend:
-   * @li libmsprof
-   * @li prof_engine.h
-   * @since c60
-   * @see UnInit
-   */
-  virtual int Init(const Reporter *reporter) = 0;
-
-  /**
-   * @ingroup PluginIntf
-   * @name : OnNewConfig
-   * @brief : API of user plugin, libmsprof call this API to send config info to user plugin \n
-       If the user plugin needn't config, no need to redefine this function
-   * @param config [IN] const ModuleJobConfig * the config from libmsprof
-   * @retval PROFILING_SUCCESS 0 (success)
-   * @retval PROFILING_FAILED -1 (failed)
-   *
-   * @par depend:
-   * @li libmsprof
-   * @li prof_engine.h
-   * @since c60
-   * @see Init | UnInit
-   */
-  virtual int OnNewConfig(const ModuleJobConfig *config) { return 0; }
-
-  /**
-   * @ingroup PluginIntf
-   * @name : UnInit
-   * @brief : API of user plugin, libmsprof call this API to notify plugin stop to send data
-   * @retval PROFILING_SUCCESS 0 (success)
-   * @retval PROFILING_FAILED -1 (failed)
-   *
-   * @par depend:
-   * @li libmsprof
-   * @li prof_engine.h
-   * @since c60
-   * @see Init
-   */
-  virtual int UnInit() = 0;
+public:
+    virtual ~PluginIntf() {}
+
+public:
+/**
+ * @ingroup PluginIntf
+ * @name : Init
+ * @brief : API of user plugin, libmsprof call this API to send a Reporter to user plugin
+ * @par description :
+ * API of user plugin, libmsprof call this API to send a Reporter to user plugin.
+ * @param reporter [IN] const Reporter* the Reporter from libmsprof
+ * @retval PROFILING_SUCCESS 0 (success)
+ * @retval PROFILING_FAILED -1 (failed)
+ *
+ * @par depend:
+ * @li libmsprof
+ * @li prof_engine.h
+ * @since c60
+ * @see UnInit
+ */
+    virtual int Init(const Reporter *reporter) = 0;
+
+/**
+ * @ingroup PluginIntf
+ * @name : OnNewConfig
+ * @brief : API of user plugin, libmsprof call this API to send config info to user plugin \n
+       If the user plugin needn't config, no need to redefine this function
+ * @param config [IN] const ModuleJobConfig * the config from libmsprof
+ * @retval PROFILING_SUCCESS 0 (success)
+ * @retval PROFILING_FAILED -1 (failed)
+ *
+ * @par depend:
+ * @li libmsprof
+ * @li prof_engine.h
+ * @since c60
+ * @see Init | UnInit
+ */
+    virtual int OnNewConfig(const ModuleJobConfig *config) = 0;
+
+/**
+ * @ingroup PluginIntf
+ * @name : UnInit
+ * @brief : API of user plugin, libmsprof call this API to notify plugin stop to send data
+ * @retval PROFILING_SUCCESS 0 (success)
+ * @retval PROFILING_FAILED -1 (failed)
+ *
+ * @par depend:
+ * @li libmsprof
+ * @li prof_engine.h
+ * @since c60
+ * @see Init
+ */
+    virtual int UnInit() = 0;
};

/**
@@ -113,39 +108,39 @@ class MSVP_PROF_API PluginIntf {
 * @brief class EngineIntf
 */
class MSVP_PROF_API EngineIntf {
- public:
-  virtual ~EngineIntf() {}
-
- public:
-  /**
-   * @ingroup EngineIntf
-   * @name : CreatePlugin
-   * @brief : API of user engine, libmsporf call this API to get a plugin
-   * @retval PluginIntf * The pointer of the new plugin
-   *
-   * @par depend:
-   * @li libmsprof
-   * @li prof_engine.h
-   * @since c60
-   * @see ReleasePlugin
-   */
-  virtual PluginIntf *CreatePlugin() = 0;
-
-  /**
-   * @ingroup EngineIntf
-   * @name : ReleasePlugin
-   * @brief : API of user engine, libmsprof call this API to release a plugin
-   * @param plugin [IN] PluginIntf * the plugin to release
-   * @retval PROFILING_SUCCESS 0 (success)
-   * @retval PROFILING_FAILED -1 (failed)
-   *
-   * @par depend:
-   * @li libmsprof
-   * @li prof_engine.h
-   * @since c60
-   * @see CreatePlugin
-   */
-  virtual int ReleasePlugin(PluginIntf *plugin) = 0;
+public:
+    virtual ~EngineIntf() {}
+
+public:
+/**
+ * @ingroup EngineIntf
+ * @name : CreatePlugin
+ * @brief : API of user engine, libmsprof call this API to get a plugin
+ * @retval PluginIntf * The pointer of the new plugin
+ *
+ * @par depend:
+ * @li libmsprof
+ * @li prof_engine.h
+ * @since c60
+ * @see ReleasePlugin
+ */
+    virtual PluginIntf *CreatePlugin() = 0;
+
+    /**
+     * @ingroup EngineIntf
+     * @name : ReleasePlugin
+     * @brief : API of user engine, libmsprof call this API to release a plugin
+     * @param plugin [IN] PluginIntf * the plugin to release
+     * @retval PROFILING_SUCCESS 0 (success)
+     * @retval PROFILING_FAILED -1 (failed)
+     *
+     * @par depend:
+     * @li libmsprof
+     * @li prof_engine.h
+     * @since c60
+     * @see CreatePlugin
+     */
+    virtual int ReleasePlugin(PluginIntf *plugin) = 0;
};

/**
diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h
index afd4863f..25c12a54 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h
@@ -1,17 +1,8 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
+ * Description: handle perf data
+ * Author: xp
+ * Create: 2019-10-13
 */

#ifndef MSPROF_ENGINE_PROF_REPORTER_H
@@ -81,4 +72,4 @@ public:
} // namespace Engine
} // namespace Msprof

-#endif // MSPROF_ENGINE_PROF_REPORTER_H_
+#endif // MSPROF_ENGINE_PROF_REPORTER_H
diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h
index f42ea167..e6084561 100644
--- a/third_party/fwkacllib/inc/toolchain/slog.h
+++ b/third_party/fwkacllib/inc/toolchain/slog.h
@@ -207,6 +207,8 @@ enum {
    HSS, /**< helper */
    FFTS,
    OP,
+    UDF,
+    HICAID,
    INVLID_MOUDLE_ID
};
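
Usage sketch for the Cummin/Cummax contract documented in selection_ops.h above. This is not part of the patch: a host-side C++ reference for the 1-D case of Cummin, where the keep-the-earliest-index rule for ties is an assumption, since the comment block does not pin down tie behaviour.

    #include <cstdint>
    #include <vector>

    // Reference semantics for Cummin on a 1-D tensor (axis 0): values[i] is
    // the minimum of x[0..i] and indices[i] is where that minimum occurs.
    // Assumption: ties keep the earliest index (hence the strict '<' below).
    void CumminRef(const std::vector<float> &x,
                   std::vector<float> &values, std::vector<int32_t> &indices) {
        values.resize(x.size());
        indices.resize(x.size());
        for (size_t i = 0U; i < x.size(); ++i) {
            if (i == 0U || x[i] < values[i - 1U]) {
                values[i] = x[i];
                indices[i] = static_cast<int32_t>(i);
            } else {
                values[i] = values[i - 1U];
                indices[i] = indices[i - 1U];
            }
        }
    }

Cummax is the same loop with '>' in place of '<', with the dim attribute selecting the axis.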
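
For CumulativeLogsumexp(D), the exclusive attribute is easiest to see in a scalar loop. A minimal sketch for a 1-D input with reverse == false; seeding the exclusive mode with -infinity is an assumption based on it being the identity element of log-sum-exp, not something the comment block states.

    #include <algorithm>
    #include <cmath>
    #include <limits>
    #include <vector>

    // logaddexp(a, b) = log(exp(a) + exp(b)), computed stably.
    static float LogAddExp(float a, float b) {
        if (std::isinf(a) && (a < 0.0F)) { return b; }  // -inf is the identity
        if (std::isinf(b) && (b < 0.0F)) { return a; }
        return std::max(a, b) + std::log1p(std::exp(-std::fabs(a - b)));
    }

    std::vector<float> CumLogSumExpRef(const std::vector<float> &x, bool exclusive) {
        std::vector<float> y(x.size());
        float acc = -std::numeric_limits<float>::infinity();  // assumed seed
        for (size_t i = 0U; i < x.size(); ++i) {
            if (exclusive) {
                y[i] = acc;          // excludes x[i]; y[0] is the -inf seed
            }
            acc = LogAddExp(acc, x[i]);
            if (!exclusive) {
                y[i] = acc;          // includes x[i]; y[0] == x[0]
            }
        }
        return y;
    }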
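
The right attribute of the new SearchSorted op maps onto the classic lower-bound/upper-bound distinction. A host-side sketch restricted to 1-D inputs (a simplification for illustration; the op itself searches each last-dim row of sorted_sequence independently):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // right == false: leftmost valid insertion position (std::lower_bound).
    // right == true : rightmost valid insertion position (std::upper_bound).
    std::vector<int64_t> SearchSortedRef(const std::vector<float> &sortedSequence,
                                         const std::vector<float> &values,
                                         bool right) {
        std::vector<int64_t> out;
        out.reserve(values.size());
        for (float v : values) {
            auto it = right
                ? std::upper_bound(sortedSequence.begin(), sortedSequence.end(), v)
                : std::lower_bound(sortedSequence.begin(), sortedSequence.end(), v);
            out.push_back(static_cast<int64_t>(it - sortedSequence.begin()));
        }
        return out;
    }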
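
The STFT registration encodes "unset" optional attributes as 0. The snippet below shows how the defaults described in its comment block would resolve; treating 0 as the unset sentinel is an assumption inferred from the ATTR defaults, not stated in the patch.

    #include <cstdint>

    // Resolve optional STFT attributes per the documented defaults:
    // hop_length -> floor(n_fft / 4), win_length -> n_fft.
    void ResolveStftDefaults(int64_t nFft, int64_t &hopLength, int64_t &winLength) {
        if (hopLength == 0) {
            hopLength = nFft / 4;  // integer division == floor for nFft >= 0
        }
        if (winLength == 0) {
            winLength = nFft;
        }
    }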
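
Call shape for the new rtAicpuKernelLaunchEx, combined with the rtKernelType_t enum added in the same file. A sketch only: the include path is assumed from the tree layout, and the launch-name and argument structures are taken as opaque inputs built elsewhere, since their fields are not part of this patch.

    #include "runtime/kernel.h"  // assumed include path for the declarations above

    // Launch a plain AI CPU kernel: smDesc is optional (nullptr here) and
    // flags 0 requests no dump or extra function behaviour.
    rtError_t LaunchAicpuKernel(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim,
                                const rtArgsEx_t *argsInfo, rtStream_t stm) {
        return rtAicpuKernelLaunchEx(KERNEL_TYPE_AICPU, launchNames, blockDim,
                                     argsInfo, nullptr, stm, 0U);
    }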
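
Because the reindented PluginIntf now declares OnNewConfig as pure virtual (it previously had a default body returning 0), every existing plugin must provide an override after this patch. A minimal do-nothing pair, assuming the Msprof::Engine namespace and an include path mirroring the sibling prof_reporter.h; both are assumptions for illustration.

    #include "toolchain/prof_engine.h"  // assumed include path

    // Minimal plugin: keeps the Reporter, ignores all config switches.
    class NullPlugin : public Msprof::Engine::PluginIntf {
    public:
        int Init(const Msprof::Engine::Reporter *reporter) override {
            reporter_ = reporter;  // keep for later data reporting
            return 0;              // PROFILING_SUCCESS
        }
        // Required override now that OnNewConfig is pure virtual.
        int OnNewConfig(const Msprof::Engine::ModuleJobConfig *config) override {
            (void)config;  // this plugin reads no switches
            return 0;
        }
        int UnInit() override {
            reporter_ = nullptr;  // stop using the Reporter after UnInit
            return 0;
        }

    private:
        const Msprof::Engine::Reporter *reporter_ = nullptr;
    };

    // Matching engine that hands plugins to libmsprof.
    class NullEngine : public Msprof::Engine::EngineIntf {
    public:
        Msprof::Engine::PluginIntf *CreatePlugin() override { return new NullPlugin; }
        int ReleasePlugin(Msprof::Engine::PluginIntf *plugin) override {
            delete plugin;
            return 0;
        }
    };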