diff --git a/RELEASE.md b/RELEASE.md index 74d88134..51697212 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,22 @@ +# Release 1.0.0 + +## Major Features and Improvements +* Automatically dump the input and output of the abnormal operator when the network execution is abnormal; +* Realize dynamic multi-batch based on GotoLabel; +* Optimize the performance of dynamic shape; +* The dynamic resolution feature supports a new scenario in which the network has multiple inputs and the shape of each input is different. + +## Bugfixes +* Fixed the issue that the input and output data of the AICPU operator cannot be dumped in the single-operator execution scenario. +* Fixed the issue that the execution fails in the custom AICPU operator cascading scenario. +* Fixed the issue that in the dynamic batch+dynamic AIPP scenario, the getinputformat and getinputdims parameters are inconsistent. + + +## Thanks to our Contributors +Thanks goes to these wonderful people: wuweikang,wangcong,weiyang,yanghaorang,xutianchun,shibeiji,zhouchao, tanghuikang, zhoulili, liujunzhu, zhengyuanhua, taoxiangdong Contributions of any kind are welcome! +
+Contributions of any kind are welcome! 
+ # Release 0.7.0-beta ## Major Features and Improvements diff --git a/cmake/external_libs/securec.cmake b/cmake/external_libs/securec.cmake index 7dfdbeed..83a4409d 100644 --- a/cmake/external_libs/securec.cmake +++ b/cmake/external_libs/securec.cmake @@ -4,7 +4,7 @@ graphengine_add_pkg(securec MD5 0782dd2351fde6920d31a599b23d8c91 LIBS c_sec PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001 - CMAKE_OPTION " " + CMAKE_OPTION "-DCMAKE_BUILD_TYPE=Release" ) include_directories(${securec_INC}) file(COPY ${securec_INC}/../lib/libc_sec.so DESTINATION ${CMAKE_SOURCE_DIR}/build/graphengine) diff --git a/inc/external/ge/ge_prof.h b/inc/external/ge/ge_prof.h new file mode 100644 index 00000000..dbd87966 --- /dev/null +++ b/inc/external/ge/ge_prof.h @@ -0,0 +1,69 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_GE_GE_PROF_H_ +#define INC_EXTERNAL_GE_GE_PROF_H_ + +#include +#include +#include + +#include "ge/ge_api_error_codes.h" + +namespace ge { +enum ProfDataTypeConfig { + kProfAcl = 0x0001, + kProfTaskTime = 0x0002, + kProfAiCoreMetrics = 0x0004, + kProfAicpuTrace = 0x0008, + kProfModelExecute = 0x0010, + kProfRuntimeApi = 0x0020, + kProfRuntimeTrace = 0x0040, + kProfScheduleTimeline = 0x0080, + kProfScheduleTrace = 0x0100, + kProfAiVectorCoreMetrics = 0x0200, + kProfSubtaskTime = 0x0400, + kProfTrainingTrace = 0x0800, + kProfHcclTrace = 0x1000, + kProfDataProcess = 0x2000, + kProfTaskTrace = 0x3842, + kProfModelLoad = 0x8000000000000000 +}; + +enum ProfilingAicoreMetrics { + kAicoreArithmaticThroughput = 0, + kAicorePipeline = 1, + kAicoreSynchronization = 2, + kAicoreMemory = 3, + kAicoreInternalMemory = 4, + kAicoreStall = 5, + kAicoreMetricsAll = 255 // only for op_trace +}; + +typedef struct ProfAicoreEvents ProfAicoreEvents; +typedef struct aclgrphProfConfig aclgrphProfConfig; + +Status aclgrphProfInit(const char *profiler_path, uint32_t length); +Status aclgrphProfFinalize(); +aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, + ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, + uint64_t data_type_config); +Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config); +Status aclgrphProfStart(aclgrphProfConfig *profiler_config); +Status aclgrphProfStop(aclgrphProfConfig *profiler_config); +} // namespace ge + +#endif // INC_EXTERNAL_GE_GE_PROF_H_ diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index ca727589..3ab6cf06 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -97,6 +97,7 @@ GE_ERRORNO_COMMON(INTERNAL_ERROR, 4, "Internal errors"); // 1343225 GE_ERRORNO_COMMON(CSEC_ERROR, 5, "Failed to call libc_sec API!"); // 1343225861 
GE_ERRORNO_COMMON(TEE_ERROR, 6, "Failed to call tee API!"); // 1343225862 GE_ERRORNO_COMMON(END_OF_SEQUENCE, 7, "End of sequence!"); // 1343225863 +GE_ERRORNO_COMMON(PATH_INVALID, 8, "Path is invalid!"); // 1343225864 // Error code for plugin manager GE_ERRORNO_COMMON(GE_PLGMGR_PATH_INVALID, 30, "Path is invalid!"); // 1343225886 @@ -124,9 +125,13 @@ GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized. GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized."); // 1343229963 // Init module error code definition -GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 -GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 -GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 +GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 +GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 +GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 +GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 +GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 +GE_ERRORNO_INIT(GE_PROF_MODE_CONFLICT, 5, + "Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 // Session module error code definition GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session."); // 1343238144 diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index 71c9367c..b1c278d8 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -398,6 +398,24 @@ bool CheckOutputPathValid(const std::string &file_path, const std::string &atc_p /// @param [out] result /// bool ValidateStr(const 
std::string &filePath, const std::string &mode); + +/// +/// @ingroup domi_common +/// @brief Check whether the file is normal file. +/// @param [in] file_path file path +/// @param [out] result +/// +bool IsValidFile(const char *file_path); + +/// +/// @ingroup domi_common +/// @brief Check path invalid +/// @param [in] path, path to be checked +/// @param [in] length, length of path +/// @return 0 success +/// @return -1 fail +/// +Status CheckPath(const char *path, size_t length); } // namespace ge #endif // INC_FRAMEWORK_COMMON_UTIL_H_ diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h index b0bf8ce9..a32907bb 100644 --- a/inc/graph/debug/ge_attr_define.h +++ b/inc/graph/debug/ge_attr_define.h @@ -142,6 +142,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; diff --git a/src/common/graph/compute_graph.cc b/src/common/graph/compute_graph.cc index e6c306b0..bae4d362 100644 --- a/src/common/graph/compute_graph.cc +++ b/src/common/graph/compute_graph.cc @@ -658,7 +658,7 @@ ComputeGraph::UpdateOutputMapping(const std::map &output_map return GRAPH_FAILED; } - size_t num = op_desc->GetInputsSize(); + size_t num = op_desc->GetAllInputsSize(); for (size_t i = 0; i < num; i++) { GeTensorDesc tensor = op_desc->GetInputDesc(i); uint32_t cur_index = 0; diff --git a/src/common/graph/format_refiner.cc b/src/common/graph/format_refiner.cc index 4cb41349..c716825a 100644 --- a/src/common/graph/format_refiner.cc +++ b/src/common/graph/format_refiner.cc @@ -149,9 +149,10 @@ graphStatus FormatRefiner::GetAnchorPoints(const 
ge::ComputeGraphPtr &graph, std // consider special node save process // get all input desc format bool node_is_all_nd = false; - auto input_size = static_cast(op_desc->GetInputsSize()); + auto input_size = static_cast(op_desc->GetAllInputsSize()); for (uint32_t i = 0; i < input_size; i++) { // Operator pre-set format but not origin format + GE_IF_BOOL_EXEC(op_desc->MutableInputDesc(i) == nullptr, continue); auto input_format = op_desc->MutableInputDesc(i)->GetFormat(); // Pre-save data node (only main graph data) and default infer fail if (node_ptr->GetType() == DATA) { @@ -164,6 +165,7 @@ graphStatus FormatRefiner::GetAnchorPoints(const ge::ComputeGraphPtr &graph, std // Get all output desc format auto output_size = static_cast(op_desc->GetOutputsSize()); for (uint32_t i = 0; i < output_size; i++) { + GE_IF_BOOL_EXEC(op_desc->MutableOutputDesc(i) == nullptr, continue); auto output_format = op_desc->MutableOutputDesc(i)->GetFormat(); if (output_format != FORMAT_ND && output_format != FORMAT_RESERVED) { node_is_all_nd = true; @@ -222,8 +224,9 @@ graphStatus FormatRefiner::BackInferProcess(std::deque &nodes, ge:: for (const auto &in_anchor : node->GetAllInDataAnchors()) { GELOGD("Node is [%s] [B]", (node->GetName()).c_str()); auto in_data_anchor_idx = in_anchor->GetIdx(); - auto to_be_set_format = - node->GetOpDesc()->MutableInputDesc(static_cast(in_data_anchor_idx))->GetOriginFormat(); + auto input_desc = node->GetOpDesc()->MutableInputDesc(static_cast(in_data_anchor_idx)); + GE_IF_BOOL_EXEC(input_desc == nullptr, continue); + auto to_be_set_format = input_desc->GetOriginFormat(); if (to_be_set_format == FORMAT_ND) { GELOGD("Node [%s] [B], format is ND", (node->GetName()).c_str()); continue; diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index 708347a7..4834c73b 100644 --- a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -123,6 +123,7 @@ const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs"; 
const std::string ATTR_NAME_INPUT_DIMS = "input_dims"; +const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED = "_graph_has_been_added"; const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; diff --git a/src/common/graph/node.cc b/src/common/graph/node.cc index b210957d..10d6b3ed 100644 --- a/src/common/graph/node.cc +++ b/src/common/graph/node.cc @@ -68,7 +68,7 @@ graphStatus Node::Init() { return GRAPH_SUCCESS; } GE_CHK_BOOL_EXEC(op_ != nullptr, return GRAPH_FAILED, "original OpDesc is nullptr"); - size_t size = op_->GetInputsSize(); + size_t size = op_->GetAllInputsSize(); for (size_t i = 0; i < size; i++) { std::shared_ptr anchor = ComGraphMakeShared(shared_from_this(), i); if (anchor == nullptr) { @@ -305,13 +305,19 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus Node::AddLinkFrom(con GELOGE(GRAPH_FAILED, "add input desc failed."); return GRAPH_FAILED; } - std::shared_ptr anchor = ComGraphMakeShared(shared_from_this(), in_data_anchors_.size()); - if (anchor == nullptr) { - GELOGE(GRAPH_FAILED, "out_anchor size is:%zu, malloc shared_ptr failed.", out_anchors.size()); - return GRAPH_FAILED; + + if (index < GetAllInDataAnchors().size()) { + (void)out_anchors.at(0)->LinkTo(in_data_anchors_[index]); + } else { + std::shared_ptr anchor = + ComGraphMakeShared(shared_from_this(), in_data_anchors_.size()); + if (anchor == nullptr) { + GELOGE(GRAPH_FAILED, "out_anchor size is:%zu, malloc shared_ptr failed.", out_anchors.size()); + return GRAPH_FAILED; + } + in_data_anchors_.push_back(anchor); + (void)out_anchors.at(0)->LinkTo(in_data_anchors_.back()); } - in_data_anchors_.push_back(anchor); - (void)out_anchors.at(0)->LinkTo(in_data_anchors_.back()); return GRAPH_SUCCESS; } @@ -347,20 +353,30 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus Node::AddLinkFrom(con } GE_CHECK_NOTNULL(op_); - auto op_desc = input_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - if 
(op_->AddInputDesc(name, op_desc->GetOutputDesc(0)) != GRAPH_SUCCESS) { - GELOGE(GRAPH_FAILED, "add input desc failed."); - return GRAPH_FAILED; + auto input_op_desc = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(input_op_desc); + auto index = op_->GetInputIndexByName(name); + if (index != -1) { + if (index >= static_cast(in_data_anchors_.size())) { + GELOGE(GRAPH_FAILED, "op %s get input name %s 's index %d is illegal.", op_->GetName().c_str(), name.c_str(), + index); + return GRAPH_FAILED; + } + (void)out_anchors.at(0)->LinkTo(in_data_anchors_[index]); + } else { + std::shared_ptr anchor = + ComGraphMakeShared(shared_from_this(), in_data_anchors_.size()); + if (anchor == nullptr) { + GELOGE(GRAPH_FAILED, "in_data_anchors_size is:%zu, malloc shared_ptr failed.", in_data_anchors_.size()); + return GRAPH_FAILED; + } + in_data_anchors_.push_back(anchor); + (void)out_anchors.at(0)->LinkTo(in_data_anchors_.back()); } - std::shared_ptr anchor = ComGraphMakeShared(shared_from_this(), in_data_anchors_.size()); - if (anchor == nullptr) { - GELOGE(GRAPH_FAILED, "out_anchor size is:%zu, malloc shared_ptr failed.", out_anchors.size()); + if (op_->AddInputDesc(name, input_op_desc->GetOutputDesc(0)) != GRAPH_SUCCESS) { + GELOGE(GRAPH_FAILED, "add input desc failed."); return GRAPH_FAILED; } - in_data_anchors_.push_back(anchor); - (void)out_anchors.at(0)->LinkTo(in_data_anchors_.back()); return GRAPH_SUCCESS; } diff --git a/src/common/graph/op_desc.cc b/src/common/graph/op_desc.cc index 706ec9cd..fdd1acb7 100644 --- a/src/common/graph/op_desc.cc +++ b/src/common/graph/op_desc.cc @@ -675,7 +675,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ConstGeTensorDescPtr OpDesc::GetI return nullptr; } if (inputs_desc_[index]->IsValid() != GRAPH_SUCCESS) { - GELOGE(GRAPH_FAILED, "inputsDesc[%u] is InValid", index); + GELOGW("inputsDesc[%u] is InValid", index); return nullptr; } else { return inputs_desc_[static_cast(index)]; diff --git a/src/common/graph/operator.cc 
b/src/common/graph/operator.cc index 1320e4c2..21554fa1 100644 --- a/src/common/graph/operator.cc +++ b/src/common/graph/operator.cc @@ -1504,7 +1504,9 @@ class GraphBuilderImpl { GE_CHK_BOOL_EXEC(dst_anchor != nullptr, return GRAPH_FAILED, "GetInDataAnchor failed."); auto ret = GraphUtils::AddEdge(src_anchor, dst_anchor); - GE_CHK_BOOL_EXEC(ret == GRAPH_SUCCESS, return GRAPH_FAILED, "AddEdge failed."); + GE_CHK_BOOL_EXEC(ret == GRAPH_SUCCESS, return GRAPH_FAILED, + "from node[%s][%d] to node[%s][%d]AddEdge failed.", src_node_ptr->GetName().c_str(), + src_anchor->GetIdx(), dst_node_info->second->GetName().c_str(), dst_anchor->GetIdx()); } } auto out_control_anchor = src_node_ptr->GetOutControlAnchor(); @@ -1536,19 +1538,23 @@ inline bool HasSameNameNode(const ComputeGraphPtr &compute_graph) { for (const auto &graph : compute_graph->GetAllSubgraphs()) { std::set node_names; for (auto const &node : graph->GetDirectNode()) { - node_names.insert(node->GetName()); - } - - if (node_names.size() != graph->GetDirectNodesSize()) { - return true; + auto result = node_names.insert(node->GetName()); + if (!result.second) { + GELOGE(GRAPH_FAILED, "graph %s has same name node%s", graph->GetName().c_str(), node->GetName().c_str()); + return true; + } } } std::set node_names; for (auto const &node : compute_graph->GetDirectNode()) { - node_names.insert(node->GetName()); + auto result = node_names.insert(node->GetName()); + if (!result.second) { + GELOGE(GRAPH_FAILED, "graph %s has same name node%s", compute_graph->GetName().c_str(), node->GetName().c_str()); + return true; + } } - return node_names.size() != compute_graph->GetDirectNodesSize(); + return false; } ComputeGraphPtr GraphUtils::CreateGraphFromOperator(const string &name, const vector &inputs) { diff --git a/src/common/graph/shape_refiner.cc b/src/common/graph/shape_refiner.cc index a87e3753..17423da4 100644 --- a/src/common/graph/shape_refiner.cc +++ b/src/common/graph/shape_refiner.cc @@ -51,6 +51,9 @@ graphStatus 
ReverseBrushWhileBodySubGraph(const ConstNodePtr &node) { for (const auto &node_sub : sub_graph_body->GetAllNodes()) { for (size_t i = 0; i < node_sub->GetAllInDataAnchorsSize(); i++) { auto input_desc = node_sub->GetOpDesc()->MutableInputDesc(i); + GE_IF_BOOL_EXEC(input_desc == nullptr, + GELOGW("Get null input by index %zu from node %s ", i, node_sub->GetName().c_str()); + continue); (void)input_desc->SetUnknownDimNumShape(); } for (size_t i = 0; i < node_sub->GetAllOutDataAnchorsSize(); i++) { @@ -376,10 +379,13 @@ graphStatus UpdateOpInputDesc(const ConstNodePtr &node_ptr) { continue; } int peer_out_idx = peer_out_data_anchor->GetIdx(); - auto in_desc = node_ptr->GetOpDesc()->MutableInputDesc(static_cast(in_idx)); auto peer_out_desc = peer_out_data_node->GetOpDesc()->MutableOutputDesc(static_cast(peer_out_idx)); // check shape and dtype continuity. do not stop process + auto in_desc = node_ptr->GetOpDesc()->MutableInputDesc(static_cast(in_idx)); + if (in_desc == nullptr) { + continue; + } auto in_shape = in_desc->GetShape().GetDims(); auto in_dtype = in_desc->GetDataType(); auto peer_out_shape = peer_out_desc->GetShape().GetDims(); diff --git a/src/common/graph/utils/ge_ir_utils.cc b/src/common/graph/utils/ge_ir_utils.cc index c08ea9ab..f238c6e8 100644 --- a/src/common/graph/utils/ge_ir_utils.cc +++ b/src/common/graph/utils/ge_ir_utils.cc @@ -264,11 +264,11 @@ void OnnxUtils::AddAttrProtoForOpInAndOutDesc(onnx::NodeProto *node_proto, const return; } // Input describes - auto size_in = op_desc->GetInputsSize(); + auto size_in = op_desc->GetAllInputsSize(); AddAttrProto(node_proto, onnx::AttributeProto_AttributeType_INT, "input_desc_nums", &size_in); if (size_in > 0) { for (uint32_t i = 0; i < size_in; i++) { - auto input_desc = op_desc->GetInputDescPtr(i); + auto input_desc = op_desc->GetInputDescPtrDfault(i); if (input_desc != nullptr) { auto data_type = TypeUtils::DataTypeToSerialString(input_desc->GetDataType()); AddAttrProto(node_proto, 
onnx::AttributeProto_AttributeType_STRING, "input_desc_dtype:" + std::to_string(i), @@ -480,9 +480,20 @@ void OnnxUtils::AddAttrProtoFromNodeMembers(const NodePtr &node, onnx::NodeProto if (!recv_list.empty()) { AddAttrProto(node_proto, onnx::AttributeProto_AttributeType_INTS, "recv_event_id_list", &recv_list); } - // 2.Attributes added from node's op_(message OpDef) auto op_desc = node->op_; if (op_desc != nullptr) { + // for input_name_idx_ in opdesc + auto input_name_2_indexs = op_desc->GetAllInputName(); + ::google::protobuf::RepeatedPtrField<::std::string> input_names; + ::google::protobuf::RepeatedField<::google::protobuf::int64> input_indexes; + for (const auto &input_name_2_index : input_name_2_indexs) { + std::string input_name = input_name_2_index.first; + input_names.Add(std::move(input_name)); + input_indexes.Add(input_name_2_index.second); + } + AddAttrProto(node_proto, onnx::AttributeProto_AttributeType_STRINGS, "_input_name_key", input_names); + AddAttrProto(node_proto, onnx::AttributeProto_AttributeType_INTS, "_input_name_value", input_indexes); + // 2.Attributes added from node's op_(message OpDef) // Input and out describes AddAttrProtoForOpInAndOutDesc(node_proto, op_desc); // Others diff --git a/src/common/graph/utils/graph_utils.cc b/src/common/graph/utils/graph_utils.cc index e2f9f857..c741a316 100644 --- a/src/common/graph/utils/graph_utils.cc +++ b/src/common/graph/utils/graph_utils.cc @@ -1470,8 +1470,7 @@ graphStatus GraphUtils::CopyTensorAttrs(const OpDescPtr &dst_desc, const NodePtr for (uint32_t i = 0; i < src_node->GetAllInDataAnchorsSize(); ++i) { auto input_desc = dst_desc->MutableInputDesc(i); if (input_desc == nullptr) { - GELOGE(GRAPH_FAILED, "Param dst node not valid"); - return GRAPH_FAILED; + continue; } input_desc->CopyAttrsFrom(src_desc->GetInputDesc(i)); } diff --git a/src/common/graph/utils/op_desc_utils.cc b/src/common/graph/utils/op_desc_utils.cc index e0579581..63fff177 100644 --- 
a/src/common/graph/utils/op_desc_utils.cc +++ b/src/common/graph/utils/op_desc_utils.cc @@ -513,7 +513,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDescUtils:: } return MutableWeights(*node); } - GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDescUtils::SetWeights(ge::Node &node, const vector &weights) { GE_CHK_BOOL_EXEC(node.GetOpDesc() != nullptr, return GRAPH_PARAM_INVALID, "node.GetOpDesc is nullptr!"); diff --git a/src/ge/CMakeLists.txt b/src/ge/CMakeLists.txt index 18c433cb..8c20b336 100755 --- a/src/ge/CMakeLists.txt +++ b/src/ge/CMakeLists.txt @@ -58,6 +58,7 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge) # need to remove dependencies on pb files later file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "analyzer/analyzer.cc" + "client/ge_prof.cc" "client/ge_api.cc" "common/dump/dump_manager.cc" "common/dump/dump_properties.cc" @@ -225,6 +226,7 @@ target_link_libraries(ge_runner ${msprof} ${runtime} ${resouce} + ${ascend_hal} rt dl) @@ -335,6 +337,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "host_kernels/unpack_kernel.cc" "host_kernels/unsqueeze_kernel.cc" "hybrid/hybrid_davinci_model_stub.cc" + "hybrid/node_executor/aicpu/aicpu_ext_info.cc" "init/gelib.cc" "ir_build/atc_ir_common.cc" "ir_build/ge_ir_build.cc" diff --git a/src/ge/analyzer/analyzer.cc b/src/ge/analyzer/analyzer.cc index 1c944971..b7d09bea 100644 --- a/src/ge/analyzer/analyzer.cc +++ b/src/ge/analyzer/analyzer.cc @@ -101,7 +101,7 @@ Status Analyzer::BuildJsonObject(uint64_t session_id, uint64_t graph_id) { ge::Status Analyzer::Initialize() { ClearHistoryFile(); - return CreateAnalyzerFile(); + return SUCCESS; } void Analyzer::Finalize() { @@ -136,7 +136,7 @@ void Analyzer::DestroyGraphJsonObject(uint64_t session_id, uint64_t graph_id) { } else { auto iter1 = (iter->second).find(graph_id); if (iter1 == (iter->second).end()) { - GELOGW("can not find the graph json object by session_id[%lu] and graph_id[%lu].Do nothing", session_id, 
+ GELOGW("Can not find the graph json object by session_id[%lu] and graph_id[%lu]. Do nothing.", session_id, graph_id); } (iter->second).erase(iter1); @@ -169,6 +169,10 @@ void Analyzer::ClearHistoryFile() { } ge::Status Analyzer::CreateAnalyzerFile() { + if (is_json_file_create_) { + GELOGD("analyzer file has been created!No necessary to create again!"); + return SUCCESS; + } GELOGD("start to create analyzer file!"); // Check whether the manifest exists, if not, create it. string real_path = RealPath(kFilePath.c_str()); @@ -176,18 +180,19 @@ ge::Status Analyzer::CreateAnalyzerFile() { GELOGE(FAILED, "File path is invalid."); return FAILED; } - string file = real_path + "/" + kAnalyzeFile; - GELOGD("Created analyzer file:[%s]", file.c_str()); - int fd = open(file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, kFileAuthority); + std::lock_guard lg(file_mutex_); + json_file_name_ = real_path + "/" + kAnalyzeFile; + GELOGD("Created analyzer file:[%s]", json_file_name_.c_str()); + int fd = open(json_file_name_.c_str(), O_WRONLY | O_CREAT | O_TRUNC, kFileAuthority); if (fd < 0) { - GELOGE(INTERNAL_ERROR, "Fail to open the file: %s.", file.c_str()); + GELOGE(INTERNAL_ERROR, "Fail to open the file: %s.", json_file_name_.c_str()); return INTERNAL_ERROR; } if (close(fd) != 0) { - GELOGE(INTERNAL_ERROR, "Fail to close the file: %s.", file.c_str()); + GELOGE(INTERNAL_ERROR, "Fail to close the file: %s.", json_file_name_.c_str()); return INTERNAL_ERROR; } - json_file_name_ = file; + is_json_file_create_ = true; GELOGD("success to create analyzer file[%s]!", json_file_name_.c_str()); return SUCCESS; @@ -231,6 +236,12 @@ ge::Status Analyzer::DoAnalyze(DataInfo &data_info) { GELOGE(status, "save op info failed!"); return FAILED; } + // create json file + status = CreateAnalyzerFile(); + if (status != SUCCESS) { + GELOGE(status, "create analyzer file failed!"); + return status; + } // save data to file return SaveAnalyzerDataToFile(); } diff --git a/src/ge/analyzer/analyzer.h 
b/src/ge/analyzer/analyzer.h index 4ac8b391..1afeeca3 100644 --- a/src/ge/analyzer/analyzer.h +++ b/src/ge/analyzer/analyzer.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "external/ge/ge_api_types.h" #include "graph/compute_graph.h" @@ -181,6 +182,7 @@ class Analyzer { std::mutex file_mutex_; // protect json_file_ std::ofstream json_file_; std::string json_file_name_; + std::atomic_bool is_json_file_create_{false}; }; } // namespace ge #endif // DOMI_ANALYZER_ANANLYZER_H_ diff --git a/src/ge/client/CMakeLists.txt b/src/ge/client/CMakeLists.txt index afdbd141..a87beb77 100755 --- a/src/ge/client/CMakeLists.txt +++ b/src/ge/client/CMakeLists.txt @@ -29,6 +29,7 @@ file(GLOB PROTO_HEADER_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "ge_api.cc" + "ge_prof.cc" ) ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) @@ -66,5 +67,6 @@ target_link_libraries(ge_client ${slog} ${mmpa} ${runtime} + ${msprof} rt dl) diff --git a/src/ge/client/ge_api.cc b/src/ge/client/ge_api.cc index 0458a508..ad01e48f 100644 --- a/src/ge/client/ge_api.cc +++ b/src/ge/client/ge_api.cc @@ -39,7 +39,7 @@ using std::vector; namespace { const int32_t kMaxStrLen = 128; -} +} // namespace static bool g_ge_initialized = false; static std::mutex g_ge_release_mutex; // GEFinalize and ~Session use diff --git a/src/ge/client/ge_prof.cc b/src/ge/client/ge_prof.cc new file mode 100644 index 00000000..d4407852 --- /dev/null +++ b/src/ge/client/ge_prof.cc @@ -0,0 +1,375 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ge/ge_prof.h" +#include "ge/ge_api.h" +#include "init/gelib.h" +#include "common/debug/log.h" +#include "framework/common/debug/ge_log.h" +#include "common/profiling/profiling_manager.h" +#include "graph/load/graph_loader.h" +#include "toolchain/prof_acl_api.h" + +using std::map; +using std::string; +using std::vector; + +namespace { +const uint32_t kMaxDeviceNum = 64; +const std::string PROFILING_INIT = "prof_init"; +const std::string PROFILING_FINALIZE = "prof_finalize"; +const std::string PROFILING_START = "prof_start"; +const std::string PROFILING_STOP = "prof_stop"; +const std::string DEVICES_NUMS = "devNums"; +const std::string DEVICE_ID_LIST = "devIdList"; +const std::string AICORE_METRICS = "aicoreMetrics"; + +const std::map kProfAicoreMetricsToString = { + {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, + {ge::kAicorePipeline, "AICORE_PIPELINE"}, + {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, + {ge::kAicoreMemory, "AICORE_MEMORY"}, + {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, + {ge::kAicoreStall, "AICORE_STALL"}, + {ge::kAicoreMetricsAll, "AICORE_METRICS_ALL"}}; + +const std::map kDataTypeConfigMapping = {{ge::kProfAcl, PROF_ACL_API}, + {ge::kProfTaskTime, PROF_TASK_TIME}, + {ge::kProfAiCoreMetrics, PROF_AICORE_METRICS}, + {ge::kProfAicpuTrace, PROF_AICPU_TRACE}, + {ge::kProfModelExecute, PROF_MODEL_EXECUTE}, + {ge::kProfRuntimeApi, PROF_RUNTIME_API}, + {ge::kProfRuntimeTrace, PROF_RUNTIME_TRACE}, + {ge::kProfScheduleTimeline, PROF_SCHEDULE_TIMELINE}, + {ge::kProfScheduleTrace, 
PROF_SCHEDULE_TRACE}, + {ge::kProfAiVectorCoreMetrics, PROF_AIVECTORCORE_METRICS}, + {ge::kProfSubtaskTime, PROF_SUBTASK_TIME}, + {ge::kProfTrainingTrace, PROF_TRAINING_TRACE}, + {ge::kProfHcclTrace, PROF_HCCL_TRACE}, + {ge::kProfDataProcess, PROF_DATA_PROCESS}, + {ge::kProfTaskTrace, PROF_TASK_TRACE}, + {ge::kProfModelLoad, PROF_MODEL_LOAD}}; +} // namespace + +static bool g_graph_prof_init_ = false; +static std::mutex g_prof_mutex_; + +namespace ge { +struct aclgrphProfConfig { + ProfConfig config; +}; + +Status aclgrphProfInit(const char *profiler_path, uint32_t length) { + GELOGT(TRACE_INIT, "Graph prof init start"); + + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); + return FAILED; + } + + std::lock_guard lock(g_prof_mutex_); + if (g_graph_prof_init_) { + GELOGW("Multi graph profiling initializations."); + return GE_PROF_MULTI_INIT; + } + + Status ret = CheckPath(profiler_path, length); + if (ret != SUCCESS) { + GELOGE(ret, "Profiling config path is invalid."); + return ret; + } + // if command mode is set, just return + if (ProfilingManager::Instance().ProfilingOn()) { + GELOGW("Graph prof init failed, cause profiling command pattern is running."); + return GE_PROF_MODE_CONFLICT; + } + + ret = ProfInit(profiler_path); + if (ret != SUCCESS) { + GELOGE(ret, "ProfInit init fail"); + return ret; + } + + GraphLoader graph_loader; + Command command; + command.cmd_params.clear(); + command.cmd_type = PROFILING_INIT; + command.module_index = kProfModelLoad | kProfTrainingTrace; + ret = graph_loader.CommandHandle(command); + if (ret != SUCCESS) { + GELOGE(ret, "Handle profiling command %s failed, config = %s", PROFILING_INIT.c_str(), profiler_path); + return ret; + } + if (!g_graph_prof_init_) { + g_graph_prof_init_ = true; + GELOGI("Profiling init successfully."); + } + + GELOGI("Successfully execute GraphProfInit."); + return 
SUCCESS; +} + +Status aclgrphProfFinalize() { + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); + return FAILED; + } + std::lock_guard lock(g_prof_mutex_); + // if command mode is set, just return + if (ProfilingManager::Instance().ProfilingOn()) { + GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); + return GE_PROF_MODE_CONFLICT; + } + + if (!g_graph_prof_init_) { + GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); + return GE_PROF_NOT_INIT; + } + GraphLoader graph_loader; + Command command; + command.cmd_params.clear(); + command.cmd_type = PROFILING_FINALIZE; + Status ret = graph_loader.CommandHandle(command); + if (ret != SUCCESS) { + GELOGE(ret, "Handle profiling command %s failed.", PROFILING_FINALIZE.c_str()); + return ret; + } + + ret = ProfFinalize(); + if (ret != SUCCESS) { + GELOGE(ret, "Finalize profiling failed, result = %d", ret); + } + + if (ret == SUCCESS) { + g_graph_prof_init_ = false; + GELOGI("Successfully execute GraphProfFinalize."); + } + return ret; +} + +bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector &prof_config_params) { + prof_config_params.clear(); + prof_config_params.emplace_back(DEVICES_NUMS); + prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); + prof_config_params.emplace_back(DEVICE_ID_LIST); + std::string devID = ""; + if (profiler_config->config.devNums == 0) { + GELOGW("The device num is invalid."); + return false; + } + for (uint32_t i = 0; i < profiler_config->config.devNums; i++) { + devID.append(std::to_string(profiler_config->config.devIdList[i])); + if (i != profiler_config->config.devNums - 1) { + devID.append(","); + } + } + + prof_config_params.push_back(devID); + prof_config_params.push_back(AICORE_METRICS); + auto iter = + 
kProfAicoreMetricsToString.find(static_cast(profiler_config->config.aicoreMetrics)); + if (iter == kProfAicoreMetricsToString.end()) { + GELOGW("The prof aicore metrics is invalid."); + return false; + } + prof_config_params.push_back(iter->second); + return true; +} + +bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { + if (deviceid_list == nullptr) { + GELOGE(PARAM_INVALID, "deviceIdList is nullptr"); + return false; + } + if (device_nums == 0 || device_nums > kMaxDeviceNum) { + GELOGE(PARAM_INVALID, "The device nums is invalid."); + return false; + } + + // real device num + int32_t dev_count = 0; + rtError_t rt_err = rtGetDeviceCount(&dev_count); + if (rt_err != RT_ERROR_NONE) { + GELOGE(INTERNAL_ERROR, "Get the Device count fail."); + return false; + } + + if (device_nums > static_cast(dev_count)) { + GELOGE(PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count); + return false; + } + + std::unordered_set record; + for (size_t i = 0; i < device_nums; ++i) { + uint32_t dev_id = deviceid_list[i]; + if (dev_id >= static_cast(dev_count)) { + GELOGE(PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count); + return false; + } + if (record.count(dev_id) > 0) { + GELOGE(PARAM_INVALID, "Device id %u is duplicatedly set", dev_id); + return false; + } + record.insert(dev_id); + } + return true; +} + +aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, + ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, + uint64_t data_type_config) { + if (!isProfConfigValid(deviceid_list, device_nums)) { + return nullptr; + } + aclgrphProfConfig *config = new (std::nothrow) aclgrphProfConfig(); + if (config == nullptr) { + GELOGE(INTERNAL_ERROR, "new aclgrphProfConfig fail"); + return nullptr; + } + config->config.devNums = device_nums; + if (memcpy_s(config->config.devIdList, sizeof(config->config.devIdList), deviceid_list, + 
device_nums * sizeof(uint32_t)) != EOK) { + GELOGE(INTERNAL_ERROR, "copy devID failed. size = %u", device_nums); + delete config; + return nullptr; + } + + config->config.aicoreMetrics = static_cast(aicore_metrics); + uint64_t data_type = 0; + for (auto &iter : kDataTypeConfigMapping) { + if ((iter.first & data_type_config) == iter.first) { + data_type |= iter.second; + } + } + config->config.dataTypeConfig = data_type; + GELOGI("Successfully create prof config."); + return config; +} + +Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config) { + if (profiler_config == nullptr) { + GELOGE(PARAM_INVALID, "destroy profilerConfig failed, profilerConfig must not be nullptr"); + return PARAM_INVALID; + } + + delete profiler_config; + GELOGI("Successfully destroy prof config."); + return SUCCESS; +} + +Status aclgrphProfStart(aclgrphProfConfig *profiler_config) { + if (profiler_config == nullptr) { + GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); + return FAILED; + } + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); + return FAILED; + } + + std::lock_guard lock(g_prof_mutex_); + // if command mode is set, just return + if (ProfilingManager::Instance().ProfilingOn()) { + GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); + return GE_PROF_MODE_CONFLICT; + } + if (!g_graph_prof_init_) { + GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); + return GE_PROF_NOT_INIT; + } + + Status ret = ProfStartProfiling(&profiler_config->config); + if (ret != SUCCESS) { + GELOGE(ret, "Start profiling failed, prof result = %d", ret); + return FAILED; + } + + std::vector prof_params; + if (!TransProfConfigToParam(profiler_config, prof_params)) { + GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); + return PARAM_INVALID; + } + + GraphLoader graph_loader; + Command 
command; + command.cmd_params.clear(); + command.cmd_type = PROFILING_START; + command.cmd_params = prof_params; + command.module_index = profiler_config->config.dataTypeConfig; + ret = graph_loader.CommandHandle(command); + if (ret != SUCCESS) { + GELOGE(ret, "Handle profiling command failed"); + return FAILED; + } + + GELOGI("Successfully execute GraphProfStartProfiling."); + + return SUCCESS; +} + +Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { + if (profiler_config == nullptr) { + GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); + return FAILED; + } + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); + return FAILED; + } + + std::lock_guard lock(g_prof_mutex_); + // if command mode is set, just return + if (ProfilingManager::Instance().ProfilingOn()) { + GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); + return GE_PROF_MODE_CONFLICT; + } + if (!g_graph_prof_init_) { + GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); + return GE_PROF_NOT_INIT; + } + + Status ret = ProfStopProfiling(&profiler_config->config); + if (ret != SUCCESS) { + GELOGE(ret, "Stop profiling failed, prof result = %d", ret); + return ret; + } + + std::vector prof_params; + if (!TransProfConfigToParam(profiler_config, prof_params)) { + GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); + return PARAM_INVALID; + } + + GraphLoader graph_loader; + Command command; + command.cmd_params.clear(); + command.cmd_type = PROFILING_STOP; + command.cmd_params = prof_params; + command.module_index = profiler_config->config.dataTypeConfig; + ret = graph_loader.CommandHandle(command); + if (ret != SUCCESS) { + GELOGE(ret, "Handle profiling command failed"); + return FAILED; + } + + GELOGI("Successfully execute GraphProfStopProfiling."); + return SUCCESS; +} +} // namespace ge diff 
--git a/src/ge/client/module.mk b/src/ge/client/module.mk index 9224a0db..1a304cbf 100644 --- a/src/ge/client/module.mk +++ b/src/ge/client/module.mk @@ -4,6 +4,7 @@ LOCAL_PATH := $(call my-dir) COMMON_LOCAL_SRC_FILES := \ proto/ge_api.proto \ ge_api.cc \ + ge_prof.cc \ COMMON_LOCAL_C_INCLUDES := \ @@ -69,6 +70,8 @@ LOCAL_SHARED_LIBRARIES := \ libregister \ libge_compiler \ libge_common \ + libmsprof + LOCAL_LDFLAGS := -lrt -ldl @@ -102,6 +105,7 @@ LOCAL_SHARED_LIBRARIES := \ libruntime \ libge_compiler \ libge_common \ + libmsprof LOCAL_LDFLAGS := -lrt -ldl diff --git a/src/ge/common/CMakeLists.txt b/src/ge/common/CMakeLists.txt index 58ba9bac..f6c75f87 100755 --- a/src/ge/common/CMakeLists.txt +++ b/src/ge/common/CMakeLists.txt @@ -27,6 +27,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "context/ctx.cc" "cust_aicpu_kernel_store.cc" "debug/memory_dumper.cc" + "dump/dump_properties.cc" "fmk_error_codes.cc" "formats/format_transfers/datatype_transfer.cc" "formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc" diff --git a/src/ge/common/dump/dump_manager.cc b/src/ge/common/dump/dump_manager.cc index d6783830..fbf9afe7 100644 --- a/src/ge/common/dump/dump_manager.cc +++ b/src/ge/common/dump/dump_manager.cc @@ -49,7 +49,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf dump_properties_.ClearDumpPropertyValue(); return SUCCESS; } + dump_properties_.SetDumpStatus(dump_status); + dump_op_switch = dump_config.dump_op_switch; + dump_properties_.SetDumpOpSwitch(dump_op_switch); if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { GELOGE(PARAM_INVALID, "Dump list is invalid,dump_op_switch is %s", dump_op_switch.c_str()); return PARAM_INVALID; @@ -95,14 +98,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpManager::IsDumpOpen() { - std::lock_guard lock(mutex_); - if 
(!dump_properties_.GetDumpPath().empty()) { - return true; - } - return false; -} - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties() { std::lock_guard lock(mutex_); return dump_properties_; diff --git a/src/ge/common/dump/dump_manager.h b/src/ge/common/dump/dump_manager.h index ee38cef1..dbc89cc8 100644 --- a/src/ge/common/dump/dump_manager.h +++ b/src/ge/common/dump/dump_manager.h @@ -28,7 +28,6 @@ class DumpManager { static DumpManager &GetInstance(); Status SetDumpConf(const DumpConfig &dump_config); - bool IsDumpOpen(); const DumpProperties &GetDumpProperties(); void SetModelName(const std::string &model_name); const std::string &GetModelName(); diff --git a/src/ge/common/dump/dump_op.cc b/src/ge/common/dump/dump_op.cc index a36204dd..31a88023 100644 --- a/src/ge/common/dump/dump_op.cc +++ b/src/ge/common/dump/dump_op.cc @@ -16,7 +16,6 @@ #include "common/dump/dump_op.h" -#include "aicpu/common/aicpu_task_struct.h" #include "common/dump/dump_manager.h" #include "common/ge/datatype_util.h" #include "framework/common/debug/ge_log.h" @@ -28,6 +27,7 @@ #include "proto/ge_ir.pb.h" #include "proto/op_mapping_info.pb.h" #include "runtime/mem.h" +#include "aicpu/common/aicpu_task_struct.h" namespace { const uint32_t kAicpuLoadFlag = 1; diff --git a/src/ge/common/dump/dump_properties.cc b/src/ge/common/dump/dump_properties.cc index cbf3697d..b6247c6e 100644 --- a/src/ge/common/dump/dump_properties.cc +++ b/src/ge/common/dump/dump_properties.cc @@ -31,7 +31,7 @@ namespace { const std::string kEnableFlag = "1"; - +const std::string kDumpStatusOpen = "on"; const uint32_t kAicoreOverflow = (0x1 << 0); const uint32_t kAtomicOverflow = (0x1 << 1); const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); @@ -81,12 +81,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti if (enable_dump_ == kEnableFlag) { std::string dump_step; if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, 
dump_step) == GRAPH_SUCCESS) { - GELOGD("Get dump step %s successfully", dump_step.c_str()); + GELOGI("Get dump step %s successfully", dump_step.c_str()); SetDumpStep(dump_step); } string dump_mode; if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) { - GELOGD("Get dump mode %s successfully", dump_mode.c_str()); + GELOGI("Get dump mode %s successfully", dump_mode.c_str()); SetDumpMode(dump_mode); } AddPropertyValue(DUMP_ALL_MODEL, {}); @@ -192,6 +192,37 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti return dump_mode_; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpStatus(const std::string &status) { + dump_status_ = status; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperties::GetDumpStatus() const { + return dump_status_; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( + const std::string &dump_op_switch) { + dump_op_switch_ = dump_op_switch; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperties::GetDumpOpSwitch() const { + return dump_op_switch_; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsSingleOpNeedDump() const { + if (dump_op_switch_ == kDumpStatusOpen) { + return true; + } + return false; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsDumpOpen() const { + if (enable_dump_ == kEnableFlag || dump_status_ == kDumpStatusOpen) { + return true; + } + return false; +} + void DumpProperties::CopyFrom(const DumpProperties &other) { if (&other != this) { enable_dump_ = other.enable_dump_; diff --git a/src/ge/common/dump/dump_properties.h b/src/ge/common/dump/dump_properties.h index a397cac4..7909d5a5 100644 --- a/src/ge/common/dump/dump_properties.h +++ b/src/ge/common/dump/dump_properties.h @@ -61,10 +61,26 @@ class DumpProperties { const std::string &GetDumpMode() const; + void 
SetDumpStatus(const std::string &status); + + const std::string &GetDumpStatus() const; + + void SetDumpOpSwitch(const std::string &dump_op_switch); + + const std::string &GetDumpOpSwitch() const; + bool IsOpDebugOpen() const { return is_op_debug_; } + bool IsDumpOpen() const; + + bool IsSingleOpNeedDump() const; + uint32_t GetOpDebugMode() const { return op_debug_mode_; } + const std::string &GetEnableDump() const { return enable_dump_; } + + const std::string &GetEnableDumpDebug() const { return enable_dump_debug_; } + private: void CopyFrom(const DumpProperties &other); @@ -76,6 +92,8 @@ class DumpProperties { std::string dump_path_; std::string dump_step_; std::string dump_mode_; + std::string dump_status_; + std::string dump_op_switch_; std::map> model_dump_properties_map_; bool is_op_debug_ = false; diff --git a/src/ge/common/ge/op_tiling_manager.cc b/src/ge/common/ge/op_tiling_manager.cc index 7fb7a8fc..ec43ab2e 100644 --- a/src/ge/common/ge/op_tiling_manager.cc +++ b/src/ge/common/ge/op_tiling_manager.cc @@ -15,14 +15,15 @@ */ #include "common/ge/op_tiling_manager.h" +#include "common/util/error_manager/error_manager.h" #include "framework/common/debug/log.h" #include namespace { const char *const kEnvName = "ASCEND_OPP_PATH"; const std::string kDefaultPath = "/usr/local/Ascend/opp"; -const std::string kDefaultBuiltInTilingPath = "/op_impl/built-in/liboptiling.so"; -const std::string kDefaultCustomTilingPath = "/op_impl/custom/liboptiling.so"; +const std::string kDefaultBuiltInTilingPath = "/op_impl/built-in/ai_core/tbe/op_tiling/liboptiling.so"; +const std::string kDefaultCustomTilingPath = "/op_impl/custom/ai_core/tbe/op_tiling/liboptiling.so"; const uint8_t kPrefixIndex = 9; } // namespace @@ -44,7 +45,9 @@ std::string OpTilingManager::GetPath() { if (opp_path_env != nullptr) { char resolved_path[PATH_MAX]; if (realpath(opp_path_env, resolved_path) == NULL) { - GELOGE(PARAM_INVALID, "Failed load tiling lib as env 'ASCEND_OPP_PATH'(%s) is invalid path.", 
opp_path_env); + ErrorManager::GetInstance().ATCReportErrMessage("E19024", {"env", "value", "situation"}, + {"ASCEND_OPP_PATH", opp_path_env, "loading the tiling lib"}); + GELOGE(PARAM_INVALID, "Failed load tiling lib as env 'ASCEND_OPP_PATH'[%s] is invalid path.", opp_path_env); return std::string(); } opp_path = resolved_path; diff --git a/src/ge/common/ge_common.mk b/src/ge/common/ge_common.mk index 7632b46d..e40ef3c1 100644 --- a/src/ge/common/ge_common.mk +++ b/src/ge/common/ge_common.mk @@ -12,6 +12,7 @@ GE_COMMON_LOCAL_SRC_FILES := \ math/fp16_math.cc \ debug/memory_dumper.cc \ formats/utils/formats_trans_utils.cc \ + dump/dump_properties.cc \ formats/format_transfers/datatype_transfer.cc \ formats/format_transfers/format_transfer_transpose.cc \ formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \ diff --git a/src/ge/common/helper/model_cache_helper.cc b/src/ge/common/helper/model_cache_helper.cc index d3b4dde5..b1a71b0a 100644 --- a/src/ge/common/helper/model_cache_helper.cc +++ b/src/ge/common/helper/model_cache_helper.cc @@ -497,7 +497,25 @@ Status ModelCacheHelper::LoadJsonFromFile(const string &file_name, Json &json) c GELOGW("Fail to open the file: %s.", path.c_str()); return INTERNAL_ERROR; } - ifs >> json; + try { + ifs >> json; + } catch (nlohmann::detail::parse_error e) { + GELOGW("Fail to load json from file, json throw an error:%s.", e.what()); + return INTERNAL_ERROR; + } catch (nlohmann::detail::invalid_iterator e) { + GELOGW("Fail to load json from file, json throw an error:%s.", e.what()); + return INTERNAL_ERROR; + } catch (nlohmann::detail::type_error e) { + GELOGW("Fail to load json from file, json throw an error:%s.", e.what()); + return INTERNAL_ERROR; + } catch (nlohmann::detail::out_of_range e) { + GELOGW("Fail to load json from file, json throw an error:%s.", e.what()); + return INTERNAL_ERROR; + } catch (nlohmann::detail::other_error e) { + GELOGW("Fail to load json from file, json throw an error:%s.", e.what()); + return 
INTERNAL_ERROR; + } + if (!json.is_object()) { GELOGW("Fail to load the json file: %s.", path.c_str()); return INTERNAL_ERROR; diff --git a/src/ge/common/helper/model_helper.cc b/src/ge/common/helper/model_helper.cc index d860f7ba..15683257 100644 --- a/src/ge/common/helper/model_helper.cc +++ b/src/ge/common/helper/model_helper.cc @@ -41,7 +41,22 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr &om_fil const uint8_t *data, size_t size) { if (size < 1 || size > UINT32_MAX) { GELOGE(PARAM_INVALID, "Add model partition failed, partition size %zu invalid", size); - ErrorManager::GetInstance().ATCReportErrMessage("E19022"); + if (size > UINT32_MAX) { + string item = "item"; + if (type == MODEL_DEF) { + item = "model info"; + } else if (type == WEIGHTS_DATA) { + item = "weight data"; + } else if (type == TASK_INFO) { + item = "task info"; + } else if (type == TBE_KERNELS) { + item = "tbe kernels"; + } else if (type == CUST_AICPU_KERNELS) { + item = "aicpu kernels"; + } + ErrorManager::GetInstance().ATCReportErrMessage("E19023", {"size", "item", "maxsize"}, + {std::to_string(size), item, std::to_string(UINT32_MAX)}); + } return PARAM_INVALID; } if (data == nullptr) { @@ -263,7 +278,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c } Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); - if (ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_) != SUCCESS) { + if (status != SUCCESS) { GELOGE(status, "Parse model content failed!"); return status; } diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc index f147db21..d301f647 100644 --- a/src/ge/common/profiling/profiling_manager.cc +++ b/src/ge/common/profiling/profiling_manager.cc @@ -51,10 +51,23 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana return profiling_manager; } -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options, + bool convert_2_phy_device_id) { #ifdef DAVINCI_SUPPORT_PROFILING vector().swap(device_id_); - device_id_.push_back(options.device_id); + // profiling need phy device id + if (!convert_2_phy_device_id) { + device_id_.push_back(options.device_id); + } else { + uint32_t phy_device_id = 0; + rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast(options.device_id), &phy_device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); + return FAILED; + } + device_id_.push_back(phy_device_id); + } + job_id_ = options.job_id; Status ret; diff --git a/src/ge/common/profiling/profiling_manager.h b/src/ge/common/profiling/profiling_manager.h index a030efd3..f4249451 100644 --- a/src/ge/common/profiling/profiling_manager.h +++ b/src/ge/common/profiling/profiling_manager.h @@ -69,7 +69,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { ProfilingManager(); virtual ~ProfilingManager(); static ProfilingManager &Instance(); - ge::Status Init(const Options &options); + ge::Status Init(const Options &options, bool convert_2_phy_device_id = false); ge::Status InitFromOptions(const Options &options); ge::Status InitFromAclCfg(const std::string &config); ge::Status StartProfiling(int32_t iter, int32_t device_id); diff --git a/src/ge/common/properties_manager.cc b/src/ge/common/properties_manager.cc index 2e2405e7..a4879460 100644 --- a/src/ge/common/properties_manager.cc +++ b/src/ge/common/properties_manager.cc @@ -172,6 +172,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &PropertiesManag return dump_properties_map_[session_id]; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void PropertiesManager::AddDumpProperties( + uint64_t session_id, const DumpProperties 
&dump_properties) { + std::lock_guard lock(mutex_); + dump_properties_map_.emplace(session_id, dump_properties); +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void PropertiesManager::RemoveDumpProperties(uint64_t session_id) { std::lock_guard lock(mutex_); auto iter = dump_properties_map_.find(session_id); diff --git a/src/ge/common/properties_manager.h b/src/ge/common/properties_manager.h index e4e84f74..9ba7f88e 100644 --- a/src/ge/common/properties_manager.h +++ b/src/ge/common/properties_manager.h @@ -23,8 +23,8 @@ #include #include -#include "graph/op_desc.h" #include "common/dump/dump_properties.h" +#include "graph/op_desc.h" namespace ge { // Configuration property management @@ -83,6 +83,10 @@ class PropertiesManager { void SetPropertyDelimiter(const std::string &de); DumpProperties &GetDumpProperties(uint64_t session_id); + + const map &GetDumpPropertiesMap() { return dump_properties_map_; } + + void AddDumpProperties(uint64_t session_id, const DumpProperties &dump_properties); void RemoveDumpProperties(uint64_t session_id); private: diff --git a/src/ge/common/util.cc b/src/ge/common/util.cc index dca50627..cbd2ee71 100644 --- a/src/ge/common/util.cc +++ b/src/ge/common/util.cc @@ -19,16 +19,16 @@ #include #include -#include #include +#include #include #include #include #include #include -#include "external/ge/ge_api_error_codes.h" #include "common/util/error_manager/error_manager.h" +#include "external/ge/ge_api_error_codes.h" #include "framework/common/debug/ge_log.h" #include "framework/common/fmk_types.h" #include "framework/common/ge_inner_error_codes.h" @@ -58,6 +58,7 @@ const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M const int kMaxFileSizeLimit = INT_MAX; const int kMaxBuffSize = 256; const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' 
'_' and chinese character"; +constexpr uint32_t MAX_CONFIG_FILE_BYTE = 10 * 1024 * 1024; } // namespace namespace ge { @@ -482,4 +483,69 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str regfree(®); return true; } + +FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { + if (file_path == nullptr) { + GELOGE(PARAM_INVALID, "Config path is null."); + return false; + } + if (!CheckInputPathValid(file_path)) { + GELOGE(PARAM_INVALID, "Config path is invalid: %s", file_path); + return false; + } + // Normalize the path + std::string resolved_file_path = RealPath(file_path); + if (resolved_file_path.empty()) { + GELOGE(PARAM_INVALID, "Invalid input file path [%s], make sure that the file path is correct.", file_path); + return false; + } + + mmStat_t stat = {0}; + int32_t ret = mmStatGet(resolved_file_path.c_str(), &stat); + if (ret != EN_OK) { + GELOGE(PARAM_INVALID, "cannot get config file status, which path is %s, maybe not exist, return %d, errcode %d", + resolved_file_path.c_str(), ret, mmGetErrorCode()); + return false; + } + if ((stat.st_mode & S_IFMT) != S_IFREG) { + GELOGE(PARAM_INVALID, "config file is not a common file, which path is %s, mode is %u", resolved_file_path.c_str(), + stat.st_mode); + return false; + } + if (stat.st_size > MAX_CONFIG_FILE_BYTE) { + GELOGE(PARAM_INVALID, "config file %s size[%ld] is larger than max config file Bytes[%u]", + resolved_file_path.c_str(), stat.st_size, MAX_CONFIG_FILE_BYTE); + return false; + } + return true; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status CheckPath(const char *path, size_t length) { + if (path == nullptr) { + GELOGE(PARAM_INVALID, "Config path is invalid."); + return PARAM_INVALID; + } + + if (strlen(path) != length) { + GELOGE(PARAM_INVALID, "Path is invalid or length of config path is not equal to given length."); + return PARAM_INVALID; + } + + if (length == 0 || length > MMPA_MAX_PATH) { + GELOGE(PARAM_INVALID, "Length of config path is 
invalid."); + return PARAM_INVALID; + } + + INT32 is_dir = mmIsDir(path); + if (is_dir != EN_OK) { + GELOGE(PATH_INVALID, "Open directory %s failed, maybe it is not exit or not a dir", path); + return PATH_INVALID; + } + + if (mmAccess2(path, M_R_OK) != EN_OK) { + GELOGE(PATH_INVALID, "Read path[%s] failed, errmsg[%s]", path, strerror(errno)); + return PATH_INVALID; + } + return SUCCESS; +} } // namespace ge diff --git a/src/ge/executor/CMakeLists.txt b/src/ge/executor/CMakeLists.txt index f3956e31..5f62c45f 100755 --- a/src/ge/executor/CMakeLists.txt +++ b/src/ge/executor/CMakeLists.txt @@ -73,6 +73,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "../graph/manager/trans_var_data_utils.cc" "../graph/manager/util/debug.cc" "../hybrid/hybrid_davinci_model_stub.cc" + "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../model/ge_model.cc" "../model/ge_root_model.cc" "../omm/csa_interact.cc" @@ -118,6 +119,7 @@ target_link_libraries(ge_executor ${slog} ${mmpa} ${msprof} + ${error_manager} rt dl) diff --git a/src/ge/executor/ge_executor.cc b/src/ge/executor/ge_executor.cc index b4e9df35..bf1e250b 100644 --- a/src/ge/executor/ge_executor.cc +++ b/src/ge/executor/ge_executor.cc @@ -182,6 +182,37 @@ bool IsDynamicImageSizeMatchModel(uint64_t image_height, uint64_t image_width, GELOGE(ge::FAILED, "Dynamic resolution (%lu,%lu) can not match the gear of model.", image_height, image_width); return false; } + +bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims, const vector> &batch_info) { + if (batch_info.empty()) { + GELOGE(ge::FAILED, "Dynamic batch info is empty."); + return false; + } + + bool find_match = false; + for (auto resolution : batch_info) { + if (cur_dynamic_dims.size() != resolution.size()) { + GELOGE(ge::FAILED, "Cur dynamic dims param num is %zu, current resolution size is %zu.", cur_dynamic_dims.size(), + resolution.size()); + return false; + } + bool flag = true; + for (std::size_t i = 0; i < resolution.size(); ++i) { + if 
(cur_dynamic_dims[i] != static_cast(resolution[i])) { + flag = false; + break; + } + } + if (flag) { + find_match = true; + break; + } + } + if (!find_match) { + GELOGE(ge::FAILED, "choose dynamic dims can not match the gear of model."); + } + return find_match; +} } // namespace namespace ge { @@ -347,9 +378,21 @@ Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, u vector cur_dynamic_dims; Status ret = GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims); if (ret != SUCCESS) { - GELOGE(FAILED, "Set cur gear dynmaic dims failed"); + GELOGE(FAILED, "Set cur gear dynamic dims failed"); return FAILED; } + std::vector> batch_info; + int32_t dynamic_type = static_cast(FIXED); + ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); + if (ret != SUCCESS) { + GELOGE(ret, "Get dynamic input info failed."); + return ret; + } + + if (!IsDynmaicDimsSizeMatchModel(cur_dynamic_dims, batch_info)) { + GELOGE(PARAM_INVALID, "The current dynamic input does not match the gear of the model."); + return PARAM_INVALID; + } ret = GraphExecutor::SetDynamicSize(model_id, cur_dynamic_dims, static_cast(DYNAMIC_DIMS)); if (ret != SUCCESS) { @@ -410,6 +453,10 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector & for (std::size_t i = 0; i < all_data_dims.size(); ++i) { if (all_data_dims[i] < 0) { cur_dynamic_dims.push_back(dynamic_dims[i]); + } else if (static_cast(all_data_dims[i]) != dynamic_dims[i]) { + GELOGE(PARAM_INVALID, "Static dims should be same, index: %zu value: %d should be %d", i, dynamic_dims[i], + all_data_dims[i]); + return PARAM_INVALID; } } return SUCCESS; diff --git a/src/ge/executor/module.mk b/src/ge/executor/module.mk index 6b2de8f2..bb642da9 100644 --- a/src/ge/executor/module.mk +++ b/src/ge/executor/module.mk @@ -60,6 +60,7 @@ local_ge_executor_src_files := \ ../single_op/task/aicpu_task_builder.cc \ ../single_op/task/aicpu_kernel_task_builder.cc \ ../hybrid/hybrid_davinci_model_stub.cc\ + 
../hybrid/node_executor/aicpu/aicpu_ext_info.cc \ local_ge_executor_c_include := \ proto/insert_op.proto \ @@ -87,6 +88,7 @@ local_ge_executor_shared_library := \ libgraph \ libregister \ libmsprof \ + liberror_manager \ local_ge_executor_ldflags := -lrt -ldl \ @@ -137,6 +139,7 @@ LOCAL_SHARED_LIBRARIES := \ libgraph \ libregister \ libmsprof \ + liberror_manager \ LOCAL_LDFLAGS += $(local_ge_executor_ldflags) diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk index 3b9e17ea..f83e590a 100644 --- a/src/ge/ge_inference.mk +++ b/src/ge/ge_inference.mk @@ -254,6 +254,7 @@ OME_HOST_SRC_FILES := \ single_op/stream_resource.cc \ single_op/single_op_manager.cc \ hybrid/hybrid_davinci_model_stub.cc \ + hybrid/node_executor/aicpu/aicpu_ext_info.cc \ # graph/load/new_model_manager/task_info/hccl_task_info.cc OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) @@ -286,6 +287,7 @@ COMMON_LOCAL_C_INCLUDES := \ $(TOPDIR)inc/runtime \ $(TOPDIR)libc_sec/include \ $(TOPDIR)ops/built-in/op_proto/inc \ + $(TOPDIR)toolchain/ide/ide-daemon/external \ third_party/json/include \ third_party/protobuf/include \ third_party/opencv/include \ @@ -340,6 +342,7 @@ DEVICE_LOCAL_C_INCLUDES := \ $(TOPDIR)inc/runtime \ $(TOPDIR)ops/built-in/op_proto/inc \ $(TOPDIR)framework/domi \ + $(TOPDIR)toolchain/ide/ide-daemon/external \ third_party/json/include \ third_party/protobuf/include \ third_party/opencv/include \ @@ -368,6 +371,7 @@ LOCAL_SRC_FILES += $(BUILER_SRC_FILES) LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ + libadump_server_stub \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ @@ -432,6 +436,7 @@ LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES) LOCAL_STATIC_LIBRARIES := libge_memory \ + libadump_server_stub \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ diff --git a/src/ge/ge_local_engine/engine/host_cpu_engine.cc b/src/ge/ge_local_engine/engine/host_cpu_engine.cc index eb7741c0..fc46385b 100644 --- 
a/src/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/src/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -25,40 +25,65 @@ #include "common/ge/plugin_manager.h" #include "graph/utils/type_utils.h" #include "common/fp16_t.h" +#include "common/math/math_util.h" namespace { -#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ - case (DTYPE): { \ - GeTensorPtr ge_tensor = nullptr; \ - if (need_create_flag) { \ - int64_t data_num = out_desc.GetShape().IsScalar() ? 1 : out_desc.GetShape().GetShapeSize(); \ - std::unique_ptr buf(new (std::nothrow) TYPE[data_num]()); \ - if (buf == nullptr) { \ - GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ - static_cast(sizeof(TYPE) * data_num)); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor = MakeShared(out_desc); \ - GE_CHECK_NOTNULL(ge_tensor); \ - GELOGI("node:%s allocate output %zu, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ - ge_tensor->SetData(reinterpret_cast(buf.get()), data_num * sizeof(TYPE)); \ - ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ - ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ - outputs.emplace_back(ge_tensor); \ - } else { \ - ge_tensor = outputs[i]; \ - GE_CHECK_NOTNULL(ge_tensor); \ - GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \ - reinterpret_cast(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \ - } \ - auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ - auto tensor_name = op_desc->GetOutputNameByIndex(i); \ - GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ - op_desc->GetName().c_str(), i); \ - GELOGD("Successfully inserted output tensor. 
node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ - op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ - named_outputs.emplace(tensor_name, tensor); \ - break; \ +#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ + case (DTYPE): { \ + GeTensorPtr ge_tensor = nullptr; \ + if (need_create_flag) { \ + int64_t num_size = out_desc.GetShape().IsScalar() ? 1 : out_desc.GetShape().GetShapeSize(); \ + if (out_desc.GetShape().IsUnknownShape()) { \ + std::vector> range; \ + if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) { \ + GELOGE(INTERNAL_ERROR, "Get shape range failed, node:%s", op_desc->GetName().c_str()); \ + return INTERNAL_ERROR; \ + } \ + int64_t max_range_size = 1; \ + for (const auto &item : range) { \ + FMK_INT64_MULCHECK(max_range_size, item.second); \ + max_range_size *= item.second; \ + } \ + num_size = max_range_size; \ + } \ + if (num_size < 0) { \ + GELOGE(INTERNAL_ERROR, "node:%s, get size for output %zu failed, num=%lld", op_desc->GetName().c_str(), i, \ + num_size); \ + return INTERNAL_ERROR; \ + } \ + auto data_num = static_cast(num_size); \ + GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ + std::unique_ptr buf(new (std::nothrow) TYPE[data_num]()); \ + if (buf == nullptr) { \ + GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ + static_cast(sizeof(TYPE) * data_num)); \ + return MEMALLOC_FAILED; \ + } \ + ge_tensor = MakeShared(out_desc); \ + GE_CHECK_NOTNULL(ge_tensor); \ + GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, \ + data_num * sizeof(TYPE)); \ + if (ge_tensor->SetData(reinterpret_cast(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ + GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ + return MEMALLOC_FAILED; \ + } \ + ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ + 
ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ + outputs.emplace_back(ge_tensor); \ + } else { \ + ge_tensor = outputs[i]; \ + GE_CHECK_NOTNULL(ge_tensor); \ + GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \ + reinterpret_cast(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \ + } \ + auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ + auto tensor_name = op_desc->GetOutputNameByIndex(i); \ + GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ + op_desc->GetName().c_str(), i); \ + GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ + op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ + named_outputs.emplace(tensor_name, tensor); \ + break; \ } } // namespace diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk index b4d27b1b..7a65787c 100644 --- a/src/ge/ge_runner.mk +++ b/src/ge/ge_runner.mk @@ -296,6 +296,7 @@ LIBGE_LOCAL_SRC_FILES := \ LIBCLIENT_LOCAL_SRC_FILES := \ proto/ge_api.proto \ client/ge_api.cc \ + client/ge_prof.cc \ RUNNER_LOCAL_C_INCLUDES := \ $(LOCAL_PATH) ./ \ @@ -312,6 +313,7 @@ RUNNER_LOCAL_C_INCLUDES := \ $(TOPDIR)libc_sec/include \ $(TOPDIR)ops/built-in/op_proto/inc \ $(TOPDIR)framework/domi/analyzer \ + $(TOPDIR)toolchain/ide/ide-daemon/external \ proto/fwk_adapter.proto \ proto/ge_ir.proto \ proto/insert_op.proto \ @@ -353,6 +355,7 @@ LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ + libadump_server \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ @@ -371,6 +374,7 @@ LOCAL_LDFLAGS := -lrt -ldl LOCAL_SHARED_LIBRARIES += \ libruntime \ libresource \ + stub/libascend_hal \ include $(BUILD_HOST_SHARED_LIBRARY) @@ -389,6 +393,7 @@ endif LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc 
+LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_prof.cc LOCAL_SHARED_LIBRARIES := @@ -438,6 +443,7 @@ LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ + libadump_server \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ @@ -450,6 +456,7 @@ LOCAL_LDFLAGS := -lrt -ldl LOCAL_SHARED_LIBRARIES += \ libruntime \ libresource \ + stub/libascend_hal \ include $(BUILD_HOST_STATIC_LIBRARY) @@ -469,6 +476,7 @@ LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ + libadump_server \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ @@ -481,5 +489,6 @@ LOCAL_LDFLAGS := -lrt -ldl LOCAL_SHARED_LIBRARIES += \ libruntime \ libresource \ + libascend_hal \ include $(BUILD_STATIC_LIBRARY) diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc index 53b5b71c..746f73c2 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.cc +++ b/src/ge/graph/build/memory/block_mem_assigner.cc @@ -1296,6 +1296,11 @@ void MergeBlocks(std::vector &dest, std::vector &s return; } if (dest[i] != nullptr && src[i] != nullptr) { + if (!dest[i]->reuse_mem_ || !src[i]->reuse_mem_) { + GELOGD("Diff batch's workspace can't be reused, i: %zu, dest[i]: %s, stream: %ld, src[i]: %s, stream: %ld.", i, + dest[i]->String().c_str(), dest[i]->stream_id_, src[i]->String().c_str(), src[i]->stream_id_); + continue; + } for (auto &symbol : src[i]->SymbolList()) { dest[i]->AddSymbol(symbol); } diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc index c9a6b8a2..583f65d8 100644 --- a/src/ge/graph/build/memory/graph_mem_assigner.cc +++ b/src/ge/graph/build/memory/graph_mem_assigner.cc @@ -227,7 +227,10 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offse if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { 
GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize()); - ErrorManager::GetInstance().ATCReportErrMessage("E19022"); + ErrorManager::GetInstance().ATCReportErrMessage( + "E19022", {"size", "item", "maxsize"}, + {std::to_string(mem_offset), "featuremap", + std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())}); return ge::FAILED; } return SUCCESS; @@ -908,6 +911,8 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); return ret; } + } else { + GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str()); } return SUCCESS; @@ -1452,14 +1457,56 @@ Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int6 return SUCCESS; } +ge::Status GraphMemoryAssigner::IsIndependentAtomicClean(const ge::NodePtr &node, + bool &is_independent_atomic_clean_node) { + GE_CHECK_NOTNULL(node); + const auto &out_control_anchor = node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(out_control_anchor); + for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) { + if (peer_in_control_anchor != nullptr) { + auto peer_in_node = peer_in_control_anchor->GetOwnerNode(); + auto peer_in_node_desc = peer_in_node->GetOpDesc(); + if (peer_in_node_desc != nullptr) { + bool is_atomic_node = false; + // If GetBool fail, is_atomic_node is false. + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node); + if (is_atomic_node) { + vector is_connect_netoutput; + // If GetBool fail, attr is_connect_netoutput is an empty vector. 
+ (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); + if (!is_connect_netoutput.empty()) { + GELOGD("Peer in node %s is independent atomic clean node", peer_in_node->GetName().c_str()); + is_independent_atomic_clean_node = true; + break; + } + } + } + } + } + + return SUCCESS; +} + ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector &atomic_mem_start, const vector &atomic_mem_size) { for (ge::NodePtr &node : compute_graph_->GetAllNodes()) { auto node_op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); - if (((n != nullptr) && (node->GetName() == n->GetName())) || - ((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { + bool is_valid_atomic_clean_node = (n != nullptr) && (node->GetName() == n->GetName()); + + if (((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { + bool is_independent_atomic_clean = false; + if (IsIndependentAtomicClean(node, is_independent_atomic_clean) != SUCCESS) { + GELOGE(FAILED, "Failed to determine the connection relationship of atomic addr clean node."); + return PARAM_INVALID; + } + + is_valid_atomic_clean_node = is_valid_atomic_clean_node || (!is_independent_atomic_clean); + } + + if (is_valid_atomic_clean_node) { + GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); vector workspace_vector = node_op_desc->GetWorkspace(); vector workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); diff --git a/src/ge/graph/build/memory/graph_mem_assigner.h b/src/ge/graph/build/memory/graph_mem_assigner.h index daec2f75..e1e408be 100644 --- a/src/ge/graph/build/memory/graph_mem_assigner.h +++ b/src/ge/graph/build/memory/graph_mem_assigner.h @@ -175,6 +175,8 @@ class GraphMemoryAssigner { ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector &atomic_mem_start, const 
std::vector &atomic_mem_size); + ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); + void AlignMemOffset(const int64_t &mem_align_size); ge::Status UpdateOpInputOffset(const NodePtr &node, vector &input_list) const; diff --git a/src/ge/graph/build/task_generator.cc b/src/ge/graph/build/task_generator.cc index 8f8f28b3..58a8bf7b 100644 --- a/src/ge/graph/build/task_generator.cc +++ b/src/ge/graph/build/task_generator.cc @@ -266,6 +266,14 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra if (is_unknown_shape) { GE_CHK_STATUS_RET(SetUnknownShapeStream(run_context, stream), "Set unknown shape stream failed."); } + std::function callback = [&]() { + if (is_unknown_shape) { + if (DestroyUnknownShapeStream(run_context, stream) != SUCCESS) { + GELOGE(FAILED, "Destory unknown shape stream failed."); + } + } + }; + GE_MAKE_GUARD(release, callback); for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); @@ -352,9 +360,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, task_list_size_after - task_list_size_before); } - if (is_unknown_shape) { - GE_CHK_STATUS_RET(DestroyUnknownShapeStream(run_context, stream), "Destory unknown shape stream failed."); - } GE_TIMESTAMP_CALLNUM_EVENT_END(GenerateTask, "GraphBuild::GenerateTask"); return SUCCESS; } @@ -532,6 +537,9 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { (void)ge_lib->DNNEngineManagerObj().GetDNNEngineName(node); } + (void)op_desc->DelAttr(kIsFirstNode); + (void)op_desc->DelAttr(kIsLastNode); + all_stream_ops[op_desc->GetStreamId()].emplace_back(op_desc); } @@ -645,8 +653,6 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP vector &all_reduce_nodes) const { GELOGI("Start AutoFindBpOpIndex"); NodePtr bp_node = nullptr; - 
uint32_t last_bp = 0; - uint32_t iter_end = 0; uint32_t current_idx = 0; for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); @@ -662,20 +668,40 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP all_reduce_nodes.emplace_back(current_idx); GELOGI("Allreduce name %s, idx %u", op_desc->GetName().c_str(), current_idx); } - if (op_desc->GetType() == NETOUTPUT) { + if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { if (bp_node == nullptr) { bp_node = node; } - iter_end = current_idx; - GELOGI("Iter end name %s, idx %u", op_desc->GetName().c_str(), iter_end); + } + if (graph->GetNeedIteration()) { + if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { + profiling_point.end_index.insert(current_idx); + GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", + op_desc->GetName().c_str(), current_idx); + } + if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGN) { + profiling_point.end_index.insert(current_idx); + GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", op_desc->GetName().c_str(), current_idx); + } + } else { + if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { + profiling_point.end_index.insert(current_idx); + GELOGI("Iter end name %s, idx %u, from NETOUTPUT", op_desc->GetName().c_str(), current_idx); + } } } - profiling_point.end_index = iter_end; if (bp_node == nullptr) { GELOGW("not find bp_node."); return SUCCESS; } + + profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); + return SUCCESS; +} + +uint32_t TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node) const { + uint32_t last_bp = 0; OpDescPtr bp_op_desc = nullptr; for (auto &in_anchor : bp_node->GetAllInDataAnchors()) { auto out_anchor = in_anchor->GetPeerOutAnchor(); @@ -691,7 +717,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP } 
GE_CHECK_NOTNULL(bp_op_desc); - current_idx = 0; + uint32_t current_idx = 0; for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -702,8 +728,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP break; } } - profiling_point.bp_index = last_bp; - return SUCCESS; + return last_bp; } Status TaskGenerator::FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, @@ -734,7 +759,6 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin ProfilingPoint &profiling_point, vector &all_reduce_nodes) const { GELOGI("Start FindBpOfEnv"); uint32_t current_idx = 0; - uint32_t iter_end = 0; uint32_t last_bp = 0; for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); @@ -745,10 +769,23 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin continue; } - if (op_desc->GetType() == NETOUTPUT) { - iter_end = current_idx; - GELOGI("Iter end name %s, idx %u", op_desc->GetName().c_str(), iter_end); + if (graph->GetNeedIteration()) { + if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { + profiling_point.end_index.insert(current_idx); + GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", + op_desc->GetName().c_str(), current_idx); + } + if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGN) { + profiling_point.end_index.insert(current_idx); + GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", op_desc->GetName().c_str(), current_idx); + } + } else { + if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { + profiling_point.end_index.insert(current_idx); + GELOGI("Iter end name %s, idx %u, from NETOUTPUT", op_desc->GetName().c_str(), current_idx); + } } + if (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE) { 
all_reduce_nodes.emplace_back(current_idx); GELOGI("Allreduce name %s, idx %u", op_desc->GetName().c_str(), current_idx); @@ -760,7 +797,6 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin } profiling_point.bp_index = last_bp; - profiling_point.end_index = iter_end; return SUCCESS; } @@ -857,7 +893,7 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || ProfilingManager::Instance().ProfilingTrainingTraceOn(); if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || - (profiling_point.end_index == 0)) { + (profiling_point.end_index.empty())) { return SUCCESS; } if (profiling_point.fp_index == node_index) { @@ -914,7 +950,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || ProfilingManager::Instance().ProfilingTrainingTraceOn(); if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || - (profiling_point.end_index == 0)) { + (profiling_point.end_index.empty())) { return SUCCESS; } if (profiling_point.bp_index == node_index) { @@ -928,7 +964,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P bp_log_def->set_notify(false); task_def_list.emplace_back(bp_task_def); } - if (profiling_point.end_index == node_index) { + if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) { GELOGI("The iteration end operator is %s, idx %u", op_desc->GetName().c_str(), node_index); TaskDef end_task_def; end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); diff --git a/src/ge/graph/build/task_generator.h b/src/ge/graph/build/task_generator.h index 6bd3ab03..0d482afe 100644 --- a/src/ge/graph/build/task_generator.h +++ b/src/ge/graph/build/task_generator.h @@ -36,7 +36,7 @@ 
class OpsKernelManager; struct ProfilingPoint { uint32_t fp_index = 0; uint32_t bp_index = 0; - uint32_t end_index = 0; + std::set end_index; }; // Describes infos needed by generate task for fusion node struct FusionTaskInfo { @@ -112,6 +112,7 @@ class TaskGenerator { Status AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point) const; Status AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, vector &all_reduce_nodes) const; + uint32_t FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node) const; Status FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, ProfilingPoint &profiling_point) const; diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index 7eddde8e..3c2aaffa 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -125,6 +125,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptrGetId()); + if (it == zero_copy_op_id_batch_label_.end()) { + zero_copy_task.SetBatchLabel(kDefaultBatchLable); + } else { + zero_copy_task.SetBatchLabel(it->second); + } std::lock_guard lock(outside_addrs_mutex_); if (zero_copy_task.IsTaskArgsSet()) { @@ -3045,6 +3052,9 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & data.first, addr, size, buffer_addr); // For input data, just copy for rts task. 
for (ZeroCopyTask &task : zero_copy_tasks_) { + if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) { + continue; + } uintptr_t addr_val = reinterpret_cast(addr); if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) { return FAILED; @@ -3365,6 +3375,7 @@ Status DavinciModel::InitModelStream(rtStream_t stream) { if (is_async_mode_) { rt_model_stream_ = stream; is_inner_model_stream_ = false; + last_execute_mode_ = true; return SUCCESS; } @@ -3376,12 +3387,14 @@ Status DavinciModel::InitModelStream(rtStream_t stream) { rt_model_stream_ = stream; is_inner_model_stream_ = false; + last_execute_mode_ = false; return SUCCESS; } - if (rt_model_stream_ == nullptr) { + if (last_execute_mode_ || (rt_model_stream_ == nullptr)) { GE_CHK_RT_RET(rtStreamCreateWithFlags(&rt_model_stream_, priority_, RT_STREAM_FORBIDDEN_DEFAULT)); is_inner_model_stream_ = true; + last_execute_mode_ = false; } return SUCCESS; @@ -3516,7 +3529,7 @@ uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { } void DavinciModel::FreeFeatureMapMem() { - if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { + if (std::getenv(kEnvGeuseStaticMemory) != nullptr && is_inner_mem_base_) { string weight_memory_key = std::to_string(0) + "_f"; if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h index ea94c22c..15f4539f 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.h +++ b/src/ge/graph/load/new_model_manager/davinci_model.h @@ -884,6 +884,7 @@ class DavinciModel { bool is_inner_model_stream_; bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. 
+ bool last_execute_mode_; bool is_stream_list_bind_{false}; bool is_pure_head_stream_{false}; diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index cc8c8539..320bfb16 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -43,6 +43,13 @@ const std::string kCmdTypeProfInit = "prof_init"; const std::string kCmdTypeProfFinalize = "prof_finalize"; const std::string kCmdTypeProfStart = "prof_start"; const std::string kCmdTypeProfStop = "prof_stop"; +const char *const kLoadOpFromBuf = "loadOpFromBuf"; +struct CustAicpuSoBuf { + uint64_t kernelSoBuf; + uint32_t kernelSoBufLen; + uint64_t kernelSoName; + uint32_t kernelSoNameLen; +} __attribute__((packed)); } // namespace DumpProperties ModelManager::dump_properties_; @@ -163,7 +170,13 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { GELOGI("The session: %lu not created.", session_id); return; } else { - GE_CHK_RT(rtSetDevice(static_cast(GetContext().DeviceId()))); + rtContext_t ctx = nullptr; + bool has_ctx = (rtCtxGetCurrent(&ctx) == RT_ERROR_NONE); + if (!has_ctx) { + GELOGI("Set device %u.", GetContext().DeviceId()); + GE_CHK_RT(rtSetDevice(static_cast(GetContext().DeviceId()))); + } + Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0); if (ret != SUCCESS) { GELOGW("The session: %lu destroy failed.", session_id); @@ -171,7 +184,11 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { (void)sess_ids_.erase(session_id); GELOGI("The session: %lu destroyed.", session_id); } - GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); + + if (!has_ctx) { + GELOGI("Reset device %u.", GetContext().DeviceId()); + GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); + } } } @@ -219,6 +236,7 @@ ModelManager::~ModelManager() { std::lock_guard lock(map_mutex_); model_map_.clear(); 
model_aicpu_kernel_.clear(); + cust_aicpu_so_.clear(); GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); } @@ -919,7 +937,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model } davinci_model->SetDeviceId(device_id); davinci_model->SetOmName(model.om_name); - if (DumpManager::GetInstance().IsDumpOpen()) { + if (DumpManager::GetInstance().GetDumpProperties().IsDumpOpen()) { davinci_model->SetDumpProperties(DumpManager::GetInstance().GetDumpProperties()); } else { davinci_model->SetDumpProperties(dump_properties_); @@ -1070,6 +1088,67 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { return SUCCESS; } +Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, string so_name) { + std::lock_guard lock(cust_aicpu_mutex_); + auto it = cust_aicpu_so_.find(so_name); + if (it == cust_aicpu_so_.end()) { + GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s", + op_desc->GetName().c_str(), so_name.c_str()); + (void)cust_aicpu_so_.insert(so_name); + GELOGI("LaunchCustAicpuSo op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str()); + } + return SUCCESS; +} + +Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name) { + CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); + if (aicpu_kernel == nullptr) { + GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str()); + return INTERNAL_ERROR; + } + const void *aicpu_data = aicpu_kernel->GetBinData(); + uint32_t aicpu_data_length = aicpu_kernel->GetBinDataSize(); + + void *d_aicpu_data = nullptr; + void *d_so_name = nullptr; + void *args = nullptr; + rtError_t status; + rtStream_t stream = nullptr; + GE_CHK_RT(rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM)); + GE_CHK_RT(rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE)); + 
GE_CHK_RT(rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM)); + GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast(so_name.c_str()), so_name.size(), + RT_MEMCPY_HOST_TO_DEVICE)); + + CustAicpuSoBuf cust_aicpu_so_buf; + cust_aicpu_so_buf.kernelSoBuf = reinterpret_cast(reinterpret_cast(d_aicpu_data)); + cust_aicpu_so_buf.kernelSoBufLen = aicpu_data_length; + cust_aicpu_so_buf.kernelSoName = reinterpret_cast(reinterpret_cast(d_so_name)); + cust_aicpu_so_buf.kernelSoNameLen = so_name.size(); + + uint32_t args_size = sizeof(CustAicpuSoBuf); + GE_CHK_RT(rtMalloc(&args, args_size, RT_MEMORY_HBM)); + GE_CHK_RT(rtMemcpy(args, args_size, static_cast(&cust_aicpu_so_buf), args_size, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtStreamCreate(&stream, 0)); + GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream)); + + status = rtStreamSynchronize(stream); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); + GE_CHK_RT(rtStreamDestroy(stream)); + GE_CHK_RT(rtFree(args)); + GE_CHK_RT(rtFree(d_aicpu_data)); + GE_CHK_RT(rtFree(d_so_name)); + return RT_ERROR_TO_GE_STATUS(status); + } + GE_CHK_RT(rtStreamDestroy(stream)); + GE_CHK_RT(rtFree(args)); + GE_CHK_RT(rtFree(d_aicpu_data)); + GE_CHK_RT(rtFree(d_so_name)); + GELOGI("Cpu kernel launch loadOpFromBuf task success."); + return SUCCESS; +} + /// /// @ingroup ge /// @brief get model memory size and weight diff --git a/src/ge/graph/load/new_model_manager/model_manager.h b/src/ge/graph/load/new_model_manager/model_manager.h index 0eaab1db..e89bfc36 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.h +++ b/src/ge/graph/load/new_model_manager/model_manager.h @@ -268,6 +268,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); + ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, string so_name); + + ge::Status LaunchCustAicpuSo(const OpDescPtr 
op_desc, string so_name); + ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); ge::Status GenSessionId(uint64_t &session_id); @@ -333,6 +337,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { uint64_t session_id_bias_; std::set sess_ids_; std::vector exception_infos_; + std::mutex cust_aicpu_mutex_; + std::set cust_aicpu_so_; static DumpProperties dump_properties_; }; diff --git a/src/ge/graph/load/new_model_manager/model_utils.cc b/src/ge/graph/load/new_model_manager/model_utils.cc index 8a92e1e6..9cbb684f 100644 --- a/src/ge/graph/load/new_model_manager/model_utils.cc +++ b/src/ge/graph/load/new_model_manager/model_utils.cc @@ -29,6 +29,14 @@ #include "framework/common/debug/ge_log.h" #include "graph/manager/graph_var_manager.h" +#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ + do { \ + if (SIZE <= static_cast(OFFSET)) { \ + GELOGE(OUT_OF_MEMORY, "Node: %s, memory out of range[%lu: %ld]", OP->GetName().c_str(), SIZE, OFFSET); \ + return {}; \ + } \ + } while (0) + namespace ge { /// /// @ingroup ge @@ -38,7 +46,7 @@ namespace ge { vector ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { vector v_input_size; GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_size); - const size_t inputs_size = op_desc->GetInputsSize(); + const size_t inputs_size = op_desc->GetAllInputsSize(); const string op_type = op_desc->GetType(); const vector v_is_input_const = op_desc->GetIsInputConst(); @@ -151,7 +159,7 @@ vector ModelUtils::GetWeightSize(ConstOpDescPtr op_desc) { } // other ops get weight from connected constop - const size_t inputs_size = op_desc->GetInputsSize(); + const size_t inputs_size = op_desc->GetAllInputsSize(); const vector v_is_input_const = op_desc->GetIsInputConst(); for (size_t i = 0; i < inputs_size; ++i) { if ((i < v_is_input_const.size()) && v_is_input_const[i]) { @@ -191,7 +199,7 @@ vector ModelUtils::GetWeights(ConstOpDescPtr op_desc) { } // other ops get weight from connected constop - 
const size_t inputs_size = op_desc->GetInputsSize(); + const size_t inputs_size = op_desc->GetAllInputsSize(); const vector v_is_input_const = op_desc->GetIsInputConst(); for (size_t i = 0; i < inputs_size; ++i) { if ((i < v_is_input_const.size()) && v_is_input_const[i]) { @@ -221,7 +229,7 @@ vector<::tagCcAICPUTensor> ModelUtils::GetInputDescs(ConstOpDescPtr op_desc) { vector<::opTensor_t> v_input_descs; GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_descs); - const size_t inputs_size = op_desc->GetInputsSize(); + const size_t inputs_size = op_desc->GetAllInputsSize(); const vector v_is_input_const = op_desc->GetIsInputConst(); for (size_t i = 0; i < inputs_size; ++i) { @@ -306,7 +314,7 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_data_addr); uint64_t session_id = model_param.session_id; - const size_t inputs_size = op_desc->GetInputsSize(); + const size_t inputs_size = op_desc->GetAllInputsSize(); const vector v_input_offset = op_desc->GetInputOffset(); const string op_type = op_desc->GetType(); @@ -334,6 +342,7 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co if (tensor_size) { int64_t data_offset = 0; GE_CHK_STATUS(TensorUtils::GetDataOffset(*tensor_desc, data_offset)); + VALIDATE_MEM_RANGE(op_desc, model_param.weight_size, data_offset); uint8_t *weight_addr = model_param.weight_base + data_offset; v_input_data_addr.push_back(weight_addr); GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, @@ -345,11 +354,12 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), GELOGW("offsets=%zu, inputs=%zu, index=%zu.", v_input_offset.size(), inputs_size, non_const_index); - break;); + break); int64_t input_offset = v_input_offset[non_const_index]; non_const_index++; GE_IF_BOOL_EXEC(model_param.var_size != 0 && 
ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; v_input_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", @@ -363,6 +373,7 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co mem_addr = reinterpret_cast(reinterpret_cast(input_offset)); v_input_data_addr.push_back(mem_addr); } else { + VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset); mem_addr = model_param.mem_base + input_offset; v_input_data_addr.push_back(mem_addr); } @@ -398,6 +409,7 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C } for (size_t i = 0; i < outputs_size; ++i) { GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; v_output_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", @@ -410,6 +422,7 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C mem_addr = reinterpret_cast(reinterpret_cast(v_output_offset[i])); v_output_data_addr.push_back(mem_addr); } else { + VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]); mem_addr = static_cast(model_param.mem_base + v_output_offset[i]); v_output_data_addr.push_back(mem_addr); } @@ -440,15 +453,19 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { 
v_workspace_data_addr.push_back(reinterpret_cast(reinterpret_cast(v_workspace_offset[i]))); - GELOGI("Fusion: op: %s, GetWorkspaceDataAddrs mem_addr[workspace index %zu]:%p", op_desc->GetName().c_str(), i, - reinterpret_cast(reinterpret_cast(v_workspace_offset[i]))); + GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[L1] name[%s], mem_addr[workspace index %zu]:0x%lx", + model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i]); + } else if (v_workspace_bytes[i] == 0) { + v_workspace_data_addr.push_back(nullptr); + GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] Null addr", + model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); } else { - int64_t workspace_offset = v_workspace_offset[i]; - int64_t workspace_bytes = v_workspace_bytes[i]; - uint8_t *mem_addr = workspace_bytes == 0 ? nullptr : model_param.mem_base + workspace_offset; + VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); + uint8_t *mem_addr = model_param.mem_base + v_workspace_offset[i]; v_workspace_data_addr.push_back(mem_addr); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, workspace_offset, workspace_bytes, mem_addr); + model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], + mem_addr); } } diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 0cac91eb..3964e0d5 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -26,6 +26,7 @@ #include "framework/common/l2_cache_optimize.h" #include "graph/debug/ge_attr_define.h" #include "graph/load/new_model_manager/davinci_model.h" +#include 
"graph/load/new_model_manager/model_manager.h" #include "graph/load/new_model_manager/model_utils.h" #include "runtime/kernel.h" #include "super_kernel/super_kernel.h" @@ -41,13 +42,6 @@ const char *kIsLastNode = "is_last_node"; const char *kIsFirstNode = "is_first_node"; const int64_t kCloseSkt = 100; const uint32_t kAddrLen = sizeof(void *); -const char *const kLoadOpFromBuf = "loadOpFromBuf"; -struct CustAicpuSoBuf { - uint64_t kernelSoBuf; - uint32_t kernelSoBufLen; - uint64_t kernelSoName; - uint32_t kernelSoNameLen; -} __attribute__((packed)); } // namespace namespace ge { @@ -861,92 +855,6 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { return SUCCESS; } -Status KernelTaskInfo::LaunchCustAicpuSo(const OpDescPtr op_desc, const domi::KernelDef &kernel_def) { - CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); - if (aicpu_kernel == nullptr) { - GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str()); - return INTERNAL_ERROR; - } - const void *aicpu_data = aicpu_kernel->GetBinData(); - uint32_t aicpu_data_length = aicpu_kernel->GetBinDataSize(); - - void *d_aicpu_data = nullptr; - rtError_t status = rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); - return RT_ERROR_TO_GE_STATUS(status); - } - - status = rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_ERROR_TO_GE_STATUS(status); - } - - void *d_so_name = nullptr; - status = rtMalloc(&d_so_name, so_name_.size(), RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); - return RT_ERROR_TO_GE_STATUS(status); - } - - status = rtMemcpy(d_so_name, so_name_.size(), 
reinterpret_cast(so_name_.c_str()), so_name_.size(), - RT_MEMCPY_HOST_TO_DEVICE); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_ERROR_TO_GE_STATUS(status); - } - - CustAicpuSoBuf cust_aicpu_so_buf; - cust_aicpu_so_buf.kernelSoBuf = reinterpret_cast(reinterpret_cast(d_aicpu_data)); - cust_aicpu_so_buf.kernelSoBufLen = aicpu_data_length; - cust_aicpu_so_buf.kernelSoName = reinterpret_cast(reinterpret_cast(d_so_name)); - cust_aicpu_so_buf.kernelSoNameLen = so_name_.size(); - - void *args = nullptr; - uint32_t args_size = sizeof(CustAicpuSoBuf); - status = rtMalloc(&args, args_size, RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); - return RT_ERROR_TO_GE_STATUS(status); - } - GELOGI("loadOpFromBuf kernelSoBuf %p, kernelSoBufLen %u, kernelSoName %p, kernelSoNameLen %u.", d_aicpu_data, - aicpu_data_length, d_so_name, so_name_.size()); - - status = rtMemcpy(args, args_size, static_cast(&cust_aicpu_so_buf), args_size, RT_MEMCPY_HOST_TO_DEVICE); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_ERROR_TO_GE_STATUS(status); - } - - rtStream_t stream = nullptr; - status = rtStreamCreate(&stream, 0); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt create stream failed, status: 0x%x", status); - return RT_ERROR_TO_GE_STATUS(status); - } - - status = rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt CpuKernelLaunch loadOpFromBuf failed, status: 0x%X", status); - return RT_ERROR_TO_GE_STATUS(status); - } - GELOGI("Cpu kernel launch loadOpFromBuf."); - - status = rtStreamSynchronize(stream); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); - return RT_ERROR_TO_GE_STATUS(status); - } - - GE_CHK_RT(rtFree(args)); - 
GE_CHK_RT(rtFree(d_aicpu_data)); - GE_CHK_RT(rtFree(d_so_name)); - - GELOGI("Cpu kernel launch loadOpFromBuf task success."); - return SUCCESS; -} - Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &kernel_def) { GELOGI("Do InitAicpuTask"); so_name_ = kernel_def.so_name(); @@ -961,7 +869,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, kernel_def), "launch cust aicpu so failed"); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed"); } // copy args to new host memory diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index 1c45682e..8ada2082 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -106,8 +106,6 @@ class KernelTaskInfo : public TaskInfo { Status InitAicpuTaskExtInfo(const std::string &ext_info); - Status LaunchCustAicpuSo(const OpDescPtr op_desc, const domi::KernelDef &kernel_def); - Status StoreInputOutputTensor(const std::vector &input_data_addrs, const std::vector &output_data_addrs, const std::vector<::tagCcAICPUTensor> &input_descs, diff --git a/src/ge/graph/load/new_model_manager/zero_copy_task.cc b/src/ge/graph/load/new_model_manager/zero_copy_task.cc index 30ce8a86..7db9c459 100644 --- a/src/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/src/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -118,13 +118,11 @@ bool ZeroCopyTask::CheckDynamicBatch(const map> &batch_ad */ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map> &batch_addrs, const string &batch_label) { - for (auto pair : task_addr_offset_) { - if (pair.first != addr) { - continue; - } - + auto iter = 
task_addr_offset_.find(addr); + if (iter != task_addr_offset_.end()) { + auto &cur_pair = *iter; uint8_t *args_info = args_info_.data(); - for (auto offset : pair.second) { + for (auto offset : cur_pair.second) { if (!CheckDynamicBatch(batch_addrs, batch_label, reinterpret_cast(args_addr_ + offset))) { continue; } diff --git a/src/ge/graph/load/new_model_manager/zero_copy_task.h b/src/ge/graph/load/new_model_manager/zero_copy_task.h index 799844a5..c83387e8 100644 --- a/src/ge/graph/load/new_model_manager/zero_copy_task.h +++ b/src/ge/graph/load/new_model_manager/zero_copy_task.h @@ -83,6 +83,10 @@ class ZeroCopyTask { */ ge::Status DistributeParam(bool async_mode, rtStream_t stream); + void SetBatchLabel(const string &batch_label) { batch_label_ = batch_label; } + + const string &GetBatchLabel() const { return batch_label_; } + protected: bool CheckDynamicBatch(const map> &batch_addrs, const string &batch_label, uintptr_t addr); @@ -93,7 +97,7 @@ class ZeroCopyTask { const size_t args_size_; vector args_info_; bool is_updated_; - + string batch_label_; //
map> task_addr_offset_; }; diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc index 9a4e39f3..82108653 100644 --- a/src/ge/graph/manager/graph_manager.cc +++ b/src/ge/graph/manager/graph_manager.cc @@ -267,6 +267,14 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, auto compute_graph = GraphUtils::GetComputeGraph(graph); if (compute_graph != nullptr) { compute_graph->SetGraphID(graph_id); + bool graph_has_been_added = false; + if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added) && + graph_has_been_added) { + GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] same graph object can not be added again, graph_id = %u.", + graph_id); + return GE_GRAPH_GRAPH_ALREADY_EXIST; + } + (void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true); } else { GELOGE(FAILED, "compute graph is null"); return FAILED; @@ -1953,9 +1961,9 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { names_to_passes.emplace_back("MergePass", &merge_pass); names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass); names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); + names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass); - names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); diff --git a/src/ge/graph/partition/engine_place.cc b/src/ge/graph/partition/engine_place.cc index ba651c88..80ac355f 100644 --- a/src/ge/graph/partition/engine_place.cc +++ 
b/src/ge/graph/partition/engine_place.cc @@ -23,6 +23,7 @@ #include #include "common/op/ge_op_utils.h" +#include "common/util/error_manager/error_manager.h" #include "graph/utils/graph_utils.h" #include "graph/utils/op_desc_utils.h" #include "init/gelib.h" @@ -82,6 +83,8 @@ Status EnginePlacer::Run() { // If can't get op's engine name, keep check support finish and return failed if (engine_name.empty()) { is_check_support_success = false; + ErrorManager::GetInstance().ATCReportErrMessage("E13003", {"opname", "optype"}, + {op_desc->GetName(), op_desc->GetType()}); GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Can not find engine of op type %s", node_ptr->GetOpDesc()->GetType().c_str()); continue; diff --git a/src/ge/graph/passes/for_pass.cc b/src/ge/graph/passes/for_pass.cc index 409c345f..e913985b 100644 --- a/src/ge/graph/passes/for_pass.cc +++ b/src/ge/graph/passes/for_pass.cc @@ -190,6 +190,10 @@ Status ForPass::FindInputsAndOutputs(const NodePtr &node, std::vectorGetName().c_str(), index); return FAILED; } + GE_IF_BOOL_EXEC( + in_data_anchor->GetPeerOutAnchor() == nullptr, + GELOGW("Get null input by index %d from node %s ", in_data_anchor->GetIdx(), node->GetName().c_str()); + continue); data_inputs.emplace_back(in_data_anchor->GetPeerOutAnchor()); } diff --git a/src/ge/graph/passes/multi_batch_clone_pass.cc b/src/ge/graph/passes/multi_batch_clone_pass.cc index 4bf41dcb..80355ca7 100644 --- a/src/ge/graph/passes/multi_batch_clone_pass.cc +++ b/src/ge/graph/passes/multi_batch_clone_pass.cc @@ -239,7 +239,7 @@ Status MultiBatchClonePass::CreateIndexConstNode(const ComputeGraphPtr &graph, N GeTensorDesc const_tensor(GeShape({count}), FORMAT_ND, DT_INT32); GeTensor tensor(const_tensor); - tensor.SetData(reinterpret_cast(addr.get()), count * sizeof(int32_t)); + (void)tensor.SetData(reinterpret_cast(addr.get()), count * sizeof(int32_t)); if (!AttrUtils::SetTensor(const_desc, ATTR_NAME_WEIGHTS, tensor)) { GELOGE(OUT_OF_MEMORY, "Failed to init tensor value for const %s", 
const_desc->GetName().c_str()); return FAILED; diff --git a/src/ge/graph/passes/reshape_recovery_pass.cc b/src/ge/graph/passes/reshape_recovery_pass.cc index a3de0525..013c8af4 100644 --- a/src/ge/graph/passes/reshape_recovery_pass.cc +++ b/src/ge/graph/passes/reshape_recovery_pass.cc @@ -50,9 +50,12 @@ Status InsertReshapeIfNeed(const NodePtr &node) { GE_CHECK_NOTNULL(src_tensor); for (auto dst_anchor : src_anchor->GetPeerInDataAnchors()) { auto dst_node = dst_anchor->GetOwnerNode(); + GELOGD("Try insert reshape between %s[%d] and %s[%d] to keep the shape continues", node->GetName().c_str(), + src_anchor->GetIdx(), dst_node->GetName().c_str(), dst_anchor->GetIdx()); GE_CHECK_NOTNULL(dst_node); GE_CHECK_NOTNULL(dst_node->GetOpDesc()); auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); + GE_CHECK_NOTNULL(dst_tensor); bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK && dst_tensor->GetShape().GetDims() != UNKNOWN_RANK && src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims(); diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc index 6adcc63e..298e7749 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -113,10 +113,9 @@ NodePtr InsertCopyNode(const NodePtr &node, size_t n) { desc->CopyAttrsFrom(*src_op_desc); for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { auto input_desc = desc->MutableInputDesc(i); - GE_IF_BOOL_EXEC(input_desc == nullptr, - GELOGE(INTERNAL_ERROR, "Failed to get input desc by index %u from node %s when copy from %s", i, - desc->GetName().c_str(), node->GetName().c_str()); - return nullptr); + GE_IF_BOOL_EXEC(input_desc == nullptr, GELOGW("Get null input desc by index %u from node %s when copy from %s", i, + desc->GetName().c_str(), node->GetName().c_str()); + continue); input_desc->CopyAttrsFrom(src_op_desc->GetInputDesc(i)); } @@ 
-991,12 +990,17 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { size_t i = 0; for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { for (auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - auto identity_desc = MakeShared(node->GetName() + "_identity_" + std::to_string(i), IDENTITY); - GE_CHECK_NOTNULL(identity_desc); - auto out_node = in_data_anchor->GetOwnerNode(); auto op_desc = out_node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); + if ((out_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { + GELOGD("No need to insert identity between %s and %s.", node->GetName().c_str(), out_node->GetName().c_str()); + continue; + } + + auto identity_desc = MakeShared(node->GetName() + "_identity_" + std::to_string(i), IDENTITY); + GE_CHECK_NOTNULL(identity_desc); + string batch_label; if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { if (!AttrUtils::SetStr(identity_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { diff --git a/src/ge/host_kernels/strided_slice_kernel.cc b/src/ge/host_kernels/strided_slice_kernel.cc index 6a9a558c..13c61666 100644 --- a/src/ge/host_kernels/strided_slice_kernel.cc +++ b/src/ge/host_kernels/strided_slice_kernel.cc @@ -16,131 +16,262 @@ #include "host_kernels/strided_slice_kernel.h" -#include - #include "common/fp16_t.h" #include "common/ge_inner_error_codes.h" #include "common/math/math_util.h" #include "common/op/ge_op_utils.h" +#include "external/graph/types.h" #include "framework/common/debug/ge_log.h" -#include "host_kernels/kernel_utils.h" #include "graph/utils/type_utils.h" +#include "host_kernels/kernel_utils.h" #include "inc/kernel_factory.h" +#include namespace ge { namespace { const int32_t kNumOne = 1; const size_t kStridedSliceInputSize = 4; -const size_t kStridedSliceInputIndex0 = 0; -const size_t kStridedSliceInputIndex1 = 1; -const size_t kStridedSliceInputIndex2 = 2; -const size_t kStridedSliceInputIndex3 = 3; -const int32_t kDefaultSrideSize = 1; -} // 
namespace -Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr, const std::vector &input, - Attr &args) { - int64_t begin_mask = 0; - int64_t end_mask = 0; - int64_t ellipsis_mask = 0; - int64_t new_axis_mask = 0; - int64_t shrink_axis_mask = 0; +const size_t kStridedSliceInputIndex = 0; +const size_t kStridedSliceBeginIndex = 1; +const size_t kStridedSliceEndIndex = 2; +const size_t kStridedSliceStrideIndex = 3; +const int32_t kDefaultStrideSize = 1; +const std::set kIndexNumberType = {DT_INT32, DT_INT64}; - if (attr == nullptr) { - GELOGW("input opdescptr is nullptr."); - return PARAM_INVALID; +bool IsEllipsisMaskValid(const GeTensorDescPtr &input_desc, const int ellipsis_mask) { + if (ellipsis_mask != 0) { + auto ellipsis_num = 0; + auto input_shape = input_desc->GetShape(); + bool ellipsis_mask_flag = false; + for (size_t i = 0; i < input_shape.GetDimNum(); i++) { + uint32_t i_temp = static_cast(i); + ellipsis_mask_flag = (static_cast(ellipsis_mask) & (1 << i_temp)); + if (ellipsis_mask_flag) { + ++ellipsis_num; + } + if (ellipsis_num > 1) { + GELOGW("Only one non-zero bit is allowed in ellipsis_mask."); + return false; + } + } } - if (input.size() != kStridedSliceInputSize) { - GELOGW("The number of input for strided slice must be %zu.", kStridedSliceInputSize); - return PARAM_INVALID; + return true; +} +} // namespace +Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector &input, + vector &v_output) { + GELOGD("StridedSliceKernel in."); + // 1.Check input and attrs + if (CheckAndGetAttr(attr) != SUCCESS) { + GELOGW("Check and get attrs failed.Ignore kernel."); + return NOT_CHANGED; } - if (!AttrUtils::GetInt(attr, STRIDE_SLICE_ATTR_BEGIN_MASK, begin_mask)) { - GELOGW("get begin_mask attr failed."); - return PARAM_INVALID; + if (CheckInputParam(input) != SUCCESS) { + GELOGW("Check input params failed.Ignore kernel."); + return NOT_CHANGED; } - if (!AttrUtils::GetInt(attr, STRIDE_SLICE_ATTR_END_MASK, end_mask)) { - 
GELOGW("get end_mask attr failed."); - return PARAM_INVALID; + // 2.Init param with mask attrs. + std::vector input_dims; + std::vector begin_vec; + std::vector output_dims; + std::vector stride_vec; + if (InitParamWithAttrs(input, input_dims, begin_vec, output_dims, stride_vec) != SUCCESS) { + GELOGW("Init param with mask attrs failed.Ignore kernel."); + return NOT_CHANGED; } - if (!AttrUtils::GetInt(attr, STRIDE_SLICE_ATTR_ELLIPSIS_MASK, ellipsis_mask)) { - GELOGW("get ellipsis_mask attr failed."); - return PARAM_INVALID; + + // 3.Set sliced data to output_ptr + ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex]; + auto data_type = weight0->GetTensorDesc().GetDataType(); + size_t data_size = weight0->GetData().size() / GetSizeByDataType(data_type); + void *data = reinterpret_cast(const_cast(weight0->GetData().data())); + GE_CHECK_NOTNULL(data); + // Index 0 can always gets a GeTensorDesc object from any OpDescPtr. + auto output_tensor_desc = attr->GetOutputDesc(0); + GeTensorPtr output_ptr = MakeShared(output_tensor_desc); + if (output_ptr == nullptr) { + GELOGE(MEMALLOC_FAILED, "MakeShared GeTensor failed, node name %s.", attr->GetName().c_str()); + return NOT_CHANGED; } - if (!AttrUtils::GetInt(attr, STRIDE_SLICE_ATTR_NEW_AXIS_MASK, new_axis_mask)) { - GELOGW("get new_axis_mask attr failed."); - return PARAM_INVALID; + auto ret = OpUtils::SetOutputSliceData(data, static_cast(data_size), data_type, input_dims, begin_vec, + output_dims, output_ptr.get(), stride_vec); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "SetOutputSliceData failed."); + return NOT_CHANGED; } - if (!AttrUtils::GetInt(attr, STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK, shrink_axis_mask)) { - GELOGW("get shrink_axis_mask attr failed."); + + // 4.Set output data_type and shape + GeTensorDesc &t_d = output_ptr->MutableTensorDesc(); + t_d.SetDataType(static_cast(data_type)); + + auto final_dim_size = static_cast(output_dims.size()); + vector v_dims; + GetOutputDims(final_dim_size, output_dims, 
v_dims); + t_d.SetShape(GeShape(v_dims)); + v_output.push_back(output_ptr); + GELOGI("StridedSliceKernel success."); + return SUCCESS; +} +Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) { + if (attr == nullptr) { + GELOGE(PARAM_INVALID, "input opdescptr is nullptr."); return PARAM_INVALID; } - if ((ellipsis_mask != 0) || (new_axis_mask != 0)) { - GELOGW("ellipsis_mask or new_axis_mask must be 0 with optimizer."); - return NOT_CHANGED; + // Get all op attr value of strided_slice + for (auto &attr_2_value : attr_value_map_) { + if (!AttrUtils::GetInt(attr, attr_2_value.first, attr_2_value.second)) { + GELOGE(PARAM_INVALID, "Get %s attr failed.", attr_2_value.first.c_str()); + return PARAM_INVALID; + } } - const auto &input_desc = attr->MutableInputDesc(kStridedSliceInputIndex0); + // Check ellipsis_mask is valid + const auto &input_desc = attr->MutableInputDesc(kStridedSliceInputIndex); GE_CHECK_NOTNULL(input_desc); - DataType data_type = input_desc->GetDataType(); - if ((data_type != DT_FLOAT) && (data_type != DT_INT32)) { - GELOGW( - "Data type of StridedSlice OP must be float or int32." 
- "Constant folding will not be carried out in this condition" - "which might affect the time performance but not the accuracy"); - } - args.begin_mask = begin_mask; - args.end_mask = end_mask; - args.ellipsis_mask = ellipsis_mask; - args.new_axis_mask = new_axis_mask; - args.data_type = static_cast(data_type); - args.shrink_axis_mask = shrink_axis_mask; - - ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex0]; - ConstGeTensorPtr weight1 = input[kStridedSliceInputIndex1]; - ConstGeTensorPtr weight2 = input[kStridedSliceInputIndex2]; - ConstGeTensorPtr weight3 = input[kStridedSliceInputIndex3]; - if (CheckWeight(weight0, weight1, weight2, weight3) != SUCCESS) { - GELOGW("Check And Get Attr failed."); + auto ellipsis_mask = attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK); + if (!IsEllipsisMaskValid(input_desc, ellipsis_mask)) { return PARAM_INVALID; } - return SUCCESS; } -Status StridedSliceKernel::CheckWeight(const ConstGeTensorPtr &weight0, const ConstGeTensorPtr &weight1, - const ConstGeTensorPtr &weight2, const ConstGeTensorPtr &weight3) const { - if ((weight0 == nullptr) || (weight1 == nullptr) || (weight2 == nullptr) || (weight3 == nullptr)) { - GELOGW("weight is nullptr."); +Status StridedSliceKernel::CheckInputParam(const std::vector &input) const { + if (input.size() != kStridedSliceInputSize) { + GELOGE(PARAM_INVALID, "The number of input for strided slice must be %zu.", kStridedSliceInputSize); return PARAM_INVALID; } - if (!(weight1->GetTensorDesc().GetDataType() == DT_INT32 && weight2->GetTensorDesc().GetDataType() == DT_INT32 && - weight3->GetTensorDesc().GetDataType() == DT_INT32)) { - GELOGE(INTERNAL_ERROR, "Data type of StridedSlice OP(begin,end,strides) must be int32."); - return INTERNAL_ERROR; + + ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex]; + ConstGeTensorPtr begin_tensor = input[kStridedSliceBeginIndex]; + ConstGeTensorPtr end_tensor = input[kStridedSliceEndIndex]; + ConstGeTensorPtr stride_tensor = 
input[kStridedSliceStrideIndex]; + GE_CHECK_NOTNULL(weight0); + GE_CHECK_NOTNULL(begin_tensor); + GE_CHECK_NOTNULL(end_tensor); + GE_CHECK_NOTNULL(stride_tensor); + + // check if begin,end,strides data type is supported + auto begin_tensor_desc = begin_tensor->GetTensorDesc(); + auto end_tensor_desc = begin_tensor->GetTensorDesc(); + auto stride_tensor_desc = begin_tensor->GetTensorDesc(); + if (begin_tensor_desc.GetDataType() != end_tensor_desc.GetDataType() || + end_tensor_desc.GetDataType() != stride_tensor_desc.GetDataType()) { + GELOGW("Data type of StridedSlice OP(begin,end,strides) must be same."); + return PARAM_INVALID; + } + if (kIndexNumberType.find(begin_tensor_desc.GetDataType()) == kIndexNumberType.end()) { + GELOGW("Data type of StridedSlice OP(begin,end,strides) must be int32 or int64."); + return PARAM_INVALID; } // check data - size_t weight0_size = weight0->GetData().size() / sizeof(int32_t); - size_t weight1_size = weight1->GetData().size() / sizeof(int32_t); - size_t weight2_size = weight2->GetData().size() / sizeof(int32_t); - size_t weight3_size = weight3->GetData().size() / sizeof(int32_t); - if ((weight0_size == 0) || (weight1_size == 0) || (weight2_size == 0) || (weight3_size == 0)) { + auto x_data_type = weight0->GetTensorDesc().GetDataType(); + auto x_data_size = GetSizeByDataType(x_data_type); + if (x_data_size < 0) { + GELOGW("Data type of x input %s is not supported.", TypeUtils::DataTypeToSerialString(x_data_type).c_str()); + return PARAM_INVALID; + } + size_t weight0_size = weight0->GetData().size() / x_data_size; + size_t begin_data_size = begin_tensor->GetData().size() / sizeof(int32_t); + size_t end_data_size = end_tensor->GetData().size() / sizeof(int32_t); + size_t stride_data_size = stride_tensor->GetData().size() / sizeof(int32_t); + if ((weight0_size == 0) || (begin_data_size == 0) || (end_data_size == 0) || (stride_data_size == 0)) { GELOGW("Data size of inputs is 0."); return PARAM_INVALID; } - // check dim size - size_t 
weight0_dim_size = weight0->GetTensorDesc().GetShape().GetDimNum(); - if (!((weight0_dim_size >= weight1_size) && (weight1_size == weight2_size) && (weight1_size == weight3_size))) { + if (!((begin_data_size == end_data_size) && (end_data_size == stride_data_size))) { GELOGW("The sizes of begin, end and stride is not supported."); - return NOT_CHANGED; + return PARAM_INVALID; } return SUCCESS; } -Status StridedSliceKernel::MaskCal(const bool &begin_mask_flag, const bool &end_mask_flag, const bool &shrink_mask_flag, - int32_t &begin_i, int32_t &end_i, int32_t &dim_i) const { +Status StridedSliceKernel::InitParamWithAttrs(const std::vector &input, + std::vector &input_dims, std::vector &begin_vec, + std::vector &output_dims, std::vector &stride_vec) { + ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex]; + ConstGeTensorPtr begin_tensor = input[kStridedSliceBeginIndex]; + ConstGeTensorPtr end_tensor = input[kStridedSliceEndIndex]; + ConstGeTensorPtr stride_tensor = input[kStridedSliceStrideIndex]; + + const GeShape x_shape = weight0->GetTensorDesc().GetShape(); + auto x_dims = x_shape.GetDims(); + auto x_dims_num = x_shape.GetDimNum(); + // handle new_axis_mask + ExpandDimsWithNewAxis(begin_tensor, x_dims_num, x_dims); + + const int32_t *begin = reinterpret_cast(begin_tensor->GetData().data()); + const int32_t *end = reinterpret_cast(end_tensor->GetData().data()); + const int32_t *stride = reinterpret_cast(stride_tensor->GetData().data()); + auto begin_dim_num = begin_tensor->GetData().size() / sizeof(int32_t); + auto min_dim = x_dims_num > begin_dim_num ? 
begin_dim_num : x_dims_num; + for (size_t i = 0; i < x_dims.size(); ++i) { + auto i_temp = static_cast(i); + bool new_axis_mask_flag = + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); + if (new_axis_mask_flag) { + output_dims.push_back(1); + input_dims.push_back(1); + begin_vec.push_back(0); + stride_vec.push_back(1); + continue; + } + + int64_t begin_i = 0; + int64_t end_i = 0; + int64_t stride_i = 1; + if (i < min_dim) { + begin_i = begin[i]; + end_i = end[i]; + stride_i = stride[i]; + } else { + begin_i = 0; + end_i = x_dims.at(i); + stride_i = 1; + } + GELOGD("Before mask calculate. Begin is : %d\t,end is : %d\t stride is : %d\t x_dim_i is : %d.", begin_i, end_i, + stride_i, x_dims.at(i)); + auto ret = MaskCal(i, begin_i, end_i, x_dims.at(i)); + if (ret != SUCCESS) { + GELOGW("MaskCal failed, because of data overflow."); + return NOT_CHANGED; + } + int64_t dim_final; + GELOGD("Before stride calculate. Begin is : %d\t,end is : %d\t stride is : %d\t x_dim_i is : %d.", begin_i, end_i, + stride_i, x_dims.at(i)); + (void)StrideCal(x_dims.at(i), begin_i, end_i, stride_i, dim_final); + output_dims.push_back(dim_final); + input_dims.push_back(x_dims.at(i)); + begin_vec.push_back(begin_i); + stride_vec.push_back(stride_i); + } + return SUCCESS; +} +void StridedSliceKernel::ExpandDimsWithNewAxis(const ConstGeTensorPtr &begin_tensor, const size_t x_dims_num, + vector &x_dims) { + auto begin_data_type_size = GetSizeByDataType(begin_tensor->GetTensorDesc().GetDataType()); + size_t begin_vec_size = begin_tensor->GetData().size() / begin_data_type_size; + auto final_dim_num = x_dims_num < begin_vec_size ? 
begin_vec_size : x_dims_num; + for (size_t i = 0; i < final_dim_num; i++) { + auto i_temp = static_cast(i); + bool new_axis_mask_flag = + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); + if (new_axis_mask_flag) { + x_dims.insert(x_dims.begin() + i, 1); + } + } +} +Status StridedSliceKernel::MaskCal(const size_t i, int64_t &begin_i, int64_t &end_i, int64_t &dim_i) const { + uint64_t i_temp = static_cast(i); + bool begin_mask_flag = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_BEGIN_MASK)) & (1 << i_temp)); + bool end_mask_flag = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_END_MASK)) & (1 << i_temp)); + bool ellipsis_mask_flag = + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK)) & (1 << i_temp)); + bool shrink_mask_flag = + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK)) & (1 << i_temp)); if (shrink_mask_flag) { begin_i = (begin_i < 0 ? (dim_i + begin_i) : begin_i); - FMK_INT32_ADDCHECK(begin_i, kNumOne); + FMK_INT32_ADDCHECK(begin_i, kNumOne) end_i = begin_i + kNumOne; } else { if (begin_mask_flag) { @@ -153,130 +284,43 @@ Status StridedSliceKernel::MaskCal(const bool &begin_mask_flag, const bool &end_ } else { end_i = (end_i < 0 ? 
(dim_i + end_i) : end_i); } + if (ellipsis_mask_flag) { + begin_i = 0; + end_i = dim_i; + } } return SUCCESS; } +Status StridedSliceKernel::StrideCal(const int64_t x_dims_i, int64_t &begin_i, int64_t &end_i, int64_t &stride_i, + int64_t &dim_final) const { + if (stride_i == 0) { + stride_i = kDefaultStrideSize; + } else if (stride_i < 0) { + stride_i = -stride_i; + begin_i = x_dims_i - begin_i - 1; + end_i = x_dims_i - end_i - 1; + } -void StridedSliceKernel::GetOutputDims(uint32_t dims_size, const std::vector &output_dims, const Attr &args, + if (end_i > x_dims_i) { + end_i = x_dims_i; + } + + if ((begin_i == 0) && (end_i == 0)) { + dim_final = x_dims_i; + } else { + dim_final = abs(end_i - begin_i) / stride_i; + } + return SUCCESS; +} +void StridedSliceKernel::GetOutputDims(uint32_t dims_size, const std::vector &output_dims, vector &v_dims) { for (uint32_t k = 0; k < dims_size; k++) { - bool shrink_mask_i = (static_cast(args.shrink_axis_mask) & (1 << k)); + bool shrink_mask_i = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK)) & (1 << k)); if (shrink_mask_i) { continue; } v_dims.push_back(output_dims[k]); } } - -Status StridedSliceKernel::CheckOutputDims(const std::vector &output_dims, const OpDescPtr attr) { - // check dim not all less than 0 - for (auto dim : output_dims) { - if (dim > 0) { - return SUCCESS; - } - } - GELOGW("all output dim <=0, can't be processed. 
op_name : %s", attr->GetName().c_str()); - return NOT_CHANGED; -} - -Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector &input, - vector &v_output) { - GELOGI("StridedSliceKernel in."); - Attr args; - Status ret = CheckAndGetAttr(attr, input, args); - if (ret != SUCCESS) { - GELOGW("Check And Get Attr failed."); - return NOT_CHANGED; - } - - ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex0]; - ConstGeTensorPtr weight1 = input[kStridedSliceInputIndex1]; - ConstGeTensorPtr weight2 = input[kStridedSliceInputIndex2]; - ConstGeTensorPtr weight3 = input[kStridedSliceInputIndex3]; - - const GeShape x_shape = weight0->GetTensorDesc().GetShape(); - size_t dim_size = x_shape.GetDimNum(); - size_t data_size = weight0->GetData().size() / sizeof(int32_t); - - const int32_t *begin = reinterpret_cast(weight1->GetData().data()); - const int32_t *end = reinterpret_cast(weight2->GetData().data()); - const int32_t *stride = reinterpret_cast(weight3->GetData().data()); - if ((begin == nullptr) || (end == nullptr) || (stride == nullptr)) { - GELOGW("input weight tensor is nullptr."); - return NOT_CHANGED; - } - - std::vector input_dims; - std::vector begin_vec; - std::vector output_dims; - std::vector stride_vec; - int64_t dim_final; - for (size_t i = 0; i < dim_size; i++) { - int32_t begin_i = begin[i]; - int32_t end_i = end[i]; - int32_t stride_i = stride[i]; - int32_t dim_i = static_cast(x_shape.GetDim(i)); - GELOGI("%d\t %d\t %d\t %d", begin_i, end_i, stride_i, dim_i); - uint32_t i_temp = static_cast(i); - bool begin_mask_i = (static_cast(args.begin_mask) & (1 << i_temp)); - bool end_mask_i = (static_cast(args.end_mask) & (1 << i_temp)); - bool shrink_mask_i = (static_cast(args.shrink_axis_mask) & (1 << i_temp)); - ret = MaskCal(begin_mask_i, end_mask_i, shrink_mask_i, begin_i, end_i, dim_i); - if (ret != SUCCESS) { - GELOGW("MaskCal failed, because of data overflow."); - return NOT_CHANGED; - } - if (stride_i == 0) { - stride_i = 
kDefaultSrideSize; - } else if (stride_i < 0) { - stride_i = -stride_i; - begin_i = x_shape.GetDim(i) - begin_i - 1; - end_i = x_shape.GetDim(i) - end_i - 1; - } - if ((begin_i == 0) && (end_i == 0)) { - dim_final = x_shape.GetDim(i); - } else { - dim_final = abs(end_i - begin_i) / stride_i; - } - output_dims.push_back(dim_final); - input_dims.push_back(x_shape.GetDim(i)); - begin_vec.push_back(begin_i); - stride_vec.push_back(stride_i); - } - - // Index 0 can always gets a GeTensorDesc object from any OpDescPtr. - auto output_tensor_desc = attr->GetOutputDesc(0); - GeTensorPtr output_ptr = MakeShared(output_tensor_desc); - if (output_ptr == nullptr) { - GELOGW("MakeShared GeTensor failed, node name %s.", attr->GetName().c_str()); - return NOT_CHANGED; - } - - void *data = reinterpret_cast(const_cast(weight0->GetData().data())); - GE_CHECK_NOTNULL(data); - - ret = CheckOutputDims(output_dims, attr); - if (ret != SUCCESS) { - return ret; - } - - ret = OpUtils::SetOutputSliceData(data, static_cast(data_size), args.data_type, input_dims, begin_vec, - output_dims, output_ptr.get(), stride_vec); - if (ret != SUCCESS) { - GELOGW("SetOutputSliceData failed."); - return NOT_CHANGED; - } - - GeTensorDesc &t_d = output_ptr->MutableTensorDesc(); - t_d.SetDataType(static_cast(args.data_type)); - - uint32_t final_dim_size = static_cast(output_dims.size()); - vector v_dims; - GetOutputDims(final_dim_size, output_dims, args, v_dims); - t_d.SetShape(GeShape(v_dims)); - v_output.push_back(output_ptr); - GELOGI("StridedSliceKernel success."); - return SUCCESS; -} REGISTER_KERNEL(STRIDEDSLICE, StridedSliceKernel); } // namespace ge diff --git a/src/ge/host_kernels/strided_slice_kernel.h b/src/ge/host_kernels/strided_slice_kernel.h index 0ba3afbd..5d130cd7 100644 --- a/src/ge/host_kernels/strided_slice_kernel.h +++ b/src/ge/host_kernels/strided_slice_kernel.h @@ -17,34 +17,33 @@ #ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_STRIDED_SLICE_KERNEL_H_ #define 
GE_GRAPH_PASSES_FOLDING_KERNEL_STRIDED_SLICE_KERNEL_H_ -#include - #include "inc/kernel.h" +#include namespace ge { -struct Attr { - int64_t begin_mask; - int64_t end_mask; - int64_t ellipsis_mask; - int64_t new_axis_mask; - int64_t data_type; - int64_t shrink_axis_mask; -}; - class StridedSliceKernel : public Kernel { public: Status Compute(const OpDescPtr attr, const std::vector &input, vector &v_output) override; private: - Status CheckAndGetAttr(const OpDescPtr &attr, const std::vector &input, Attr &args); - Status CheckWeight(const ConstGeTensorPtr &weight0, const ConstGeTensorPtr &weight1, const ConstGeTensorPtr &weight2, - const ConstGeTensorPtr &weight3) const; - Status MaskCal(const bool &begin_mask_flag, const bool &end_mask_flag, const bool &shrink_mask_flag, int32_t &begin_i, - int32_t &end_i, int32_t &dim_i) const; - void GetOutputDims(uint32_t dims_size, const std::vector &output_dims, const Attr &args, - vector &v_dims); - Status CheckOutputDims(const std::vector &output_dims, const OpDescPtr attr); + Status CheckAndGetAttr(const OpDescPtr &attr); + Status CheckInputParam(const std::vector &input) const; + Status InitParamWithAttrs(const std::vector &input, std::vector &input_dims, + std::vector &begin_vec, std::vector &output_dims, + std::vector &stride_vec); + Status MaskCal(const size_t i, int64_t &begin_i, int64_t &end_i, int64_t &dim_i) const; + Status StrideCal(const int64_t x_dims_i, int64_t &begin_i, int64_t &end_i, int64_t &stride_i, + int64_t &dim_final) const; + void ExpandDimsWithNewAxis(const ConstGeTensorPtr &begin_tensor, const size_t x_dims_num, vector &x_dims); + + void GetOutputDims(uint32_t dims_size, const std::vector &output_dims, vector &v_dims); + + map attr_value_map_ = {{STRIDE_SLICE_ATTR_BEGIN_MASK, 0}, + {STRIDE_SLICE_ATTR_END_MASK, 0}, + {STRIDE_SLICE_ATTR_ELLIPSIS_MASK, 0}, + {STRIDE_SLICE_ATTR_NEW_AXIS_MASK, 0}, + {STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK, 0}}; }; } // namespace ge #endif // 
GE_GRAPH_PASSES_FOLDING_KERNEL_STRIDED_SLICE_KERNEL_H_ diff --git a/src/ge/hybrid/executor/hybrid_model_executor.cc b/src/ge/hybrid/executor/hybrid_model_executor.cc index 6fe23dee..718801b4 100644 --- a/src/ge/hybrid/executor/hybrid_model_executor.cc +++ b/src/ge/hybrid/executor/hybrid_model_executor.cc @@ -27,6 +27,12 @@ const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) : model_(model), device_id_(device_id), stream_(stream) {} +HybridModelExecutor::~HybridModelExecutor() { + if (context_.rt_gen_context != nullptr) { + (void)rtCtxDestroy(context_.rt_gen_context); + } +} + Status HybridModelExecutor::Init() { GELOGD("Start to init HybridGraphEngine."); GE_CHK_STATUS_RET_NOLOG(InitExecutionContext()); diff --git a/src/ge/hybrid/executor/hybrid_model_executor.h b/src/ge/hybrid/executor/hybrid_model_executor.h index 9996dbe0..2d1320a2 100644 --- a/src/ge/hybrid/executor/hybrid_model_executor.h +++ b/src/ge/hybrid/executor/hybrid_model_executor.h @@ -35,7 +35,7 @@ class HybridModelExecutor { HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream); - ~HybridModelExecutor() = default; + ~HybridModelExecutor(); Status Init(); diff --git a/src/ge/hybrid/model/hybrid_model_builder.cc b/src/ge/hybrid/model/hybrid_model_builder.cc index 45fb3a6a..0671990c 100644 --- a/src/ge/hybrid/model/hybrid_model_builder.cc +++ b/src/ge/hybrid/model/hybrid_model_builder.cc @@ -618,7 +618,8 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_ } int64_t var_size = CalcVarSizeInBytes(*tensor_desc); - tensor.reset(new (std::nothrow) TensorValue(dev_mem, var_size)); + // var size is only for checking, will not allocate any memory by it + tensor.reset(new (std::nothrow) TensorValue(dev_mem, static_cast(var_size))); GE_CHECK_NOTNULL(tensor); return SUCCESS; } diff --git a/src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc 
b/src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc index 332675bf..1d6c464f 100644 --- a/src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc +++ b/src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc @@ -197,7 +197,7 @@ void AicpuExtInfoHandler::GetShapeAndType(const AicpuShapeAndType *shape_and_typ dims.emplace_back(tmpDim); } data_type = static_cast(shape_and_type->type); - shape = std::move(GeShape(dims)); + shape = GeShape(dims); } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/src/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc index 49ff722f..1c98abee 100644 --- a/src/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ b/src/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -48,6 +48,7 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { std::vector inputs; for (int32_t i = 0; i < context.NumInputs(); ++i) { const auto &input_desc = op_desc->GetInputDesc(i); + GE_CHECK_NOTNULL(context.GetInput(i)); auto in_tensor = MakeShared(input_desc, reinterpret_cast(context.GetInput(i)->GetData()), context.GetInput(i)->GetSize()); GE_CHECK_NOTNULL(in_tensor); diff --git a/src/ge/init/gelib.cc b/src/ge/init/gelib.cc index d5e745eb..ec56cc0a 100644 --- a/src/ge/init/gelib.cc +++ b/src/ge/init/gelib.cc @@ -167,7 +167,6 @@ Status GELib::SystemInitialize(const map &options) { // In train and infer, profiling is always needed. 
InitOptions(options); - InitProfiling(this->options_); auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); GE_IF_BOOL_EXEC(model_manager->EnableExceptionDump(options) != SUCCESS, @@ -175,23 +174,23 @@ Status GELib::SystemInitialize(const map &options) { return FAILED); // 1.`is_train_mode_` means case: train // 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer - // these two case need call `InitSystemWithOptions->rtGetDeviceIndexByPhyId` - // to convert phy device id to logical device id - // note:rtGetDeviceIndexByPhyId return `0` logical id when input phy device id is `0` + // these two case with logical device id if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { + InitProfiling(this->options_, true); status = InitSystemWithOptions(this->options_); } else { + InitProfiling(this->options_); status = InitSystemWithoutOptions(); } return status; } -void GELib::InitProfiling(Options &options) { +void GELib::InitProfiling(Options &options, bool convert_2_phy_device_id) { GELOGI("Init Profiling. 
session Id: %ld, device id:%d ", options.session_id, options.device_id); std::lock_guard lock(status_mutex_); GetContext().Init(); // Profiling init - if (ProfilingManager::Instance().Init(options) != SUCCESS) { + if (ProfilingManager::Instance().Init(options, convert_2_phy_device_id) != SUCCESS) { GELOGW("Profiling init failed."); } } @@ -362,6 +361,9 @@ Status GELib::Finalize() { GELOGW("not initialize"); return SUCCESS; } + if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { + GE_CHK_RT_RET(rtSetDevice(options_.device_id)); + } Status final_state = SUCCESS; Status mid_state; GELOGI("engineManager finalization."); @@ -412,10 +414,14 @@ Status GELib::Finalize() { GetMutableGlobalOptions().erase(ENABLE_SINGLE_STREAM); + if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { + GE_CHK_RT_RET(rtDeviceReset(options_.device_id)); + } + instancePtr_ = nullptr; init_flag_ = false; if (final_state != SUCCESS) { - GELOGE(FAILED, "MemManager finalization."); + GELOGE(FAILED, "finalization failed."); return final_state; } GELOGI("finalization success."); diff --git a/src/ge/init/gelib.h b/src/ge/init/gelib.h index b5621dfd..c8b3ff8a 100644 --- a/src/ge/init/gelib.h +++ b/src/ge/init/gelib.h @@ -68,7 +68,7 @@ class GELib { // get incre build cache path const std::string &GetIncreBuildCachePath() const { return incre_build_cache_path_; } - void InitProfiling(Options &options); + void InitProfiling(Options &options, bool convert_2_phy_device_id = false); void ShutDownProfiling(); Status InitSystemWithoutOptions(); diff --git a/src/ge/session/inner_session.cc b/src/ge/session/inner_session.cc index 9f1f199f..3d3adfd8 100644 --- a/src/ge/session/inner_session.cc +++ b/src/ge/session/inner_session.cc @@ -18,6 +18,7 @@ #include #include #include +#include "common/dump/dump_properties.h" #include "common/util.h" #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" @@ -30,6 +31,8 @@ namespace ge { namespace { +const int32_t 
kDumpStatus = 0; + Status CheckReuseMemoryOption(const std::map &options) { auto iter = options.find(OPTION_EXEC_DISABLE_REUSED_MEMORY); if (iter != options.end()) { @@ -47,7 +50,7 @@ Status CheckReuseMemoryOption(const std::map &options) { } // namespace static std::mutex mutex_; // BuildGraph and RunGraph use - +bool InnerSession::is_dump_server_inited_ = false; InnerSession::InnerSession(uint64_t session_id, const std::map &options) : init_flag_(false), session_id_(session_id), options_(options), graph_manager_(domi::GetContext()) {} @@ -71,12 +74,12 @@ Status InnerSession::Initialize() { GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); - PropertiesManager::Instance().GetDumpProperties(session_id_).InitByOptions(); + DumpProperties dump_properties; + dump_properties.InitByOptions(); ret = graph_manager_.Initialize(options_); if (ret != SUCCESS) { GELOGE(ret, "[InnerSession:%lu] initialize failed.", session_id_); - PropertiesManager::Instance().RemoveDumpProperties(session_id_); return ret; } @@ -84,7 +87,6 @@ Status InnerSession::Initialize() { if (ret != SUCCESS) { GELOGE(ret, "failed to set malloc size"); (void)graph_manager_.Finalize(); - PropertiesManager::Instance().RemoveDumpProperties(session_id_); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); return ret; } @@ -95,7 +97,6 @@ Status InnerSession::Initialize() { ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID); if (ret != SUCCESS) { GELOGE(ret, "failed to init session instance"); - PropertiesManager::Instance().RemoveDumpProperties(session_id_); } init_flag_ = true; return SUCCESS; @@ -120,8 +121,6 @@ Status InnerSession::Finalize() { GELOGI("VarManager free var memory."); (void)VarManager::Instance(session_id_)->FreeVarMemory(); - PropertiesManager::Instance().RemoveDumpProperties(session_id_); - GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); return ret; @@ -297,4 +296,5 @@ Status InnerSession::SaveVariables(const 
Graph &graph, const std::vector &outputs, std::vector &var_values) { return graph_manager_.SaveVariables(graph, var_names, outputs, var_values); } + } // namespace ge diff --git a/src/ge/session/inner_session.h b/src/ge/session/inner_session.h index 3d9bf39f..6d57af61 100644 --- a/src/ge/session/inner_session.h +++ b/src/ge/session/inner_session.h @@ -71,6 +71,7 @@ class InnerSession { std::mutex resource_mutex_; // AddGraph, RemoveGraph and Finalize use void UpdateThreadContext(const std::map &options); void UpdateThreadContext(uint32_t graph_id); + static bool is_dump_server_inited_; }; } // namespace ge diff --git a/src/ge/single_op/single_op.cc b/src/ge/single_op/single_op.cc index aeefe2be..a74be1f3 100644 --- a/src/ge/single_op/single_op.cc +++ b/src/ge/single_op/single_op.cc @@ -24,6 +24,7 @@ #include "graph/load/new_model_manager/model_utils.h" #include "runtime/mem.h" #include "single_op/single_op_manager.h" +#include "graph/load/new_model_manager/model_manager.h" namespace ge { namespace { @@ -42,6 +43,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOp::~SingleOp() { delete task; task = nullptr; } + GELOGI("SingleOp destroy sessionId = %lu", aicpu_session_id_); + ModelManager::GetInstance()->DestroyAicpuSession(aicpu_session_id_); } Status SingleOp::ValidateArgs(const std::vector &inputs, const std::vector &outputs) { @@ -166,6 +169,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c if (ret != SUCCESS) { return ret; } + ret = task->OpenDump(args_, stream_); + if (ret != SUCCESS) { + GELOGE(ret, "Open dump failed"); + return ret; + } } return ret; @@ -173,9 +181,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c void SingleOp::SetStream(rtStream_t stream) { stream_ = stream; } +void SingleOp::SetSessionID(uint64_t session_id) { aicpu_session_id_ = session_id; } + DynamicSingleOp::DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex, rtStream_t stream) : 
resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) {} +DynamicSingleOp::~DynamicSingleOp() { + GELOGI("DynamicSingleOp destroy sessionId = %lu", aicpu_session_id_); + ModelManager::GetInstance()->DestroyAicpuSession(aicpu_session_id_); +} + Status DynamicSingleOp::ValidateParams(const vector &input_desc, const std::vector &inputs, std::vector &output_desc, std::vector &outputs) const { if (inputs.size() != input_desc.size()) { @@ -236,14 +251,22 @@ Status DynamicSingleOp::AllocateWorkspaces(const std::vector &workspace return SUCCESS; } +Status DynamicSingleOp::ExecuteTbeTask(const vector &input_desc, const vector &inputs, + vector &output_desc, vector &outputs) { + GE_CHK_STATUS_RET_NOLOG(op_task_->UpdateRunInfo(input_desc, output_desc)); + + std::vector workspace_buffers; + GE_CHK_STATUS_RET_NOLOG(AllocateWorkspaces(op_task_->GetWorkspaceSizes(), workspace_buffers)); + + return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_); +} + Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, const vector &input_buffers, vector &output_desc, vector &output_buffers) { GE_CHECK_NOTNULL(op_task_); GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); std::lock_guard lk(*stream_mutex_); - GE_CHK_STATUS_RET_NOLOG(op_task_->UpdateRunInfo(input_desc, output_desc)); - std::vector workspace_buffers; - GE_CHK_STATUS_RET_NOLOG(AllocateWorkspaces(op_task_->GetWorkspaceSizes(), workspace_buffers)); + std::vector inputs; std::vector outputs; for (auto &buffer : input_buffers) { @@ -252,6 +275,17 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, con for (auto &buffer : output_buffers) { outputs.emplace_back(buffer.data); } - return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_); + + if (op_task_->GetOpTaskType() == OP_TASK_TBE) { + return ExecuteTbeTask(input_desc, inputs, output_desc, outputs); + } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || 
op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { + return op_task_->LaunchKernel(input_desc, inputs, output_desc, outputs, stream_); + } else { + GELOGE(UNSUPPORTED, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", + op_task_->GetOpTaskType()); + return UNSUPPORTED; + } } + +void DynamicSingleOp::SetSessionID(uint64_t session_id) { aicpu_session_id_ = session_id; } } // namespace ge diff --git a/src/ge/single_op/single_op.h b/src/ge/single_op/single_op.h index b7d23d32..0ca4afef 100644 --- a/src/ge/single_op/single_op.h +++ b/src/ge/single_op/single_op.h @@ -27,6 +27,7 @@ #include "framework/executor/ge_executor.h" #include "runtime/stream.h" #include "task/op_task.h" +#include "cce/aicpu_engine_struct.h" namespace ge { class SingleOp { @@ -36,6 +37,7 @@ class SingleOp { Status ExecuteAsync(const std::vector &inputs, const std::vector &outputs); void SetStream(rtStream_t stream); + void SetSessionID(uint64_t session_id); private: Status ValidateArgs(const std::vector &inputs, const std::vector &outputs); @@ -50,6 +52,7 @@ class SingleOp { std::vector output_addr_list_; std::vector output_sizes_; std::vector args_; + uint64_t aicpu_session_id_ = 0; std::vector tasks_; std::vector> arg_table_; @@ -58,9 +61,10 @@ class SingleOp { class DynamicSingleOp { public: DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex_, rtStream_t stream); - ~DynamicSingleOp() = default; + ~DynamicSingleOp(); Status ExecuteAsync(const vector &input_desc, const std::vector &inputs, std::vector &output_desc, std::vector &outputs); + void SetSessionID(uint64_t session_id); private: friend class SingleOpModel; @@ -69,12 +73,16 @@ class DynamicSingleOp { Status AllocateWorkspaces(const std::vector &workspace_sizes, std::vector &workspaces); - std::unique_ptr op_task_; + Status ExecuteTbeTask(const vector &input_desc, const vector &inputs, + vector &output_desc, vector &outputs); + + std::unique_ptr op_task_; uintptr_t resource_id_ = 0; std::mutex 
*stream_mutex_; rtStream_t stream_ = nullptr; size_t num_inputs_ = 0; size_t num_outputs_ = 0; + uint64_t aicpu_session_id_ = 0; }; } // namespace ge #endif // GE_SINGLE_OP_SINGLE_OP_H_ diff --git a/src/ge/single_op/single_op_model.cc b/src/ge/single_op/single_op_model.cc index 8c974259..fb676008 100644 --- a/src/ge/single_op/single_op_model.cc +++ b/src/ge/single_op/single_op_model.cc @@ -16,6 +16,7 @@ #include "single_op/single_op_model.h" +#include #include #include #include @@ -31,6 +32,8 @@ #include "task/aicpu_kernel_task_builder.h" #include "task/tbe_task_builder.h" +static std::atomic aicpu_sessionid(0); + using domi::TaskDef; using std::unique_ptr; using std::vector; @@ -250,17 +253,21 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { } single_op.tasks_.emplace_back(task); } else { - GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernek are supported, but got %u", context.kernel_type()); + GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); return UNSUPPORTED; } } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { GELOGD("Building AICPU_TF task"); - OpTask *task = nullptr; - auto ret = BuildKernelExTask(task_def.kernel_ex(), single_op, &task); + AiCpuTask *aicpu_task = nullptr; + bool depend_compute_flag = false; + uint64_t singleop_sessionid = aicpu_sessionid++; + GELOGI("Build singleOp, sessionId = %lu", singleop_sessionid); + auto ret = BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_sessionid); if (ret != SUCCESS) { return ret; } - single_op.tasks_.emplace_back(task); + single_op.tasks_.emplace_back(aicpu_task); + single_op.SetSessionID(singleop_sessionid); } else { // skip GELOGD("Skip task type: %d", static_cast(task_type)); @@ -316,7 +323,8 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa return SUCCESS; } -Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, SingleOp &single_op, 
OpTask **task) { +Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, bool dynamic_flag, + bool &depend_compute_flag, uint64_t session_id) { auto iter = op_list_.find(kernel_def.op_index()); if (iter == op_list_.end()) { GELOGE(INTERNAL_ERROR, "op desc not found. op index = %u", kernel_def.op_index()); @@ -329,11 +337,12 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, Sin return MEMALLOC_FAILED; } auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def); - auto ret = builder.BuildTask(*aicpu_task, model_params_); + auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, session_id); if (ret != SUCCESS) { GELOGE(ret, "build aicpu_TF op task failed"); return ret; } + depend_compute_flag = (aicpu_task->GetUnknownType() == DEPEND_COMPUTE); *task = aicpu_task.release(); return SUCCESS; @@ -370,6 +379,27 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { return BuildTaskList(single_op); } +Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { + const domi::KernelDef &kernel_def = task_def.kernel(); + const auto &context = kernel_def.context(); + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == cce::ccKernelType::TE) { + GELOGD("Building TBE task"); + TbeOpTask *tbe_task = nullptr; + GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); + single_op.op_task_.reset(tbe_task); + } else if (kernel_type == cce::ccKernelType::AI_CPU) { + GELOGD("Building AICPU_CC task"); + OpTask *task = nullptr; + GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task)); + single_op.op_task_.reset(task); + } else { + GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); + return UNSUPPORTED; + } + return SUCCESS; +} + Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { auto ge_model 
= model_helper_.GetGeModel(); GE_CHECK_NOTNULL(ge_model); @@ -385,10 +415,30 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { GELOGE(UNSUPPORTED, "Do not support dynamic op with multiple tasks."); return UNSUPPORTED; } - - TbeOpTask *task = nullptr; - GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &task)); - single_op.op_task_.reset(task); + GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); + } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { + if (single_op.op_task_ != nullptr) { + GELOGE(UNSUPPORTED, "Do not support dynamic op with multiple tasks."); + return UNSUPPORTED; + } + GELOGD("Building AICPU_TF task"); + AiCpuTask *aicpu_task = nullptr; + bool depend_compute_flag = false; + uint64_t dynamic_singleop_sessionid = aicpu_sessionid++; + GELOGI("Build dynamic singleOp, sessionId = %lu", dynamic_singleop_sessionid); + GE_CHK_STATUS_RET_NOLOG( + BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true, depend_compute_flag, dynamic_singleop_sessionid)); + if (depend_compute_flag) { + if (i >= tasks.size() - 1) { + GELOGE(FAILED, "The copy task of the fourth operator was not found."); + return FAILED; + } + ++i; + const TaskDef ©_task_def = tasks[i]; + GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); + } + single_op.op_task_.reset(aicpu_task); + single_op.SetSessionID(dynamic_singleop_sessionid); } else { // skip GELOGD("Skip task type: %d", static_cast(task_type)); diff --git a/src/ge/single_op/single_op_model.h b/src/ge/single_op/single_op_model.h index 8becf438..09b90050 100644 --- a/src/ge/single_op/single_op_model.h +++ b/src/ge/single_op/single_op_model.h @@ -66,8 +66,10 @@ class SingleOpModel { Status BuildTaskList(SingleOp &single_op); Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); - Status BuildKernelExTask(const domi::KernelExDef &kernel_def, SingleOp &single_op, 
OpTask **task); + Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, bool dynamic_flag, + bool &depend_compute_flag, uint64_t session_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task); + Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); void ParseArgTable(TbeOpTask *task, SingleOp &op); diff --git a/src/ge/single_op/task/aicpu_kernel_task_builder.cc b/src/ge/single_op/task/aicpu_kernel_task_builder.cc index 4264f8c5..cc334f41 100644 --- a/src/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/src/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -54,6 +54,29 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { task.SetSoName(so_name); task.SetkernelName(kernel_name); task.op_desc_ = op_desc_; + + task.num_inputs_ = op_desc_->GetInputsSize(); + task.num_outputs_ = op_desc_->GetOutputsSize(); + + // get kernel_ext_info + auto &kernel_ext_info = kernel_def_.kernel_ext_info(); + auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); + GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, + "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", kernel_ext_info.size(), + kernel_ext_info_size); + + ret = task.SetExtInfoAndType(kernel_ext_info); + if (ret != SUCCESS) { + GELOGE(ret, "Init ext info failed."); + return ret; + } + + auto aicpu_param_head = reinterpret_cast(task.args_.get()); + if (task.ext_info_addr_dev_ != nullptr) { + aicpu_param_head->extInfoLength = kernel_ext_info.size(); + aicpu_param_head->extInfoAddr = reinterpret_cast(task.ext_info_addr_dev_); + } + return SUCCESS; } } // namespace ge \ No newline at end of file diff --git a/src/ge/single_op/task/aicpu_task_builder.cc b/src/ge/single_op/task/aicpu_task_builder.cc index aba29f93..9ad52d81 100644 --- a/src/ge/single_op/task/aicpu_task_builder.cc +++ 
b/src/ge/single_op/task/aicpu_task_builder.cc @@ -30,13 +30,13 @@ Status AiCpuTaskBuilder::SetInputOutputAddr(void **io_addr, const std::vector(addresses.data()); uint64_t src_len = sizeof(void *) * addresses.size(); - rt_ret = rtMemcpy(*io_addr, arg_size, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); + rt_ret = rtMemcpy(*io_addr, arg_size, src_addr, src_len, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { (void)rtFree(*io_addr); GELOGE(RT_FAILED, "rtMemcpy addresses failed, ret = %d", rt_ret); @@ -69,8 +69,8 @@ Status AiCpuTaskBuilder::SetKernelArgs(void **args, STR_FWK_OP_KERNEL &fwk_op_ke return RT_FAILED; } - rt_ret = - rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_HOST); + rt_ret = rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), + RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { (void)rtFree(fwk_op_args); GELOGE(RT_FAILED, "copy args failed, ret = %d", rt_ret); @@ -80,7 +80,8 @@ Status AiCpuTaskBuilder::SetKernelArgs(void **args, STR_FWK_OP_KERNEL &fwk_op_ke return SUCCESS; } -Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m) { +Status AiCpuTaskBuilder::InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, const SingleOpModelParam ¶m, + bool dynamic_flag) { if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) { GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size()); @@ -88,31 +89,60 @@ Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam } auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param); auto ws_addr_vec = addresses.at(BuildTaskUtils::kAddressIndexWorkspace); - if (ws_addr_vec.empty()) { - GELOGE(PARAM_INVALID, "workspace Data Address is empty."); - return PARAM_INVALID; - } - auto rt_ret = rtMemcpy(ws_addr_vec[0], kernel_def_.task_info_size(), kernel_def_.task_info().data(), 
- kernel_def_.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(FAILED, "rtMemcpy error: 0x%X", rt_ret); - return FAILED; + + if (dynamic_flag) { + GE_CHK_RT_RET(rtMalloc(kernel_workspace, kernel_def_.task_info_size(), RT_MEMORY_HBM)); + } else { + if (ws_addr_vec.empty()) { + GELOGE(PARAM_INVALID, "workspace Data Address is empty."); + return PARAM_INVALID; + } + *kernel_workspace = ws_addr_vec[0]; } + GE_CHK_RT_RET(rtMemcpy(*kernel_workspace, kernel_def_.task_info_size(), kernel_def_.task_info().data(), + kernel_def_.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); - void *io_addr = nullptr; - auto ret = SetInputOutputAddr(&io_addr, BuildTaskUtils::JoinAddresses(addresses)); + auto ret = SetInputOutputAddr(io_addr, BuildTaskUtils::JoinAddresses(addresses)); if (ret != SUCCESS) { return ret; } + return SUCCESS; +} + +Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, + uint64_t session_id) { + void *io_addr = nullptr; + void *kernel_workspace = nullptr; + GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(&io_addr, &kernel_workspace, param, dynamic_flag)); STR_FWK_OP_KERNEL fwk_op_kernel = {0}; - ret = SetFmkOpKernel(io_addr, ws_addr_vec[0], fwk_op_kernel); + auto ret = SetFmkOpKernel(io_addr, kernel_workspace, fwk_op_kernel); if (ret != SUCCESS) { (void)rtFree(io_addr); return ret; } + + task.op_desc_ = op_desc_; + task.num_inputs_ = op_desc_->GetInputsSize(); + task.num_outputs_ = op_desc_->GetOutputsSize(); + + // get kernel_ext_info + auto &kernel_ext_info = kernel_def_.kernel_ext_info(); + auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); + GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, + "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", kernel_ext_info.size(), + kernel_ext_info_size); + GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info), "Init ext info failed."); + + if (task.ext_info_addr_dev_ != nullptr) { + 
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(task.ext_info_addr_dev_); + fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = kernel_ext_info_size; + } + GE_CHK_STATUS_RET(task.InitForSummaryAndCopy(), "AiCpuTask init for summary and copy task failed."); + // Create session - auto session_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID; + fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; + GELOGI("Begin to CreateAicpuSession, session id: %lu", session_id); GE_CHECK_NOTNULL(ModelManager::GetInstance()); GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS, GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id); @@ -127,8 +157,8 @@ Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam task.op_type_ = op_desc_->GetName(); task.io_addr_ = io_addr; task.task_info_ = kernel_def_.task_info(); - task.workspace_addr_ = ws_addr_vec[0]; - task.op_desc_ = op_desc_; + task.workspace_addr_ = kernel_workspace; + task.dynamic_flag_ = dynamic_flag; auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); GELOGI("[TASK_INFO] %s %s", task.task_info_.c_str(), debug_info.c_str()); diff --git a/src/ge/single_op/task/aicpu_task_builder.h b/src/ge/single_op/task/aicpu_task_builder.h index bd582a4f..76ccb161 100644 --- a/src/ge/single_op/task/aicpu_task_builder.h +++ b/src/ge/single_op/task/aicpu_task_builder.h @@ -29,12 +29,14 @@ class AiCpuTaskBuilder { AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def); ~AiCpuTaskBuilder() = default; - Status BuildTask(AiCpuTask &task, const SingleOpModelParam ¶m); + Status BuildTask(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, uint64_t session_id); private: static Status SetKernelArgs(void **args, STR_FWK_OP_KERNEL &kernel); Status SetInputOutputAddr(void **io_addr, const std::vector &addresses); Status SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &kernel); + Status 
InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, const SingleOpModelParam ¶m, + bool dynamic_flag); const OpDescPtr op_desc_; const domi::KernelExDef &kernel_def_; diff --git a/src/ge/single_op/task/op_task.cc b/src/ge/single_op/task/op_task.cc index f23073bb..0c489aa4 100644 --- a/src/ge/single_op/task/op_task.cc +++ b/src/ge/single_op/task/op_task.cc @@ -20,8 +20,10 @@ #include #include +#include "aicpu/common/aicpu_task_struct.h" #include "common/dump/dump_manager.h" #include "common/dump/dump_op.h" +#include "common/formats/formats.h" #include "framework/common/debug/log.h" #include "register/op_tiling.h" #include "runtime/rt.h" @@ -30,24 +32,31 @@ namespace ge { namespace { constexpr int kLaunchRetryTimes = 1000; constexpr int kSleepTime = 10; +constexpr uint64_t kReleaseFlag = 1; +constexpr int kCopyNum = 2; } // namespace -Status OpTask::OpenDump(const void *arg, const OpDescPtr &op_desc, rtStream_t stream) { - if (DumpManager::GetInstance().IsDumpOpen()) { +Status OpTask::OpenDump(const std::vector &io_addr, rtStream_t stream) { + if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) { GELOGI("Dump is open in single op,start to set dump info"); std::vector input_addrs; std::vector output_adds; - auto input_size = op_desc->GetAllInputsDesc().size(); - auto output_size = op_desc->GetOutputsSize(); + auto input_size = op_desc_->GetInputsSize(); + auto output_size = op_desc_->GetOutputsSize(); + auto all_size = io_addr.size(); + if (input_size + output_size != all_size) { + GELOGE(FAILED, "io_addr size is not equal input and output size"); + return FAILED; + } for (size_t i = 0; i < input_size; i++) { - uint64_t input_addr = *(reinterpret_cast(arg) + i); + uint64_t input_addr = static_cast(io_addr[i]); input_addrs.emplace_back(input_addr); } for (size_t j = 0; j < output_size; j++) { - uint64_t output_addr = *(reinterpret_cast(arg) + input_size + j); + uint64_t output_addr = static_cast(io_addr[input_size + j]); 
output_adds.emplace_back(output_addr); } - dump_op_.SetDumpInfo(DumpManager::GetInstance().GetDumpProperties(), op_desc, input_addrs, output_adds, stream); + dump_op_.SetDumpInfo(DumpManager::GetInstance().GetDumpProperties(), op_desc_, input_addrs, output_adds, stream); auto status = dump_op_.LaunchDumpOp(); if (status != SUCCESS) { GELOGE(status, "Launch dump op failed in single op"); @@ -112,11 +121,6 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { } GELOGI("[TASK_INFO] %s", this->stub_name_.c_str()); - auto status = OpenDump(args_.get(), op_desc_, stream); - if (status != SUCCESS) { - GELOGE(status, "Open dump failed in tbe single op %s", stub_name_.c_str()); - return status; - } return SUCCESS; } @@ -218,6 +222,119 @@ Status TbeOpTask::LaunchKernel(const vector &inputs, const vector(unknown_shape_type_val); + + aicpu_ext_handle_.reset( + new (std::nothrow)::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), num_inputs_, num_outputs_, unknown_type_)); + GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); + + Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); + if (ret != SUCCESS) { + GELOGE(ret, "Parse kernel ext info failed, kernel_ext_info_size=%zu.", kernel_ext_info.size()); + return ret; + } + + GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, kernel_ext_info.size(), RT_MEMORY_HBM)); + GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, kernel_ext_info.size(), kernel_ext_info.data(), kernel_ext_info.size(), + RT_MEMCPY_HOST_TO_DEVICE)); + return SUCCESS; +} + +Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, + std::vector &output_desc) { + GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); + if (num_inputs_ == 0 && num_outputs_ == 0) { + GELOGI("No input and output, no need update ext info."); + return SUCCESS; + } + + GE_CHECK_NOTNULL(aicpu_ext_handle_); + for (size_t i = 0; i < num_inputs_; ++i) { + GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(i, 
input_desc[i]), + "Input[%zu] update input shape failed.", i); + } + + if (unknown_type_ != DEPEND_COMPUTE) { + for (size_t j = 0; j < num_outputs_; ++j) { + GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), + "Output[%zu] UpdateOutputShapeAndType failed.", j); + // debug code + GELOGD("No input and output, no need update ext info."); + } + } + + GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, + aicpu_ext_handle_->GetExtInfoLen(), // check size + aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), + RT_MEMCPY_HOST_TO_DEVICE)); + + GELOGI("Update ext info end."); + return SUCCESS; +} + +Status AiCpuBaseTask::UpdateOutputShape(vector &output_desc) { + if (num_outputs_ == 0) { + GELOGD("AiCpuBaseTask output_num is 0, no need update output shape."); + return SUCCESS; + } + GELOGD("Start to update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape."); + + GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), ext_info_addr_dev_, + aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_DEVICE_TO_HOST)); + + for (size_t i = 0; i < num_outputs_; ++i) { + GeShape shape; + DataType data_type; + aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type); + GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), "AiCpuCCTask Update [%zu]th output shape failed.", + i); + } + GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished."); + return SUCCESS; +} + +Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc) { + auto shape_old = output_desc.GetShape(); + output_desc.SetShape(shape_new); + GELOGD("Update AiCpuBaseTask shape from %s to %s", shape_old.ToString().c_str(), shape_new.ToString().c_str()); + + auto origin_shape_old = output_desc.GetOriginShape(); + auto origin_format = output_desc.GetOriginFormat(); + auto format = output_desc.GetFormat(); + if (origin_format == format) { + output_desc.SetOriginShape(shape_new); + return SUCCESS; + 
} + + std::vector origin_dims_new; + + auto trans_ret = + formats::TransShape(format, shape_new.GetDims(), output_desc.GetDataType(), origin_format, origin_dims_new); + GE_CHK_STATUS_RET(trans_ret, "AiCpuTask originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.", + origin_format, format, shape_new.ToString().c_str()); + + auto origin_shape_new = GeShape(origin_dims_new); + output_desc.SetOriginShape(origin_shape_new); + GELOGD("AiCpuTask originFormat[%d] is not same as format[%d], need update from %s ro %s.", origin_format, format, + origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); + return SUCCESS; +} + AiCpuTask::~AiCpuTask() { if (args_ != nullptr) { (void)rtFree(args_); @@ -226,6 +343,43 @@ AiCpuTask::~AiCpuTask() { if (io_addr_ != nullptr) { (void)rtFree(io_addr_); } + + if (dynamic_flag_ && workspace_addr_ != nullptr) { + (void)rtFree(workspace_addr_); + } + if (copy_workspace_buf_ != nullptr) { + (void)rtFree(copy_workspace_buf_); + } + + if (copy_ioaddr_dev_ != nullptr) { + (void)rtFree(copy_ioaddr_dev_); + } + + if (copy_input_release_flag_dev_ != nullptr) { + (void)rtFree(copy_input_release_flag_dev_); + } + + if (copy_input_data_size_dev_ != nullptr) { + (void)rtFree(copy_input_data_size_dev_); + } + + if (copy_input_src_dev_ != nullptr) { + (void)rtFree(copy_input_src_dev_); + } + + if (copy_input_dst_dev_ != nullptr) { + (void)rtFree(copy_input_dst_dev_); + } + + if (copy_task_args_buf_ != nullptr) { + (void)rtFree(copy_task_args_buf_); + } + + for (auto summary : output_summary_) { + if (summary != nullptr) { + (void)rtFree(summary); + } + } } const void *AiCpuTask::GetIOAddr() const { return io_addr_; } @@ -247,15 +401,225 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { } GELOGI("[TASK_INFO] is %s", this->task_info_.c_str()); - auto status = OpenDump(args_, op_desc_, stream); - if (status != SUCCESS) { - GELOGE(status, "Open dump failed in aicpu single op %s", op_type_.c_str()); - return 
status; - } GELOGD("Done launch kernel successfully. task = %s", this->op_type_.c_str()); return SUCCESS; } +Status AiCpuTask::PrepareCopyInputs(vector &outputs, const std::vector &out_shape_hbm) { + std::vector copy_input_release_flag; + std::vector copy_input_data_size; + std::vector copy_input_src; + std::vector copy_input_dst; + + for (size_t i = 0; i < num_outputs_; ++i) { + const auto &summary = output_summary_host_[i]; + GELOGI("Node out[%zu] summary, shape data=0x%lx, shape data size=%lu, raw data=0x%lx, raw data size=%lu.", i, + summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, summary.raw_data_size); + auto output = outputs[i]; + copy_input_release_flag.emplace_back(kReleaseFlag); + copy_input_data_size.emplace_back(summary.raw_data_size); + copy_input_src.emplace_back(summary.raw_data_ptr); + copy_input_dst.emplace_back(reinterpret_cast(output)); + + const auto &shape_buffer = out_shape_hbm[i]; + copy_input_release_flag.emplace_back(kReleaseFlag); + copy_input_data_size.emplace_back(summary.shape_data_size); + copy_input_src.emplace_back(summary.shape_data_ptr); + copy_input_dst.emplace_back(reinterpret_cast(shape_buffer)); + } + + const size_t copy_input_buf_len = num_outputs_ * kCopyNum * sizeof(uint64_t); + + GE_CHK_RT_RET(rtMemcpy(copy_input_release_flag_dev_, copy_input_buf_len, copy_input_release_flag.data(), + copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_data_size_dev_, copy_input_buf_len, copy_input_data_size.data(), copy_input_buf_len, + RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_src_dev_, copy_input_buf_len, copy_input_src.data(), copy_input_buf_len, + RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_dst_dev_, copy_input_buf_len, copy_input_dst.data(), copy_input_buf_len, + RT_MEMCPY_HOST_TO_DEVICE)); + return SUCCESS; +} + +Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector &out_shape_hbm) { + for (size_t i = 0; i < num_outputs_; ++i) { 
+ auto &result_summary = output_summary_host_[i]; + + GE_CHK_RT_RET(rtMemcpy(&result_summary, sizeof(aicpu::FWKAdapter::ResultSummary), output_summary_[i], + sizeof(aicpu::FWKAdapter::ResultSummary), RT_MEMCPY_DEVICE_TO_HOST)); + auto shape_data_size = result_summary.shape_data_size; + void *shape_buffer = nullptr; + GE_MAKE_GUARD_RTMEM(shape_buffer); + GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM)); + out_shape_hbm.emplace_back(shape_buffer); + } + return SUCCESS; +} + +Status AiCpuTask::CopyDataToHbm(vector &outputs, const std::vector &out_shape_hbm, rtStream_t stream) { + GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs, out_shape_hbm)); + + GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, stream)); + GE_CHK_RT_RET(rtStreamSynchronize(stream)); + return SUCCESS; +} + +Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc, const std::vector &out_shape_hbm) { + for (size_t i = 0; i < num_outputs_; ++i) { + const auto &result_summary = output_summary_host_[i]; + std::vector shape_dims; + const auto &shape_hbm = out_shape_hbm[i]; + + uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); + std::unique_ptr shape_addr(new (std::nothrow) int64_t[dim_num]()); + GE_CHECK_NOTNULL(shape_addr); + GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, result_summary.shape_data_size, + RT_MEMCPY_DEVICE_TO_HOST)); + + for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { + shape_dims.emplace_back(shape_addr[dim_idx]); + GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]); + } + + GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]), + "AiCpuTask update [%zu]th output shape failed.", i); + } + return SUCCESS; +} + +Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output_desc, vector &outputs, + rtStream_t stream) { + if (num_outputs_ == 0) { + GELOGI("Output num is 0, there is no need to 
update the output and size."); + return SUCCESS; + } + + GELOGI("Update shape and data by result summary begin."); + + std::vector out_shape_hbm; + GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(out_shape_hbm), + "Read ResultSummary and update output shape failed."); + + GE_CHK_STATUS_RET(CopyDataToHbm(outputs, out_shape_hbm, stream), "Copy data to output failed."); + + GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc, out_shape_hbm), "Update shape by hbm buffer failed."); + + GELOGI("Update shape and data by result summary end."); + return SUCCESS; +} + +Status AiCpuTask::SetIO(const vector &inputs, vector &outputs) { + vector io_addrs; + io_addrs.reserve(num_inputs_ + num_outputs_); + for (size_t i = 0; i < num_inputs_; ++i) { + GE_CHECK_NOTNULL(inputs[i]); + GELOGD("AiCpuTask input[%zu] addr = %p", i, inputs[i]); + io_addrs.emplace_back(reinterpret_cast(inputs[i])); + } + + if (unknown_type_ != DEPEND_COMPUTE) { + for (size_t i = 0; i < num_outputs_; ++i) { + GE_CHECK_NOTNULL(outputs[i]); + GELOGD("AiCpuTask output[%zu] addr = %p", i, outputs[i]); + io_addrs.emplace_back(reinterpret_cast(outputs[i])); + } + } else { + for (size_t i = 0; i < num_outputs_; ++i) { + void *summary_addr = output_summary_[i]; + io_addrs.emplace_back(reinterpret_cast(summary_addr)); + } + } + + if (!io_addrs.empty()) { + auto *dst_io_addr = const_cast(reinterpret_cast(io_addr_)); + GE_CHK_RT_RET(rtMemcpy(dst_io_addr, sizeof(uint64_t) * io_addrs.size(), &io_addrs[0], + sizeof(uint64_t) * io_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHECK_NOTNULL(dst_io_addr); + }; + return SUCCESS; +} + +Status AiCpuTask::InitForSummaryAndCopy() { + if (unknown_type_ != DEPEND_COMPUTE || num_outputs_ == 0) { + GELOGI("Unknown_type is %d, output num is %d.", unknown_type_, num_outputs_); + return SUCCESS; + } + + output_summary_.resize(num_outputs_); + constexpr auto result_summary_size = sizeof(aicpu::FWKAdapter::ResultSummary); + for (size_t i = 0; i < num_outputs_; ++i) { + 
GE_CHK_RT_RET(rtMalloc(&output_summary_[i], result_summary_size, RT_MEMORY_HBM)); + } + output_summary_host_.resize(num_outputs_); + + const size_t copy_input_buf_len = num_outputs_ * kCopyNum * sizeof(uint64_t); + + GE_CHK_RT_RET(rtMalloc(©_input_release_flag_dev_, copy_input_buf_len, RT_MEMORY_HBM)); + GE_CHK_RT_RET(rtMalloc(©_input_data_size_dev_, copy_input_buf_len, RT_MEMORY_HBM)); + GE_CHK_RT_RET(rtMalloc(©_input_src_dev_, copy_input_buf_len, RT_MEMORY_HBM)); + GE_CHK_RT_RET(rtMalloc(©_input_dst_dev_, copy_input_buf_len, RT_MEMORY_HBM)); + + GE_CHK_RT_RET(rtMalloc(©_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM)); + + std::vector copy_io_addr; + copy_io_addr.emplace_back(reinterpret_cast(copy_input_release_flag_dev_)); + copy_io_addr.emplace_back(reinterpret_cast(copy_input_data_size_dev_)); + copy_io_addr.emplace_back(reinterpret_cast(copy_input_src_dev_)); + copy_io_addr.emplace_back(reinterpret_cast(copy_input_dst_dev_)); + + const auto copy_io_addr_size = sizeof(uint64_t) * copy_io_addr.size(); + + GE_CHK_RT_RET(rtMalloc(©_ioaddr_dev_, copy_io_addr_size, RT_MEMORY_HBM)); + + GE_CHK_RT_RET( + rtMemcpy(copy_ioaddr_dev_, copy_io_addr_size, copy_io_addr.data(), copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); + return SUCCESS; +} + +Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { + if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { + GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), + kernel_def.args_size()); + return PARAM_INVALID; + } + GE_CHK_RT_RET(rtMalloc(©_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM)); + GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(), kernel_def.task_info().data(), + kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); + + STR_FWK_OP_KERNEL aicpu_task = {0}; + auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), kernel_def.args().data(), kernel_def.args().size()); + if (sec_ret != EOK) 
{ + GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + return FAILED; + } + + aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_); + aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(copy_workspace_buf_); + aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; + aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; + + GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), &aicpu_task, sizeof(STR_FWK_OP_KERNEL), + RT_MEMCPY_HOST_TO_DEVICE)); + return SUCCESS; +} + +Status AiCpuTask::LaunchKernel(const std::vector &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs, + rtStream_t stream) { + GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc)); + GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs)); + GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); + GE_CHK_RT_RET(rtStreamSynchronize(stream)); + + if (unknown_type_ == DEPEND_SHAPE_RANGE) { + GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); + } else if (unknown_type_ == DEPEND_COMPUTE) { + GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, outputs, stream)); + } + + return SUCCESS; +} + void AiCpuCCTask::SetKernelArgs(std::unique_ptr args, size_t arg_size) { args_ = std::move(args); arg_size_ = arg_size; @@ -291,11 +655,34 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { } GELOGD("Invoke rtCpuKernelLaunch succeeded"); - auto status = OpenDump(args_.get(), op_desc_, stream); - if (status != SUCCESS) { - GELOGE(status, "Open dump failed in aicpucc single op"); - return status; + return SUCCESS; +} + +Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs, + rtStream_t stream) { + GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, + "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", unknown_type_); + + GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, 
output_desc)); + + size_t arg_index = 0; + auto *task_io_addr = reinterpret_cast(io_addr_); + GE_CHECK_NOTNULL(task_io_addr); + for (auto &input : inputs) { + task_io_addr[arg_index++] = reinterpret_cast(input); + } + for (auto &output : outputs) { + task_io_addr[arg_index++] = reinterpret_cast(output); } + + GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); + GE_CHK_RT_RET(rtStreamSynchronize(stream)); + + if (unknown_type_ == DEPEND_SHAPE_RANGE) { + GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); + } + return SUCCESS; } } // namespace ge diff --git a/src/ge/single_op/task/op_task.h b/src/ge/single_op/task/op_task.h index a571bce1..b6ea9114 100644 --- a/src/ge/single_op/task/op_task.h +++ b/src/ge/single_op/task/op_task.h @@ -27,6 +27,9 @@ #include "graph/op_kernel_bin.h" #include "runtime/stream.h" #include "graph/node.h" +#include "cce/aicpu_engine_struct.h" +#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" +#include "init/gelib.h" namespace ge { enum OpTaskType { @@ -52,14 +55,20 @@ class OpTask { virtual const void *GetIOAddr() const = 0; const vector &GetWorkspaceSizes() const; void SetWorkspaceSizes(const vector &workspace_sizes); + const OpDescPtr &GetOpdesc() const { return op_desc_; } + Status OpenDump(const std::vector &io_addr, rtStream_t stream); + virtual Status LaunchKernel(const std::vector &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs, rtStream_t stream) { + return UNSUPPORTED; + } private: std::vector workspace_sizes_; protected: - Status OpenDump(const void *arg, const OpDescPtr &op_desc, rtStream_t stream); DumpProperties dump_properties_; DumpOp dump_op_; + OpDescPtr op_desc_; }; class TbeOpTask : public OpTask { @@ -97,10 +106,30 @@ class TbeOpTask : public OpTask { uint32_t max_tiling_size_ = 0; std::string tiling_data_; NodePtr node_; - OpDescPtr op_desc_; }; -class AiCpuTask : public OpTask { +class AiCpuBaseTask : public OpTask { + public: + AiCpuBaseTask() = default; + 
~AiCpuBaseTask() override; + const UnknowShapeOpType GetUnknownType() const { return unknown_type_; } + + protected: + Status SetExtInfoAndType(const std::string &kernel_ext_info); + + Status UpdateExtInfo(const std::vector &input_desc, std::vector &output_desc); + Status UpdateOutputShape(vector &output_desc); + Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc); + + protected: + size_t num_inputs_ = 0; + size_t num_outputs_ = 0; + UnknowShapeOpType unknown_type_ = DEPEND_IN_SHAPE; + std::unique_ptr aicpu_ext_handle_; + void *ext_info_addr_dev_ = nullptr; +}; + +class AiCpuTask : public AiCpuBaseTask { public: AiCpuTask() = default; ~AiCpuTask() override; @@ -109,7 +138,24 @@ class AiCpuTask : public OpTask { OpTaskType GetOpTaskType() override { return OP_TASK_AICPU; } const void *GetIOAddr() const override; + Status LaunchKernel(const std::vector &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs, rtStream_t stream) override; + Status SetMemCopyTask(const domi::KernelExDef &kernel_def); + private: + Status SetIO(const vector &inputs, vector &outputs); + + // for copy task. 
+ Status InitForSummaryAndCopy(); + Status UpdateShapeAndDataByResultSummary(vector &output_desc, vector &outputs, + rtStream_t stream); + Status ReadResultSummaryAndPrepareMemory(std::vector &out_shape_hbm); + + Status CopyDataToHbm(vector &outputs, const std::vector &out_shape_hbm, rtStream_t stream); + Status PrepareCopyInputs(vector &outputs, const std::vector &out_shape_hbm); + + Status UpdateShapeByHbmBuffer(vector &output_desc, const std::vector &out_shape_hbm); + friend class AiCpuTaskBuilder; void *workspace_addr_ = nullptr; std::string task_info_; @@ -117,10 +163,24 @@ class AiCpuTask : public OpTask { size_t arg_size_ = 0; std::string op_type_; void *io_addr_ = nullptr; - OpDescPtr op_desc_; + + bool dynamic_flag_ = false; + // for copy task + void *copy_task_args_buf_; + void *copy_workspace_buf_; + + std::vector output_summary_; + std::vector output_summary_host_; + + void *copy_ioaddr_dev_; + + void *copy_input_release_flag_dev_; + void *copy_input_data_size_dev_; + void *copy_input_src_dev_; + void *copy_input_dst_dev_; }; -class AiCpuCCTask : public OpTask { +class AiCpuCCTask : public AiCpuBaseTask { public: AiCpuCCTask() = default; ~AiCpuCCTask() override; @@ -137,6 +197,9 @@ class AiCpuCCTask : public OpTask { void SetIoAddr(void *io_addr); size_t GetArgSize() const; + Status LaunchKernel(const std::vector &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs, rtStream_t stream) override; + private: friend class AiCpuCCTaskBuilder; std::string so_name_; @@ -146,7 +209,6 @@ class AiCpuCCTask : public OpTask { uint32_t block_dim_ = 1; void *sm_desc_ = nullptr; void *io_addr_ = nullptr; - OpDescPtr op_desc_; }; } // namespace ge diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h index d11fdc95..07d25fc7 100644 --- a/third_party/fwkacllib/inc/ops/aipp.h +++ b/third_party/fwkacllib/inc/ops/aipp.h @@ -25,14 +25,16 @@ namespace ge { /** -*@brief Performs AI pre-processing (AIPP) on 
images including color space conversion (CSC), image normalization (by subtracting the mean value or multiplying a factor), image cropping (by specifying the crop start and cropping the image to the size required by the neural network), and much more. +*@brief Performs AI pre-processing (AIPP) on images including color space conversion (CSC), +image normalization (by subtracting the mean value or multiplying a factor), image cropping +(by specifying the crop start and cropping the image to the size required by the neural network), and much more. \n *@par Inputs: *@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer. -*@li params: Dynamic AIPP configuration parameters of type uint8. +*@li params: Dynamic AIPP configuration parameters of type uint8. \n *@par Attributes: -*aipp_config_path: A required string, specifying the path of the AIPP configuration file +*aipp_config_path: A required string, specifying the path of the AIPP configuration file. \n *@par Outputs: *features: The AIPP-processed output tensor of type float16 or uint8. @@ -47,17 +49,17 @@ REG_OP(Aipp) .OP_END_FACTORY_REG(Aipp) /** -*@brief Performs this op is for dynamic aipp.If you set aipp-mode to dynamic \n -in aipp config file, framework will auto add one input node to graph at last. +*@brief Performs this op is for dynamic aipp.If you set aipp-mode to dynamic +in aipp config file, framework will auto add one input node to graph at last. \n *@par Inputs: -*data: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer. +*data: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer. \n *@par Attributes: -*index: specify aipp serial num +*index: specify aipp serial num \n *@par Outputs: -*out: The AIPP-processed output tensor of all types. +*out: The AIPP-processed output tensor of all types. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator AippData. 
diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index ea82e0fa..1af02b05 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -27,29 +27,29 @@ namespace ge { /** -*@brief Applies lower_bound(sorted_search_values, values) along each row. +*@brief Applies lower_bound(sorted_search_values, values) along each row. \n *@par Inputs: -*The input sorted_x and values can be one-dimensional vector. Inputs include: \n +*The input sorted_x and values can be one-dimensional vector. Inputs include: * @li sorted_x:A `Tensor`. 2-D Tensor where each row is ordered. -* @li values:A `Tensor`. Must have the same type as `sorted_x`. +* @li values:A `Tensor`. Must have the same type as `sorted_x`. \n *@par Attributes: -*@li out_type:An optional `DType` from: `int32, int64`. \n -Defaults to `int32`. +*@li out_type:An optional `DType` from: `int32, int64`. +Defaults to `int32`. \n *@par Outputs: -*y: A `Tensor` of type `out_type`. +*y: A `Tensor` of type `out_type`. \n -*@attention Constraints: \n -*-The implementation for LowerBound on Ascend uses AI CPU, with bad performance. \n +*@attention Constraints: +*The implementation for LowerBound on Ascend uses AI CPU, with bad performance. \n *@par Quantization supported or not *Not supported *@par Quantized inference supported or not *Supported *@par L2 convergence supported or not -*@par Multiple batches supported or not +*@par Multiple batches supported or not \n *@par Third-party framework compatibility *Compatible with tensorflow Operator LowerBound. @@ -65,25 +65,25 @@ REG_OP(LowerBound) .OP_END_FACTORY_REG(LowerBound) /** -*@brief Reverses variable length slices. +*@brief Reverses variable length slices. \n *@par Inputs: -*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" \n +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" are 0D scalars. * @li x: A Tensor. The input to reverse. 
-* @li seq_lengths: A 1D Tensor of type int32 or int64. +* @li seq_lengths: A 1D Tensor of type int32 or int64. \n *@par Attributes: -*@li seq_dim: An optional int. Defaults to "0". The dimension along which \n -reversal is performed. -*@li batch_dim: An optional int. Defaults to "0". The dimension along which \n +*@li seq_dim: An optional int. The dimension along which reversal is performed. +*@li batch_dim: An optional int. Defaults to "0". The dimension along which +reversal is performed. \n *@par Outputs: -*y: A rank k tensor. Has the same shape as input. The extracted banded tensor. +*y: A rank k tensor. Has the same shape as input. The extracted banded tensor. \n -*@attention Constraints: \n -*ReverseSequence runs on the Ascend AI CPU, which delivers poor performance. +*@attention Constraints: +*ReverseSequence runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ReverseSequence. @@ -102,21 +102,21 @@ REG_OP(ReverseSequence) .OP_END_FACTORY_REG(ReverseSequence) /** -*@brief Copies a tensor setting everything outside a central band in each innermost matrix. +*@brief Copies a tensor setting everything outside a central band in each innermost matrix. \n *@par Inputs: -*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" \n +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" are 0D scalars. * @li x: A rank k tensor. -* @li num_lower: A 0D tensor. Number of superdiagonals to keep. If negative, \n -keeps entire upper triangle. -* @li num_upper: A 0D tensor. Number of superdiagonals to keep. If negative, \n +* @li num_lower: A 0D tensor. Number of superdiagonals to keep. If negative, keeps entire upper triangle. +* @li num_upper: A 0D tensor. Number of superdiagonals to keep. If negative, +keeps entire upper triangle. \n *@par Outputs: -*y: A rank k tensor. Has the same shape as input. The extracted banded tensor. 
+*y: A rank k tensor. Has the same shape as input. The extracted banded tensor. \n -*@attention Constraints: \n +*@attention Constraints: *MatrixBandPart runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -136,23 +136,23 @@ REG_OP(MatrixBandPart) .OP_END_FACTORY_REG(MatrixBandPart) /** -*@brief Finds unique elements in a 1D tensor. +*@brief Finds unique elements in a 1D tensor. \n *@par Inputs: -*x: 1D tensor. \n -*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" \n -are 0D scalars. +*x: 1D tensor. +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" +are 0D scalars. \n *@par Attributes: -*out_idx: An optional DType from: "int32, int64". \n -Defaults to "int32". +*out_idx: An optional DType from: "int32, int64". +Defaults to "int32". \n *@par Outputs: *@li y: A Tensor. Has the same type as "x". *@li idx: A Tensor of type "out_idx". -*@li count: A Tensor of type "out_idx". +*@li count: A Tensor of type "out_idx". \n -*@attention Constraints: \n +*@attention Constraints: *UniqueWithCounts runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -170,21 +170,21 @@ REG_OP(UniqueWithCounts) .OP_END_FACTORY_REG(UniqueWithCounts) /** -*@brief Finds unique elements in a 1D tensor. +*@brief Finds unique elements in a 1D tensor. \n *@par Inputs: -*x: 1D tensor. \n -*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" \n -are 0D scalars. +*x: 1D tensor. +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" +are 0D scalars. \n *@par Attributes: -*out_idx: An optional DType from: "int32, int64". Defaults to "int32". +*out_idx: An optional DType from: "int32, int64". Defaults to "int32". \n *@par Outputs: *@li y: "x" in the unique output "y". -*@li idx: A tensor the same size as "x". The index of each value of "x". +*@li idx: A tensor the same size as "x". The index of each value of "x". 
\n -*@attention Constraints: \n +*@attention Constraints: *Unique runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -201,24 +201,24 @@ REG_OP(Unique) .OP_END_FACTORY_REG(Unique) /** -*@brief Finds unique elements in a 1D tensor. +*@brief Finds unique elements in a 1D tensor. \n *@par Inputs: -*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" \n -are 0D scalars. \n +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" +are 0D scalars. *Including: * @li x: 1D tensor. -* @li axis: A Tensor of type int32. Defaults to "None". +* @li axis: A Tensor of type int32. Defaults to "None". \n *@par Attributes: -*out_idx: An optional DType from: "int32, int64". \n -Defaults to "int32". +*out_idx: An optional DType from: "int32, int64". +Defaults to "int32". \n *@par Outputs: *@li y: "x" in the unique output "y". -*@li idx: A tensor the same size as "x". The index of each value of "x". +*@li idx: A tensor the same size as "x". The index of each value of "x". \n -*@attention Constraints: \n +*@attention Constraints: *UniqueExt2 runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -236,15 +236,15 @@ REG_OP(UniqueExt2) .OP_END_FACTORY_REG(UniqueExt2) /** -*@brief Computes the inverse permutation of a tensor. +*@brief Computes the inverse permutation of a tensor. \n *@par Inputs: *x: A k-dimensional tensor. \n *@par Outputs: -*y: A 1D tensor. +*y: A 1D tensor. \n -*@attention Constraints: \n +*@attention Constraints: *InvertPermutation runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -257,18 +257,18 @@ REG_OP(InvertPermutation) .OP_END_FACTORY_REG(InvertPermutation) /** -*@brief Checks a tensor for NaN and Inf values. +*@brief Checks a tensor for NaN and Inf values. \n *@par Inputs: *x: A k-dimensional tensor. \n *@par Attributes: -*message: Prefix of the error message. 
+*message: Prefix of the error message. \n *@par Outputs: -*y: The output tensor. +*y: The output tensor. \n -*@attention Constraints: \n +*@attention Constraints: *CheckNumerics runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -282,19 +282,19 @@ REG_OP(CheckNumerics) .OP_END_FACTORY_REG(CheckNumerics) /** -*@brief Converts an array of flat indices into a tuple of coordinate arrays. +*@brief Converts an array of flat indices into a tuple of coordinate arrays. \n *@par Inputs: -*Input "indices" is a 0D or 1D tensor. Input "dims" is a 1D tensor. \n -* @li indices: A 0D or 1D int Tensor whose elements are indices into \n +*Input "indices" is a 0D or 1D tensor. Input "dims" is a 1D tensor. +* @li indices: A 0D or 1D int Tensor whose elements are indices into the flattened version of an array of dimensions "dims". -* @li dims: A 1D int Tensor of the same type as "indices". \n -*The shape of the array to use for unraveling indices. +* @li dims: A 1D int Tensor of the same type as "indices". +*The shape of the array to use for unraveling indices. \n *@par Outputs: -*y: A Tensor. Has the same type as "indices". +*y: A Tensor. Has the same type as "indices". \n -*@attention Constraints: \n +*@attention Constraints: *UnravelIndex runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -308,20 +308,20 @@ REG_OP(UnravelIndex) .OP_END_FACTORY_REG(UnravelIndex) /** -*@brief Applies upper_bound(sorted_search_values, values) along each row. +*@brief Applies upper_bound(sorted_search_values, values) along each row. \n *@par Inputs: *Inputs "sorted_x" and "values" are 2D tensors. * @li sorted_x: A 2D Tensor where each row is ordered. -* @li values: A 2D Tensor with the same numbers of rows as "sorted_x. +* @li values: A 2D Tensor with the same numbers of rows as "sorted_x. \n *@par Attributes: -*out_type: sets the optional out_type attribute to value. 
+*out_type: sets the optional out_type attribute to value. \n *@par Outputs: -*y: A Tensor with the same shape as "values". +*y: A Tensor with the same shape as "values". \n -*@attention Constraints: \n +*@attention Constraints: *UpperBound runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -338,23 +338,23 @@ REG_OP(UpperBound) .OP_END_FACTORY_REG(UpperBound) /** -*@brief Finds unique elements in a 1D tensor. +*@brief Finds unique elements in a 1D tensor. \n *@par Inputs: -*Inputs "x" and "axis" are 1D vectors. \n +*Inputs "x" and "axis" are 1D vectors. * @li x: A 1D tensor. -* @li axis: A 1D tensor. +* @li axis: A 1D tensor. \n *@par Attributes: -*out_idx: An optional DType from: "int32, int64". \n -Defaults to "int32". +*out_idx: An optional DType from: "int32, int64". +Defaults to "int32". \n *@par Outputs: *@li y: "x" in the unique output "y". *@li idx: A tensor the same size as "x". The index of each value of "x". -*@li count: A tensor the same size as "x". The index of each value of "x". +*@li count: A tensor the same size as "x". The index of each value of "x". \n -*@attention Constraints: \n +*@attention Constraints: *UniqueWithCountsExt2 runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -373,23 +373,23 @@ REG_OP(UniqueWithCountsExt2) .OP_END_FACTORY_REG(UniqueWithCountsExt2) /** -*@brief Fills the tensor with the mirror value. +*@brief Fills the tensor with the mirror value. \n *@par Inputs: -*Inputs "x" and "paddings" are 1D scalars. \n +*Inputs "x" and "paddings" are 1D scalars. * @li x: The tensor to be padded. -* @li paddings: A two-column matrix specifying the padding sizes. \n -The number of rows Has the same rank as "x". +* @li paddings: A two-column matrix specifying the padding sizes. +The number of rows Has the same rank as "x". \n *@par Attributes: -*mode: Either "REFLECT" or "SYMMETRIC". 
In reflect mode the padded regions \n -do not include the borders, while in symmetric mode the padded regions \n -do include the borders. +*mode: Either "REFLECT" or "SYMMETRIC". In reflect mode the padded regions +do not include the borders, while in symmetric mode the padded regions +do include the borders. \n *@par Outputs: -*y: The padded tensor. +*y: The padded tensor. \n -*@attention Constraints: \n +*@attention Constraints: *MirrorPad runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -408,21 +408,21 @@ REG_OP(MirrorPad) .OP_END_FACTORY_REG(MirrorPad) /** -*@brief Calculates the difference between two numbers or a list of strings. +*@brief Calculates the difference between two numbers or a list of strings. \n *@par Inputs: -*Inputs "x" and "y" are 1D vectors. \n +*Inputs "x" and "y" are 1D vectors. * @li x: A Tensor. 1D. Values to keep. -* @li y: A Tensor. Must have the same type as x. 1D. Values to remove. +* @li y: A Tensor. Must have the same type as x. 1D. Values to remove. \n *@par Attributes: -*out_idx: An optional DType from: "int32, int64". Defaults to "int32". +*out_idx: An optional DType from: "int32, int64". Defaults to "int32". \n *@par Outputs: *@li out: A Tensor. Has the same type as "x". -*@li idx: A Tensor of type "out_idx". +*@li idx: A Tensor of type "out_idx". \n -*@attention Constraints: \n +*@attention Constraints: *ListDiff runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -441,14 +441,14 @@ REG_OP(ListDiff) .OP_END_FACTORY_REG(ListDiff) /** -*@brief Create an empty tensor, using the shape and dtype specified in attributes. +*@brief Create an empty tensor, using the shape and dtype specified in attributes. \n *@par Attributes: *@li dtype: Specify the data type of the empty tensor. -*@li shape: Specify the shape of the empty tensor. +*@li shape: Specify the shape of the empty tensor. 
\n *@par Outputs: -*y: The empty constant tensor. +*y: The empty constant tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator _ParallelConcatStart. @@ -461,14 +461,14 @@ REG_OP(_ParallelConcatStart) .OP_END_FACTORY_REG(_ParallelConcatStart) /** -*@brief Creates a constant tensor from a tensor-like object. This operator is used for inference. \n -Operator Const has the same definition as operator Constant. +*@brief Creates a constant tensor from a tensor-like object. This operator is used for inference. +Operator Const has the same definition as operator Constant. \n *@par Attributes: -*value: Required. The value and type of the resulting tensor, and no restrictions on type. +*value: Required. The value and type of the resulting tensor, and no restrictions on type. \n *@par Outputs: -*y: A constant tensor. +*y: A constant tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Const. @@ -480,13 +480,13 @@ REG_OP(Const) .OP_END_FACTORY_REG(Const) /** -*@brief Creates a constant tensor for training. +*@brief Creates a constant tensor for training. \n *@par Attributes: -*value: Required. The value and type of the resulting tensor, and no restrictions on type. +*value: Required. The value and type of the resulting tensor, and no restrictions on type. \n *@par Outputs: -*y: The constant tensor. +*y: The constant tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Const. @@ -498,13 +498,13 @@ REG_OP(Constant) .OP_END_FACTORY_REG(Constant) /** -*@brief Returns a copy of the input tensor. +*@brief Returns a copy of the input tensor. \n *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Outputs: -*y: A tensor. +*y: A tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Snapshot. 
@@ -517,13 +517,13 @@ REG_OP(Snapshot) .OP_END_FACTORY_REG(Snapshot) /** -*@brief Gives a guarantee to the runtime that the input tensor is a constant. +*@brief Gives a guarantee to the runtime that the input tensor is a constant. \n *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Outputs: -*y: The input tensor. +*y: The input tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator GuaranteeConst. @@ -536,14 +536,14 @@ REG_OP(GuaranteeConst) .OP_END_FACTORY_REG(GuaranteeConst) /** -*@brief Returns the target shape for broadcasting shapes "x1" and "x2". +*@brief Returns the target shape for broadcasting shapes "x1" and "x2". \n *@par Inputs: *@li x1: A tensor of type int32 or int64. A shape. -*@li x2: A tensor of the same type as "x1". The other shape. +*@li x2: A tensor of the same type as "x1". The other shape. \n *@par Outputs: -*y: A tensor. The broadcasted shape. +*y: A tensor. The broadcasted shape. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator BroadcastArgs. @@ -555,16 +555,16 @@ REG_OP(BroadcastArgs) .OP_END_FACTORY_REG(BroadcastArgs) /** -*@brief Outputs its input tensor as is and triggers an error if a gradient is requested. +*@brief Outputs its input tensor as is and triggers an error if a gradient is requested. \n *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Attributes: -*message: Will be printed in the error at the attempt to request a gradient. +*message: Will be printed in the error at the attempt to request a gradient. \n *@par Outputs: -*y: The input tensor. +*y: The input tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator PreventGradient. @@ -578,16 +578,16 @@ REG_OP(PreventGradient) .OP_END_FACTORY_REG(PreventGradient) /** -*@brief Returns the reduction indices for computing gradients of "x1" and "x2" with broadcast. +*@brief Returns the reduction indices for computing gradients of "x1" and "x2" with broadcast. 
\n *@par Inputs: *@li x1: A tensor of type int32 or int64. -*@li x2: A tensor of type int32 or int64. \n -"x2" has the same type as "x1". +*@li x2: A tensor of type int32 or int64. +"x2" has the same type as "x1". \n *@par Outputs: *@li y1: A tensor. Reduction indices of "x1". -*@li y2: A tensor. Reduction indices of "x2". +*@li y2: A tensor. Reduction indices of "x2". \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator BroadcastGradientArgs. @@ -604,10 +604,10 @@ REG_OP(BroadcastGradientArgs) *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Outputs: -*y: The input tensor. +*y: The input tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator StopGradient. @@ -620,13 +620,13 @@ REG_OP(StopGradient) .OP_END_FACTORY_REG(StopGradient) /** -*@brief Return a tensor with the same shape and contents as input. +*@brief Return a tensor with the same shape and contents as input. \n *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Outputs: -*y: A tensor. +*y: A tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Identity. @@ -639,13 +639,14 @@ REG_OP(Identity) .OP_END_FACTORY_REG(Identity) /** -*@brief Returns a list of tensors with the same shapes and contents as the input tensors. +*@brief Returns a list of tensors with the same shapes and contents as the input tensors. \n *@par Inputs: -*x: A list of input tensors. +*x: A list of input tensors. It's a dynamic input \n *@par Outputs: *y: A list of Tensor objects, with the same length as the input tensor list. +It's a dynamic output. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator IdentityN. @@ -658,14 +659,15 @@ REG_OP(IdentityN) .OP_END_FACTORY_REG(IdentityN) /** -*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without changing the data. +*@brief Inserts a dimension of 1 into a tensor's shape. 
Only the tensor shape is changed, without + changing the data. \n *@par Inputs: *@li x: A tensor. -*@li axis: The dimension index at which to expand. +*@li axis: The dimension index at which to expand. \n *@par Outputs: -*y: A tensor. +*y: A tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ExpandDims. @@ -679,14 +681,14 @@ REG_OP(ExpandDims) .OP_END_FACTORY_REG(ExpandDims) /** -*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without changing the data. +*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without changing the data. \n *@par Inputs: *@li x: Original tensor. -*@li axis: List of ints. +*@li axis: List of ints. \n *@par Outputs: -*y: Reshape tensor with same data as input. +*y: Reshape tensor with same data as input. \n *@par Third-party framework compatibility *Compatible with the Onnx operator Unsqueeze. @@ -699,21 +701,21 @@ REG_OP(Unsqueeze) .OP_END_FACTORY_REG(Unsqueeze) /** -*@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. +*@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. \n *@par Inputs: *@li x: A tensor. -*@li shape: A tensor. Defines the shape of the output tensor. +*@li shape: A tensor. Defines the shape of the output tensor. \n *@par Attributes: *@li axis: An optional int32 or int64. The first dimension to reshape. Defaults to "0". -*@li num_axes: An optional int32 or int64. The extent of the reshape. Defaults to "-1". +*@li num_axes: An optional int32 or int64. The extent of the reshape. Defaults to "-1". \n *@par Outputs: -*y: A tensor. +*y: A tensor. \n *@par Attention: -*This operator cannot be directly called by the acllopExecute API. +*This operator cannot be directly called by the acllopExecute API. \n *@par Third-party framework compatibility *@li Compatible with the TensorFlow operator Reshape. 
@@ -730,16 +732,17 @@ REG_OP(Reshape) .OP_END_FACTORY_REG(Reshape) /** -*@brief Removes dimensions of size 1 from the shape of a tensor. +*@brief Removes dimensions of size 1 from the shape of a tensor. \n *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Attributes: -*axis: An optional list of int32 or int64. If not specified, squeezes all dimensions of size 1. \n If specified, only squeezes the dimensions listed. It is an error to squeeze a dimension that is not 1. +*axis: An optional list of int32 or int64. If not specified, squeezes all dimensions of size 1. +If specified, only squeezes the dimensions listed. It is an error to squeeze a dimension that is not 1. \n *@par Outputs: -*y: A tensor. +*y: A tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Squeeze. @@ -751,13 +754,14 @@ REG_OP(Squeeze) .OP_END_FACTORY_REG(Squeeze) /** -*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. +*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of +indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Outputs: -*y: A tensor. The rank of input tensor. +*y: A tensor. The rank of input tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Rank. @@ -769,16 +773,16 @@ REG_OP(Rank) .OP_END_FACTORY_REG(Rank) /** -*@brief Returns the size of a tensor, that is, an integer of the number of elements of the tensor. +*@brief Returns the size of a tensor, that is, an integer of the number of elements of the tensor. \n *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Attributes: -*out_type: An optional int32 or int64. The output data type. Defaults to "int32". 
+*out_type: An optional int32 or int64. The output data type. Defaults to "int32". \n
 
 *@par Outputs:
-*y: A tensor. The size of the input tensor.
+*y: A tensor. The size of the input tensor. \n
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator Size.
@@ -791,18 +795,18 @@ REG_OP(Size)
     .OP_END_FACTORY_REG(Size)
 
 /**
-*@brief Input data for other operators.
+*@brief Input data for other operators. \n
 
 *@par Inputs:
-*x: A tensor.
+*x: A tensor. \n
 
 *@par Attributes:
-*index: Index of the input tensor.The data type must be int32 or int64. \n
-Assume that net has three data nodes, one should be set 0, another should \n
-be set 1, and the left should be set 2.
+*index: Index of the input tensor. The data type must be int32 or int64.
+Assume that net has three data nodes, one should be set 0, another should
+be set 1, and the left should be set 2. \n
 
 *@par Outputs:
-*y: A tensor.
+*y: A tensor. \n
 
 *@par Third-party framework compatibility
 *Compatible with the Caffe operator Data.
@@ -814,19 +818,19 @@ REG_OP(Data)
     .OP_END_FACTORY_REG(Data)
 
 /**
-*@brief Inserts a placeholder for a tensor that will be always fed.
+*@brief Inserts a placeholder for a tensor that will be always fed. \n
 
 *@par Inputs:
-*x: A tensor.
+*x: A tensor. \n
 
 *@par Attributes:
 *@li peerIndex: An integer type. The index of the corresponding "end" node connected to.
 *@li parentId: A string, used to check if the nodes are from the saved parent node.
 *@li parentOpType: A string. Op type of the original node.
-*@li anchorIndex: An integer, used to check if the node is from the saved anchor.
+*@li anchorIndex: An integer, used to check if the node is from the saved anchor. \n
 
 *@par Outputs:
-*y: The created placeholder tensor.
+*y: The created placeholder tensor. \n
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator PlaceHolder.
@@ -841,17 +845,17 @@ REG_OP(PlaceHolder) .OP_END_FACTORY_REG(PlaceHolder) /** -*@brief Inserts a placeholder with default value for a tensor. +*@brief Inserts a placeholder with default value for a tensor. \n *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Attributes: *@li dtype: data type of tensor. -*@li shape: tensor shape. +*@li shape: tensor shape. \n *@par Outputs: -*y: The created placeholder tensor. +*y: The created placeholder tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator PlaceholderWithDefault. @@ -863,16 +867,16 @@ REG_OP(PlaceholderWithDefault) .OP_END_FACTORY_REG(PlaceholderWithDefault) /** -*@brief Reads and returns the value of the input variable tensor. +*@brief Reads and returns the value of the input variable tensor. \n *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Attributes: -*dtype: An optional int32 or int64. The output data type. Defaults to int32. +*dtype: An optional int32 or int64. The output data type. Defaults to int32. \n *@par Outputs: -*y: A tensor. +*y: A tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ReadVariableOp. @@ -892,21 +896,32 @@ REG_OP(End) .ATTR(parentOpType, String, "") // op type of original node .OP_END_FACTORY_REG(End) + +/** +*@brief Operations for writing summary data, for use in analysis and visualization. + +*@par Inputs: +* One input: +*x: Collections of summary data. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ REG_OP(Summary) .INPUT(x, TensorType::ALL()) .OP_END_FACTORY_REG(Summary) /** -*@brief Returns the shape of a tensor. +*@brief Returns the shape of a tensor. \n *@par Inputs: -*x: A tensor. +*x: A tensor. \n *@par Attributes: -*dtype: An optional int32 or int64. The output data type. Defaults to int32. +*dtype: An optional int32 or int64. The output data type. Defaults to int32. \n *@par Outputs: -*y: A tensor. The shape of the input tensor. +*y: A tensor. 
The shape of the input tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Size. @@ -919,16 +934,17 @@ REG_OP(Shape) .OP_END_FACTORY_REG(Shape) /** -*@brief Returns shape of tensors. +*@brief Returns shape of tensors. \n *@par Inputs: -*x: A list of input tensors. +*x: A list of input tensors. It's a dynamic input. \n *@par Attributes: -*dtype: An optional int32 or int64. The output data type. Defaults to "int32". +*dtype: An optional int32 or int64. The output data type. Defaults to "int32". \n *@par Outputs: *y: A list of tensors with the same length as the input list of tensors. +It's a dynamic output. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ShapeN. @@ -941,17 +957,18 @@ REG_OP(ShapeN) .OP_END_FACTORY_REG(ShapeN) /** -*@brief Creates a tensor with the given "shape" and "dtype". +*@brief Creates a tensor with the given "shape" and "dtype". \n *@par Inputs: -*shape: The shape of the output tensor. +*shape: The shape of the output tensor. \n *@par Attributes: *@li dtype: Optional. The data type of the output tensor. Defaults to "int32". -*@li init: An optional bool. If true, initializes the returned tensor with the default value of "dtype". Defaults to "false". +*@li init: An optional bool. If true, initializes the returned tensor with the default value of "dtype". +Defaults to "false". \n *@par Outputs: -*y: A tensor. +*y: A tensor. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Empty. @@ -965,21 +982,21 @@ REG_OP(Empty) .OP_END_FACTORY_REG(Empty) /** -*@brief Gradient op for MirrorPad op. Folds a mirror-padded tensor. +*@brief Gradient op for MirrorPad op. Folds a mirror-padded tensor. \n *@par Inputs: -*Inputs "x" and "y" are 1D vectors. \n +*Inputs "x" and "y" are 1D vectors. * @li x: A Tensor. The input tensor to be folded. -* @li paddings: A Tensor of type int32 or int64. A two-column matrix \n -specifying the padding sizes. 
+* @li paddings: A Tensor of type int32 or int64. A two-column matrix +specifying the padding sizes. \n *@par Attributes: -*mode: A string from: "REFLECT", "SYMMETRIC". The mode used in the MirrorPad op. +*mode: A string from: "REFLECT", "SYMMETRIC". The mode used in the MirrorPad op. \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". \n -*@attention Constraints: \n +*@attention Constraints: *MirrorPadGrad runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility @@ -998,18 +1015,18 @@ REG_OP(MirrorPadGrad) .OP_END_FACTORY_REG(MirrorPadGrad) /** -*@brief Returns locations of nonzero / true values in a tensor. +*@brief Returns locations of nonzero / true values in a tensor. \n *@par Inputs: -*Including: \n -*x: A Tensor. Must be one of the following types: \n -DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \n -DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL. +*Including: +*x: A Tensor. Must be one of the following types: +DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, +DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL. \n *@par Outputs: -*y: A Tensor of type DT_INT64. +*y: A Tensor of type DT_INT64. \n -*@attention Constraints:\n +*@attention Constraints: *Where runs on the Ascend AI CPU, which delivers poor performance.\n *@par Third-party framework compatibility @@ -1024,15 +1041,15 @@ REG_OP(Where) /** *@brief Derived from the Caffe operator Split that splits an input blob to -* multiple output blobs for feeding a blob into multiple output layers. \n -*The Split node is removed from the graph after the split operation is completed. +* multiple output blobs for feeding a blob into multiple output layers. +*The Split node is removed from the graph after the split operation is completed. \n *@par Inputs: -*x: A Tensor. 
Must be one of the following types: \n
-fp16, fp32, int8, uint8, int16, uint16, int32, uint32, int64, uint64.
+*x: A Tensor. Must be one of the following types:
+fp16, fp32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n
 
 *@par Outputs:
-*y: A Tensor. Has the same type as "x".It's required and the value should equal to output_num.
+*y: A Tensor. Has the same type as "x". It's required and the value should equal to output_num. \n
 
 *@par Attributes:
 *@li N: A required int. The parameter will get the number of dynamic outputs.
@@ -1046,17 +1063,17 @@ REG_OP(Copy)
     .OP_END_FACTORY_REG(Copy);
 
 /**
-*@brief Generates fingerprint values.
+*@brief Generates fingerprint values. \n
 
 *@par Inputs:
 *@li data: Must have rank 1 or higher.
-*@li method: Fingerprint method used by this op. Currently available method is \n
-`farmhash::fingerprint64`.
+*@li method: Fingerprint method used by this op. Currently available method is
+`farmhash::fingerprint64`. \n
 
 *@par Outputs:
-y: A two-dimensional `Tensor` of type `tf.uint8`. The first dimension equals to \n
-`data`'s first dimension, and the second dimension size depends on the \n
-fingerprint algorithm.
+y: A two-dimensional `Tensor` of type `tf.uint8`. The first dimension equals to
+`data`'s first dimension, and the second dimension size depends on the
+fingerprint algorithm. \n
 
 *@par Third-party framework compatibility
 * Compatible with TensorFlow Fingerprint operator.
@@ -1074,10 +1091,10 @@ REG_OP(Fingerprint)
 
 *
 *@par Inputs:
-*x: A Tensor.
+*x: A Tensor. \n
 
 *@par Outputs:
-*y: A Tensor. Has the same type as "x".It's required and the value should equal to output_num.
+*y: A Tensor. Has the same type as "x". It's required and the value should equal to output_num. \n
 
 *@par Attributes:
 *outShape: The shape of output will be inferred according to the attribute
@@ -1089,27 +1106,27 @@ REG_OP(TransShape)
     .OP_END_FACTORY_REG(TransShape);
 
 /**
-*@brief Computes the (possibly normalized) Levenshtein Edit Distance.
+*@brief Computes the (possibly normalized) Levenshtein Edit Distance. \n
 
 *@par Inputs:
-*@li hypothesis_indices: The indices of the hypothesis list SparseTensor.\n
+*@li hypothesis_indices: The indices of the hypothesis list SparseTensor.
 This is an N x R int64 matrix.
-*@li hypothesis_shape: The values of the hypothesis list SparseTensor.\n
+*@li hypothesis_values: The values of the hypothesis list SparseTensor.
 This is an N-length vector.
-*@li hypothesis_shape: The shape of the hypothesis list SparseTensor.\n
+*@li hypothesis_shape: The shape of the hypothesis list SparseTensor.
 This is an R-length vector.
-*@li truth_indices: The indices of the truth list SparseTensor.\n
+*@li truth_indices: The indices of the truth list SparseTensor.
 This is an M x R int64 matrix.
-*@li truth_shape: The values of the truth list SparseTensor.\n
+*@li truth_values: The values of the truth list SparseTensor.
 This is an M-length vector.
-*@li truth_shape: The shape of the truth list SparseTensor.\n
+*@li truth_shape: The shape of the truth list SparseTensor.
 This is an R-length vector
 
 *@par Attributes:
-*@li normalize: boolean (if true, edit distances are normalized by length of truth).
+*@li normalize: boolean (if true, edit distances are normalized by length of truth). \n
 
 *@par Outputs:
-*@li output: A dense float tensor with rank R - 1.
+*@li output: A dense float tensor with rank R - 1. \n
 
 *@par Third-party framework compatibility
 * Compatible with TensorFlow EditDistance operator.
diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h
index feecd7ae..149c57d5 100644
--- a/third_party/fwkacllib/inc/ops/audio_ops.h
+++ b/third_party/fwkacllib/inc/ops/audio_ops.h
@@ -26,29 +26,29 @@ namespace ge {
 
 /**
-*@brief Mel-Frequency Cepstral Coefficient (MFCC) calculation consists of \n
-taking the DCT-II of a log-magnitude mel-scale spectrogram.
+*@brief Mel-Frequency Cepstral Coefficient (MFCC) calculation consists of +taking the DCT-II of a log-magnitude mel-scale spectrogram . \n -*@par Inputs: -*Input "spectrogram" is a 3D tensor. Input "sample_rate" is a scalar. \n +*@par Inputs: +*Input "spectrogram" is a 3D tensor. Input "sample_rate" is a scalar. * @li spectrogram: A 3D float tensor. -* @li sample_rate: The MFCC sample rate. +* @li sample_rate: The MFCC sample rate . \n -*@par Attributes: +*@par Attributes: *@li upper_frequency_limit: The highest frequency for calculation. *@li lower_frequency_limit: The lowest frequency for calculation. *@li filterbank_channel_count: Resolution of the Mel bank. -*@li dct_coefficient_count: Number of output channels to produce \n -per time slice. +*@li dct_coefficient_count: Number of output channels to produce +per time slice . \n -*@par Outputs: -*y: A Tensor of type float32. +*@par Outputs: +*y: A Tensor of type float32 . \n -*@attention Constraints: \n -*Mfcc runs on the Ascend AI CPU, which delivers poor performance. \n +*@attention Constraints: +*Mfcc runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator Mfcc. +*Compatible with the TensorFlow operator Mfcc . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -64,26 +64,26 @@ REG_OP(Mfcc) .OP_END_FACTORY_REG(Mfcc) /** -*@brief Decodes and generates spectrogram using wav float tensor. +*@brief Decodes and generates spectrogram using wav float tensor . \n -*@par Inputs: -*Input "x" is a 2D matrix. \n -* x: A float tensor. Float representation of audio data. +*@par Inputs: +*Input "x" is a 2D matrix. +* x: A float tensor. Float representation of audio data . \n -*@par Attributes: +*@par Attributes: *@li window_size: Size of the spectrogram window. *@li stride: Size of the spectrogram stride. -*@li magnitude_squared: If true, uses squared magnitude. 
+*@li magnitude_squared: If true, uses squared magnitude . \n -*@par Outputs: -*spectrogram: A 3D float Tensor. +*@par Outputs: +*spectrogram: A 3D float Tensor . \n -*@attention Constraints: \n -*AudioSpectrogram runs on the Ascend AI CPU, which delivers \n -poor performance. +*@attention Constraints: +*AudioSpectrogram runs on the Ascend AI CPU, which delivers +poor performance . \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator AudioSpectrogram. +*Compatible with the TensorFlow operator AudioSpectrogram . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -98,26 +98,26 @@ REG_OP(AudioSpectrogram) .OP_END_FACTORY_REG(AudioSpectrogram) /** -*@brief Decodes a 16-bit WAV file into a float tensor. +*@brief Decodes a 16-bit WAV file into a float tensor . \n -*@par Inputs: -*contents: A Tensor of type string. The WAV-encoded audio, usually from a file. +*@par Inputs: +*contents: A Tensor of type string. The WAV-encoded audio, usually from a file . \n -*@par Attributes: -*@li desired_channels: An optional int. Defaults to "-1". \n +*@par Attributes: +*@li desired_channels: An optional int. Defaults to "-1". Number of sample channels wanted. -*@li desired_samples: An optional int. Defaults to "-1". \n -Length of audio requested. +*@li desired_samples: An optional int. Defaults to "-1". +Length of audio requested . \n -*@par Outputs: +*@par Outputs: *@li *audio: A Tensor of type float32. -*@li *sample_rate: A Tensor of type int32. +*@li *sample_rate: A Tensor of type int32 . \n -*@attention Constraints: \n -*DecodeWav runs on the Ascend AI CPU, which delivers poor performance. \n +*@attention Constraints: +*DecodeWav runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator DecodeWav. +*Compatible with the TensorFlow operator DecodeWav . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. 
Please do not use. @@ -132,21 +132,21 @@ REG_OP(DecodeWav) .OP_END_FACTORY_REG(DecodeWav) /** -*@brief Encode audio data using the WAV file format. +*@brief Encode audio data using the WAV file format . \n *@par Inputs: -*Including: \n +*Including: * @li audio: A Tensor of type DT_FLOAT. -* @li sample_rate: A Tensor of type DT_INT32. +* @li sample_rate: A Tensor of type DT_INT32 . \n *@par Outputs: -*contents: A Tensor of type DT_STRING. +*contents: A Tensor of type DT_STRING . \n -*@attention Constraints:\n -*EncodeWav runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*EncodeWav runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with tensorflow Operator EncodeWav. +*Compatible with tensorflow Operator EncodeWav . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h index dd2efade..0e1562c0 100644 --- a/third_party/fwkacllib/inc/ops/batch_ops.h +++ b/third_party/fwkacllib/inc/ops/batch_ops.h @@ -26,35 +26,36 @@ namespace ge { /** -*@brief Creates batches of tensors in "x_tensors". +*@brief Creates batches of tensors in "x_tensors" . \n -*@par Inputs: -*Input "x_tensors" is a list or a dictionary of tensors. \n -*x_tensors: The list or dictionary of tensors to enqueue. +*@par Inputs: +*Input "x_tensors" is a list or a dictionary of tensors. +*x_tensors: The list or dictionary of tensors to enqueue . +It's a dynamic input \n -*@par Attributes: -*@li num_batch_threads: The number of threads enqueuing "x_tensors". \n +*@par Attributes: +*@li num_batch_threads: The number of threads enqueuing "x_tensors". The batching will be nondeterministic if "num_batch_threads" > 1. *@li max_batch_size: The maximum batch size pulled from the queue. *@li max_enqueued_batches: The maximum number of batches pulled from the queue. 
*@li batch_timeout_micros: The batch processing timeout, in microseconds. *@li allowed_batch_sizes: The allowed batch size pulled from the queue. -*@li grad_timeout_micros: The gradient batch processing timeout, \n +*@li grad_timeout_micros: The gradient batch processing timeout, in microseconds. -*@li container: If non-empty, this queue is placed in the given container. \n +*@li container: If non-empty, this queue is placed in the given container. Otherwise, a default container is used. -*@li shared_name: If set, this queue will be shared under the given name \n +*@li shared_name: If set, this queue will be shared under the given name across multiple sessions. -*@li batching_queue: The queue resource container. +*@li batching_queue: The queue resource container . \n -*@par Outputs: +*@par Outputs: *@li y_index: A Tensor. The index of a BatchTensor. Must be in row-major order. *@li y_id: A Tensor. The ID of a BatchTensor. Must be in row-major order. -*@li y_tensors: A list or dictionary of tensors with \n -the same types as "x_tensors". +*@li y_tensors: A list or dictionary of tensors with +the same types as "x_tensors" . It's a dynamic output. \n -*@attention Constraints: \n -*Batch runs on the Ascend AI CPU, which delivers poor performance. \n +*@attention Constraints: +*Batch runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Batch. @@ -79,26 +80,26 @@ REG_OP(Batch) .OP_END_FACTORY_REG(Batch) /** -*@brief Reverses the operation of Batch for a single output Tensor. +*@brief Reverses the operation of Batch for a single output Tensor . \n -*@par Inputs: -*Input "x_tensors" is a list or a dictionary of tensors. \n +*@par Inputs: +*Input "x_tensors" is a list or a dictionary of tensors. * @li x_tensors: The list or dictionary of tensors to enqueue. * @li index: The matching "batch_index" obtained from Batch. -* @li id: The "id" scalar emitted by Batch. 
+* @li id: The "id" scalar emitted by Batch . \n -*@par Attributes: +*@par Attributes: *@li timeout_micros: The unbatch processing timeout, in microseconds. -*@li container: If non-empty, this queue is placed in the given container. \n +*@li container: If non-empty, this queue is placed in the given container. Otherwise, a default container is used. -*@li shared_name: If set, this queue will be shared under the given name \n -across multiple sessions. +*@li shared_name: If set, this queue will be shared under the given name +across multiple sessions . \n -*@par Outputs: -*y_tensor: A list or dictionary of tensors with the same types as "x_tensors". +*@par Outputs: +*y_tensor: A list or dictionary of tensors with the same types as "x_tensors" . \n -*@attention Constraints: \n -*Unbatch runs on the Ascend AI CPU, which delivers poor performance. \n +*@attention Constraints: +*Unbatch runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Unbatch. @@ -117,27 +118,27 @@ REG_OP(Unbatch) .OP_END_FACTORY_REG(Unbatch) /** -*@brief Acts like Batch but using the given "batch_index" index of batching \n -things as they become available. +*@brief Acts like Batch but using the given "batch_index" index of batching +things as they become available . \n -*@par Inputs: -*Input "x_input" is a list or a dictionary of tensors. \n +*@par Inputs: +*Input "x_input" is a list or a dictionary of tensors. * @li x_input: The input to the Unbatch operation. * @li index: The batch_index given to the Unbatch operation. * @li id: The "id" scalar emitted by Batch. -* @li grad: The downstream gradient. +* @li grad: The downstream gradient . \n -*@par Attributes: -*@li container: If non-empty, this queue is placed in the given container. \n +*@par Attributes: +*@li container: If non-empty, this queue is placed in the given container. Otherwise, a default container is used. 
-*@li shared_name: If set, this queue will be shared under the given name \n -across multiple sessions. +*@li shared_name: If set, this queue will be shared under the given name +across multiple sessions . \n -*@par Outputs: -*y_grad: The return value, either an empty tensor or the batched gradient. +*@par Outputs: +*y_grad: The return value, either an empty tensor or the batched gradient . \n -*@attention Constraints: \n -*UnbatchGrad runs on the Ascend AI CPU, which delivers poor performance. \n +*@attention Constraints: +*UnbatchGrad runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator UnbatchGrad. diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h index 0a6cbe9b..5b35a38a 100644 --- a/third_party/fwkacllib/inc/ops/bitwise_ops.h +++ b/third_party/fwkacllib/inc/ops/bitwise_ops.h @@ -26,20 +26,20 @@ namespace ge { /** -*@brief Element-wise computes the bitwise right-shift of x and y. +*@brief Element-wise computes the bitwise right-shift of x and y . \n -*@par Inputs: -*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" \n +*@par Inputs: +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" are 0D scalars. -* @li x: A Tensor. Must be one of the following types: int8, int16, int32, \n -int64, uint8, uint16, uint32, uint64. \n -* @li y: A Tensor. Has the same type as "x". \n +* @li x: A Tensor. Must be one of the following types: int8, int16, int32, +int64, uint8, uint16, uint32, uint64. +* @li y: A Tensor. Has the same type as "x". \n -*@par Outputs: -* z: A Tensor. Has the same type as "x". \n +*@par Outputs: +* z: A Tensor. Has the same type as "x". \n -*@attention Constraints: \n -*Unique runs on the Ascend AI CPU, which delivers poor performance. \n +*@attention Constraints: +*Unique runs on the Ascend AI CPU, which delivers poor performance. 
\n *@par Third-party framework compatibility *Compatible with the TensorFlow operator RightShift. diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h index cded3acd..f1b4e7a9 100644 --- a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h +++ b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h @@ -26,28 +26,28 @@ namespace ge { /** -*@brief Bucketizes each feature based on bucket boundaries. +*@brief Bucketizes each feature based on bucket boundaries . \n -*@par Inputs: -*Input "float_values" is a 1D tensor. Input "bucket_boundaries" is \n -a list of 1D tensors. -* @li float_values: A list of rank 1 tensors each containing float \n +*@par Inputs: +*Input "float_values" is a 1D tensor. Input "bucket_boundaries" is +a list of 1D tensors. It's a dynamic input. +* @li float_values: A list of rank 1 tensors each containing float values for a single feature. -* @li bucket_boundaries: A list of rank 1 tensors each containing \n -the bucket boundaries for a single feature. +* @li bucket_boundaries: A list of rank 1 tensors each containing +the bucket boundaries for a single feature . It's a dynamic input. \n -*@par Attributes: -*@li num_features: Number of features \n +*@par Attributes: +*@li num_features: Number of features -*@par Outputs: -*@li y: A list of rank 1 tensors each containing the bucketized values for \n -a single feature. +*@par Outputs: +*@li y: A list of rank 1 tensors each containing the bucketized values for +a single feature . \n -*@attention Constraints: \n +*@attention Constraints: *BoostedTreesBucketize runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator BoostedTreesBucketize. +*Compatible with the TensorFlow operator BoostedTreesBucketize . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
diff --git a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h index c0109fca..9b9ce314 100644 --- a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h +++ b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h @@ -26,44 +26,44 @@ namespace ge { /** -*@brief Generates labels for candidate sampling with \n -a learned unigram distribution. +*@brief Generates labels for candidate sampling with +a learned unigram distribution. \n -*@par Inputs: -*Input "true_classes" is a 2D matrix. \n -*true_classes: A "batch_size * num_true" matrix, in which each row contains \n -the IDs of the "num_true" "target_classes" in the corresponding original label. +*@par Inputs: +*Input "true_classes" is a 2D matrix. +*true_classes: A "batch_size * num_true" matrix, in which each row contains +the IDs of the "num_true" "target_classes" in the corresponding original label. \n -*@par Attributes: +*@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. -*@li unique: If "unique" is true, samples with rejection, \n +*@li unique: If "unique" is true, samples with rejection, so that all sampled candidates in a batch are unique. -*This requires some approximation to estimate the post-rejection \n +*This requires some approximation to estimate the post-rejection sampling probabilities. -*@li range_max: The sampler will sample integers from the interval \n +*@li range_max: The sampler will sample integers from the interval [0, range_max). *@li seed: If either "seed" or "seed2" are set to be non-zero. -*@li seed2: A second seed to avoid seed collision. +*@li seed2: A second seed to avoid seed collision. \n -*@par Outputs: -*@li sampled_candidates: A vector of length "num_sampled", in which each \n +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. 
-*@li true_expected_count: A "batch_size * num_true" matrix, representing \n -the number of times each candidate is expected to occur in a batch of sampled \n +*@li true_expected_count: A "batch_size * num_true" matrix, representing +the number of times each candidate is expected to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. -*@li sampled_expected_count: A vector of length "num_sampled", \n +*@li sampled_expected_count: A vector of length "num_sampled", for each sampled candidate. -*representing the number of times the candidate is expected to occur \n +*representing the number of times the candidate is expected to occur in a batch of sampled candidates. -* If "unique" is true, then this is a probability. \n +* If "unique" is true, then this is a probability. -*@attention Constraints: \n -*ThreadUnsafeUnigramCandidateSampler runs on the Ascend AI CPU, \n -which delivers poor performance. +*@attention Constraints: +*ThreadUnsafeUnigramCandidateSampler runs on the Ascend AI CPU, +which delivers poor performance. \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator ThreadUnsafeUnigramCandidateSampler. +*Compatible with the TensorFlow operator ThreadUnsafeUnigramCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -82,44 +82,44 @@ REG_OP(ThreadUnsafeUnigramCandidateSampler) .OP_END_FACTORY_REG(ThreadUnsafeUnigramCandidateSampler) /** -*@brief Generates labels for candidate sampling with a learned \n -unigram distribution. +*@brief Generates labels for candidate sampling with a learned +unigram distribution. \n -*@par Inputs: -*true_classes: A "batch_size * num_true" matrix, in which each row contains \n +*@par Inputs: +*true_classes: A "batch_size * num_true" matrix, in which each row contains the IDs of the "num_true" "target_classes" in the corresponding original label. -*Input "true_classes" is a 2D matrix. 
+*Input "true_classes" is a 2D matrix. \n -*@par Attributes: +*@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. -*@li unique: If "unique" is true, samples with rejection, \n +*@li unique: If "unique" is true, samples with rejection, so that all sampled candidates in a batch are unique. -*This requires some approximation to estimate the post-rejection \n +*This requires some approximation to estimate the post-rejection sampling probabilities. -*@li range_max: The sampler will sample integers from the interval \n +*@li range_max: The sampler will sample integers from the interval [0, range_max). *@li seed: If either "seed" or "seed2" are set to be non-zero. -*@li seed2: A second seed to avoid seed collision. +*@li seed2: A second seed to avoid seed collision. \n -*@par Outputs: -*@li sampled_candidates: A vector of length "num_sampled", \n +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. -*@li true_expected_count: A "batch_size * num_true" matrix, representing the \n -number of times each candidate is expected to occur \n +*@li true_expected_count: A "batch_size * num_true" matrix, representing the +number of times each candidate is expected to occur in a batch of sampled candidates. *If "unique" is true, then this is a probability. -*@li sampled_expected_count: A vector of length "num_sampled", for each \n +*@li sampled_expected_count: A vector of length "num_sampled", for each sampled candidate representing the number of times. -* the candidate is expected to occur in a batch of sampled candidates. \n -*If "unique" is true, then this is a probability. +* the candidate is expected to occur in a batch of sampled candidates. +*If "unique" is true, then this is a probability. \n -*@attention Constraints: \n -*UniformCandidateSampler runs on the Ascend AI CPU, \n -which delivers poor performance. 
+*@attention Constraints: +*UniformCandidateSampler runs on the Ascend AI CPU, +which delivers poor performance. \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator UniformCandidateSampler. +*Compatible with the TensorFlow operator UniformCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -138,56 +138,56 @@ REG_OP(UniformCandidateSampler) .OP_END_FACTORY_REG(UniformCandidateSampler) /** -*@brief Generates labels for candidate sampling with a learned \n -unigram distribution. +*@brief Generates labels for candidate sampling with a learned +unigram distribution. \n -*@par Inputs: -*true_classes: A "batch_size * num_true" matrix, in which each row contains \n +*@par Inputs: +*true_classes: A "batch_size * num_true" matrix, in which each row contains the IDs of the "num_true" "target_classes" in the corresponding original label. -* Input "true_classes" is a 2D matrix. +* Input "true_classes" is a 2D matrix. \n -*@par Attributes: +*@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. -*@li unique: If "unique" is true, samples with rejection, \n -so that all sampled candidates in a batch are unique. This requires \n +*@li unique: If "unique" is true, samples with rejection, +so that all sampled candidates in a batch are unique. This requires some approximation to estimate the post-rejection sampling probabilities. *@li range_max: The sampler will sample integers from the interval [0, range_max). -*@li vocab_file: Each valid line in this file (which should have a \n -CSV-like format) corresponds to a valid word ID. \n +*@li vocab_file: Each valid line in this file (which should have a +CSV-like format) corresponds to a valid word ID. *IDs are in sequential order, starting from num_reserved_ids. -*@li distortion: The distortion is used to skew the unigram probability \n -distribution. 
Each weight is first raised to the distortion's power before \n +*@li distortion: The distortion is used to skew the unigram probability +distribution. Each weight is first raised to the distortion's power before adding to the internal unigram distribution. -*@li num_reserved_ids: Optionally some reserved IDs can be added in the range \n -[0, ..., num_reserved_ids) by the users. \n +*@li num_reserved_ids: Optionally some reserved IDs can be added in the range +[0, ..., num_reserved_ids) by the users. * One use case is that a special unknown word token is used as ID 0. -*@li num_shards: A sampler can be used to sample from a subset of the \n +*@li num_shards: A sampler can be used to sample from a subset of the original range. in order to speed up the whole computation through parallelism. -*@li shard: A sampler can be used to sample from a subset of the original \n +*@li shard: A sampler can be used to sample from a subset of the original range in order to speed up the whole computation through parallelism. -*@li unigrams: A list of unigram counts or probabilities, one per ID in \n +*@li unigrams: A list of unigram counts or probabilities, one per ID in sequential order. *@li seed: If either "seed" or "seed2" are set to be non-zero. -*@li seed2: A second seed to avoid seed collision. +*@li seed2: A second seed to avoid seed collision. \n -*@par Outputs: -*@li sampled_candidates: A vector of length "num_sampled", in which each \n +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. -*@li true_expected_count: A "batch_size * num_true" matrix, representing the \n -number of times each candidate is expected to occur in a batch of sampled \n +*@li true_expected_count: A "batch_size * num_true" matrix, representing the +number of times each candidate is expected to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. 
-*@li sampled_expected_count: A vector of length "num_sampled", \n -for each sampled candidate representing the number of times the candidate is \n -expected to occur in a batch of sampled candidates. \n -If "unique" is true, then this is a probability. +*@li sampled_expected_count: A vector of length "num_sampled", +for each sampled candidate representing the number of times the candidate is +expected to occur in a batch of sampled candidates. +If "unique" is true, then this is a probability. \n -*@attention Constraints: \n -* FixedUnigramCandidateSampler runs on the Ascend AI CPU, \n -which delivers poor performance. +*@attention Constraints: +* FixedUnigramCandidateSampler runs on the Ascend AI CPU, +which delivers poor performance. \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator FixedUnigramCandidateSampler. +*Compatible with the TensorFlow operator FixedUnigramCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -212,43 +212,43 @@ REG_OP(FixedUnigramCandidateSampler) .OP_END_FACTORY_REG(FixedUnigramCandidateSampler) /** -*@brief Generates labels for candidate sampling with a learned \n -unigram distribution. +*@brief Generates labels for candidate sampling with a learned +unigram distribution. \n -*@par Inputs: -*true_classes: A "batch_size * num_true" matrix, in which each row contains \n +*@par Inputs: +*true_classes: A "batch_size * num_true" matrix, in which each row contains the IDs of the "num_true" "target_classes" in the corresponding original label. -* Input "true_classes" is a 2D matrix. +* Input "true_classes" is a 2D matrix. \n -*@par Attributes: +*@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. -*@li unique: If "unique" is true, samples with rejection, \n -so that all sampled candidates in a batch are unique. 
\n -*This requires some approximation to estimate the post-rejection \n +*@li unique: If "unique" is true, samples with rejection, +so that all sampled candidates in a batch are unique. +*This requires some approximation to estimate the post-rejection sampling probabilities. -*@li range_max: The sampler will sample integers from the interval \n +*@li range_max: The sampler will sample integers from the interval [0, range_max). *@li seed: If either "seed" or "seed2" are set to be non-zero. -*@li seed2: A second seed to avoid seed collision. +*@li seed2: A second seed to avoid seed collision. \n -*@par Outputs: -*@li sampled_candidates: A vector of length "num_sampled", in which each \n +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. -*@li true_expected_count: A "batch_size * num_true" matrix, representing \n -the number of times each candidate is expected to occur in a batch of sampled candidates. \n -*If "unique" is true, then this is a probability. -*@li sampled_expected_count: A vector of length "num_sampled", for each \n -sampled candidate representing the number of times the candidate is expected \n -to occur in a batch of sampled candidates. \n +*@li true_expected_count: A "batch_size * num_true" matrix, representing +the number of times each candidate is expected to occur in a batch of sampled candidates. *If "unique" is true, then this is a probability. +*@li sampled_expected_count: A vector of length "num_sampled", for each +sampled candidate representing the number of times the candidate is expected +to occur in a batch of sampled candidates. +*If "unique" is true, then this is a probability. \n -*@attention Constraints: \n -*LearnedUnigramCandidateSampler runs on the Ascend AI CPU, which delivers \n -poor performance. +*@attention Constraints: +*LearnedUnigramCandidateSampler runs on the Ascend AI CPU, which delivers +poor performance. 
\n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator LearnedUnigramCandidateSampler. +*Compatible with the TensorFlow operator LearnedUnigramCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -267,42 +267,42 @@ REG_OP(LearnedUnigramCandidateSampler) .OP_END_FACTORY_REG(LearnedUnigramCandidateSampler) /** -*@brief Generates labels for candidate sampling with a log-uniform \n -distribution. +*@brief Generates labels for candidate sampling with a log-uniform +distribution. \n -*@par Inputs: -*true_classes: A "batch_size * num_true" matrix, in which each row contains \n -the IDs of the "num_true" "target_classes" in the corresponding original label. \n -* Input "true_classes" is a 2D matrix. +*@par Inputs: +*true_classes: A "batch_size * num_true" matrix, in which each row contains +the IDs of the "num_true" "target_classes" in the corresponding original label. +* Input "true_classes" is a 2D matrix. \n -*@par Attributes: +*@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. -*@li unique: If "unique" is true, samples with rejection, so that all \n -sampled candidates in a batch are unique. This requires some approximation \n +*@li unique: If "unique" is true, samples with rejection, so that all +sampled candidates in a batch are unique. This requires some approximation to estimate the post-rejection sampling probabilities. -*@li range_max: The sampler will sample integers from the interval \n +*@li range_max: The sampler will sample integers from the interval [0, range_max). *@li seed: If either "seed" or "seed2" are set to be non-zero. -*@li seed2: A second seed to avoid seed collision. +*@li seed2: A second seed to avoid seed collision. 
\n -*@par Outputs: -*@li sampled_candidates: A vector of length "num_sampled", in which each \n +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. -*@li true_expected_count: A "batch_size * num_true" matrix, representing \n -the number of times each candidate is expected to occur in a batch of sampled \n +*@li true_expected_count: A "batch_size * num_true" matrix, representing +the number of times each candidate is expected to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. -*@li sampled_expected_count: A vector of length "num_sampled", for each \n -sampled candidate representing the number of times the candidate is expected \n -to occur in a batch of sampled candidates. \n -*If "unique" is true, then this is a probability. +*@li sampled_expected_count: A vector of length "num_sampled", for each +sampled candidate representing the number of times the candidate is expected +to occur in a batch of sampled candidates. +*If "unique" is true, then this is a probability. \n -*@attention Constraints: \n -*LogUniformCandidateSampler runs on the Ascend AI CPU, which delivers \n -poor performance. +*@attention Constraints: +*LogUniformCandidateSampler runs on the Ascend AI CPU, which delivers +poor performance. \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator LogUniformCandidateSampler. +*Compatible with the TensorFlow operator LogUniformCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -321,38 +321,38 @@ REG_OP(LogUniformCandidateSampler) .OP_END_FACTORY_REG(LogUniformCandidateSampler) /** -*@brief Generates labels for candidate sampling with a learned \n -unigram distribution. +*@brief Generates labels for candidate sampling with a learned +unigram distribution. 
\n -*@par Inputs: -*true_classes: A "batch_size * num_true" matrix, in which each row contains \n -the IDs of the "num_true" "target_classes" in the corresponding original label. \n -* Input "true_classes" is a 2D matrix. +*@par Inputs: +*true_classes: A "batch_size * num_true" matrix, in which each row contains +the IDs of the "num_true" "target_classes" in the corresponding original label. +* Input "true_classes" is a 2D matrix. \n -*@par Attributes: +*@par Attributes: *@li num_true: Number of true labels per context. *@li num_sampled: Number of candidates to randomly sample. -*@li unique: If "unique" is true, samples with rejection, \n -so that all sampled candidates in a batch are unique. This requires some \n +*@li unique: If "unique" is true, samples with rejection, +so that all sampled candidates in a batch are unique. This requires some approximation to estimate the post-rejection sampling probabilities. *@li seed: If either "seed" or "seed2" are set to be non-zero. -*@li seed2: A second seed to avoid seed collision. +*@li seed2: A second seed to avoid seed collision. \n -*@par Outputs: -*@li sampled_candidates: A vector of length "num_sampled", \n +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", in which each element is the ID of a sampled candidate. -*@li true_expected_count: A "batch_size * num_true" matrix, representing the \n -number of times each candidate is expected to occur in a batch of sampled candidates. \n +*@li true_expected_count: A "batch_size * num_true" matrix, representing the +number of times each candidate is expected to occur in a batch of sampled candidates. *If "unique" is true, then this is a probability. -*@li sampled_expected_count: A vector of length "num_sampled", for each \n -sampled candidate representing the number of times the candidate is expected \n -to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. 
+*@li sampled_expected_count: A vector of length "num_sampled", for each +sampled candidate representing the number of times the candidate is expected +to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. \n -*@attention Constraints: \n -*AllCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. \n +*@attention Constraints: +*AllCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator AllCandidateSampler. +*Compatible with the TensorFlow operator AllCandidateSampler. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -370,31 +370,31 @@ REG_OP(AllCandidateSampler) .OP_END_FACTORY_REG(AllCandidateSampler) /** -*@brief Computes the "ids" of the positions in "sampled_candidates" that \n -match "true_labels". +*@brief Computes the "ids" of the positions in "sampled_candidates" that +match "true_labels". \n -*@par Inputs: -* @li Input "true_classes" is a 2D matrix. \n -* @li true_classes: The "true_classes" output of UnpackSparseLabels. \n -* @li sampled_candidates: The "sampled_candidates" output of CandidateSampler. \n +*@par Inputs: +* @li Input "true_classes" is a 2D matrix. +* @li true_classes: The "true_classes" output of UnpackSparseLabels. +* @li sampled_candidates: The "sampled_candidates" output of CandidateSampler. \n -*@par Attributes: +*@par Attributes: *@li num_true: Number of true labels per context. *@li seed: If either "seed" or "seed2" are set to be non-zero. -*@li seed2: A second seed to avoid seed collision. +*@li seed2: A second seed to avoid seed collision. \n -*@par Outputs: +*@par Outputs: * @li indices: A vector of indices corresponding to rows of "true_candidates". 
-* @li ids: A vector of IDs of positions in "sampled_candidates" that match a \n +* @li ids: A vector of IDs of positions in "sampled_candidates" that match a "true_label" for the row with the corresponding index in indices. -* @li weights: A vector of the same length as "indices" and "ids", in which \n -each element is -FLOAT_MAX. +* @li weights: A vector of the same length as "indices" and "ids", in which +each element is -FLOAT_MAX. \n -*@attention Constraints: \n -*ComputeAccidentalHits runs on the Ascend AI CPU, which delivers poor performance. \n +*@attention Constraints: +*ComputeAccidentalHits runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator ComputeAccidentalHits. +*Compatible with the TensorFlow operator ComputeAccidentalHits. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. diff --git a/third_party/fwkacllib/inc/ops/condtake_ops.h b/third_party/fwkacllib/inc/ops/condtake_ops.h index 72bf46a0..554c18f1 100644 --- a/third_party/fwkacllib/inc/ops/condtake_ops.h +++ b/third_party/fwkacllib/inc/ops/condtake_ops.h @@ -26,17 +26,17 @@ namespace ge { /** -*@brief Take elements from data if specific condition is satisfied on mask. +*@brief Take elements from data if specific condition is satisfied on mask. \n *@par Inputs: -*@li data: input tensor from which to take elements, High-dimension input would \n +*@li data: input tensor from which to take elements, High-dimension input would first be flattened. -*@li mask: condition param; must be the same shape with data. +*@li mask: condition param; must be the same shape with data. \n *@par Attributes: *@li mode:convert by convert in Mode. 
*@li val:convert by -*@li eps:convert by (default: 1e-06) +*@li eps:convert by (default: 1e-06) \n *@par Outputs: *@li out_data: the elements taken diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h index 75992103..e2fd4715 100644 --- a/third_party/fwkacllib/inc/ops/control_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h @@ -27,21 +27,21 @@ namespace ge { /** - *@brief Forwards the value of an available tensor from input "x" to output "y". \n - * Merge waits for at least one of the input tensors to become available. \n - * It is usually combined with Switch to implement branching. \n - * Merge forwards the first tensor to become available to output "y", \n - * and sets "value_index" the index of the tensor in inputs. + *@brief Forwards the value of an available tensor from input "x" to output "y". + * Merge waits for at least one of the input tensors to become available. + * It is usually combined with Switch to implement branching. + * Merge forwards the first tensor to become available to output "y", + * and sets "value_index" to the index of the tensor in inputs . \n *@par Inputs: - *x: The input tensors, one of which will become available. \n - * Must be one of the following types: float16, float32, float64, int8, \n - * int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *x: The input tensors, one of which will become available. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . It's a dynamic input. \n *@par Outputs: *@li y: The available tensor. Has the same type as "x". - *@li value_index: A scalar of type int32, for the index of the chosen input \n - * tensor. + *@li value_index: A scalar of type int32, for the index of the chosen input + * tensor . 
\n *@see Switch() @@ -59,21 +59,21 @@ REG_OP(Merge) .OP_END_FACTORY_REG(Merge) /** - *@brief Forwards the value of an available tensor from input "x" to output "y". \n - * Merge waits for at least one of the input tensors to become available. \n - * It is usually combined with Switch to implement branching. \n - * Merge forwards the first tensor to become available to output "y", \n - * and sets "value_index" the index of the tensor in inputs. + *@brief Forwards the value of an available tensor from input "x" to output "y". + * Merge waits for at least one of the input tensors to become available. + * It is usually combined with Switch to implement branching. + * Merge forwards the first tensor to become available to output "y", + * and sets "value_index" to the index of the tensor in inputs . \n *@par Inputs: - *x: The input tensors, one of which will become available. \n - * Must be one of the following types: float16, float32, float64, int8, \n - * int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *x: The input tensors, one of which will become available. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . It's a dynamic input. \n *@par Outputs: *@li y: The available tensor. Has the same type as "x". - *@li value_index: A scalar of type int32, for the index of the chosen input \n - * tensor. + *@li value_index: A scalar of type int32, for the index of the chosen input + * tensor . \n *@see Switch() | Merge() @@ -91,21 +91,21 @@ REG_OP(RefMerge) .OP_END_FACTORY_REG(RefMerge) /** - *@brief Forwards "data" to the output port determined by "pred". \n - * If "pred" is "true", the data input is forwarded to "output_true". \n - * Otherwise, the data is forwarded to "output_false". + *@brief Forwards "data" to the output port determined by "pred". + * If "pred" is "true", the data input is forwarded to "output_true". + * Otherwise, the data is forwarded to "output_false" . 
\n *@par Inputs: *@li data: The tensor to be forwarded. \ n - * Must be one of the following types: float16, float32, float64, \n + * Must be one of the following types: float16, float32, float64, * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool. - *@li pred: A boolean scalar. The output port that will receive data. + *@li pred: A boolean scalar. The output port that will receive data . \n *@par Outputs: - *@li output_false: If "pred" is "false", data will be forwarded to this output. \n + *@li output_false: If "pred" is "false", data will be forwarded to this output. * Has the same type as "data". - *@li output_true: If "pred" is "true", data will be forwarded to this output. \n - * Has the same type as "data". + *@li output_true: If "pred" is "true", data will be forwarded to this output. + * Has the same type as "data" . \n *@see Merge() @@ -126,21 +126,21 @@ REG_OP(Switch) .OP_END_FACTORY_REG(Switch) /** - *@brief Forwards "data" to the output port determined by "pred". \n - * If "pred" is "true", the data input is forwarded to "output_true". \n - * Otherwise, the data is forwarded to "output_false". + *@brief Forwards "data" to the output port determined by "pred". + * If "pred" is "true", the data input is forwarded to "output_true". + * Otherwise, the data is forwarded to "output_false" . \n *@par Inputs: - *@li data: The ref tensor to be forwarded. \n - * Must be one of the following types: float16, float32, float64, \n + *@li data: The ref tensor to be forwarded. + * Must be one of the following types: float16, float32, float64, * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool. - *@li pred: A boolean scalar. The output port that will receive data. + *@li pred: A boolean scalar. The output port that will receive data . \n *@par Outputs: - *@li output_false: If "pred" is "false", data will be forwarded to this output. \n + *@li output_false: If "pred" is "false", data will be forwarded to this output. 
* Has the same type as "data". - *@li output_true: If "pred" is "true", data will be forwarded to this output. \n - * Has the same type as "data". + *@li output_true: If "pred" is "true", data will be forwarded to this output. + * Has the same type as "data" . \n *@see Merge() | Switch() @@ -161,16 +161,16 @@ REG_OP(RefSwitch) .OP_END_FACTORY_REG(RefSwitch) /** - *@brief Forwards "data" to the output port determined by "pred_value". + *@brief Forwards "data" to the output port determined by "pred_value" . \n *@par Inputs: *@li data: The tensor to be forwarded. \ n - * Must be one of the following types: float16, float32, float64, \n + * Must be one of the following types: float16, float32, float64, * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool. - *@li pred_value: A int64 tensor which determines the output port that will receive data. + *@li pred_value: A int64 tensor which determines the output port that will receive data . \n *@par Outputs: - *output: The output tensors, one of which will become available. \n + *output: The output tensors, one of which will become available. * Has the same type as "data". */ REG_OP(SwitchN) @@ -184,24 +184,24 @@ REG_OP(SwitchN) .OP_END_FACTORY_REG(SwitchN) /** - *@brief Creates or finds a child frame, and makes "x" available to the child \n - * frame. This op is used together with Exit to create loops in the graph. \n - * The Executor uses the unique "frame_name" to identify frames. \n - * If "is_constant" is "true", output "y" is a constant in the child \n - * frame; otherwise it may be changed in the child frame. + *@brief Creates or finds a child frame, and makes "x" available to the child + * frame. This op is used together with Exit to create loops in the graph. + * The Executor uses the unique "frame_name" to identify frames. + * If "is_constant" is "true", output "y" is a constant in the child + * frame; otherwise it may be changed in the child frame . 
\n *@par Inputs: - *x: The tensor to be made available to the child frame. \n - * Must be one of the following types: float16, float32, float64, int8, \n - * int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *x: The tensor to be made available to the child frame. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n *@par Attributes: *@li frame_name: A required string. The name of the child frame. - *@li is_constant: A required bool. If true, the output is constant in \n - * the child frame. + *@li is_constant: A required bool. If true, the output is constant in + * the child frame . \n *@par Outputs: - *y: A Tensor. Has the same type as "x". + *y: A Tensor. Has the same type as "x" . \n *@see Exit() @@ -220,24 +220,24 @@ REG_OP(Enter) .OP_END_FACTORY_REG(Enter) /** - *@brief Creates or finds a child frame, and makes "x" available to the child \n - * frame. This op is used together with Exit to create loops in the graph. \n - * The Executor uses the unique "frame_name" to identify frames. \n - * If "is_constant" is "true", output "y" is a constant in the child \n - * frame; otherwise it may be changed in the child frame. + *@brief Creates or finds a child frame, and makes "x" available to the child + * frame. This op is used together with Exit to create loops in the graph. + * The Executor uses the unique "frame_name" to identify frames. + * If "is_constant" is "true", output "y" is a constant in the child + * frame; otherwise it may be changed in the child frame . \n *@par Inputs: - *x: The tensor to be made available to the child frame. \n - * Must be one of the following types: float16, float32, float64, int8, \n - * int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *x: The tensor to be made available to the child frame. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . 
\n *@par Attributes: *@li frame_name: A required string. The name of the child frame. - *@li is_constant: A required bool. If true, the output is constant in \n - * the child frame. + *@li is_constant: A required bool. If true, the output is constant in + * the child frame . \n *@par Outputs: - *y: A tensor. Has the same type as "x". + *y: A tensor. Has the same type as "x" . \n *@see Exit() | Enter() @@ -256,14 +256,14 @@ REG_OP(RefEnter) .OP_END_FACTORY_REG(RefEnter) /** - *@brief Forwards the input to the output. This op represents the loop \n - * termination condition. + *@brief Forwards the input to the output. This op represents the loop + * termination condition . \n *@par Inputs: - *x: A boolean scalar. The condition of the Switch op. + *x: A boolean scalar. The condition of the Switch op . \n *@par Outputs: - *y: The tensor "x". + *y: The tensor "x" . \n *@see Switch() @@ -276,15 +276,15 @@ REG_OP(LoopCond) .OP_END_FACTORY_REG(LoopCond) /** - *@brief Makes the input available to the next iteration. + *@brief Makes the input available to the next iteration . \n *@par Inputs: - *x: The tensor to be made available to the next iteration. \n - * Must be one of the following types: float16, float32, float64, int8, \n - * int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *x: The tensor to be made available to the next iteration. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n *@par Outputs: - *y: A Tensor. Has the same type as "x". + *y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator NextIteration. @@ -299,15 +299,15 @@ REG_OP(NextIteration) .OP_END_FACTORY_REG(NextIteration) /** - *@brief Makes the input available to the next iteration. + *@brief Makes the input available to the next iteration . \n *@par Inputs: - *x: The tensor to be made available to the next iteration. 
\n - * Must be one of the following types: float16, float32, float64, int8, \n - * int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *x: The tensor to be made available to the next iteration. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n *@par Outputs: - *y: A tensor. Has the same type as "x". + *y: A tensor. Has the same type as "x" . \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator RefNextIteration. @@ -322,15 +322,15 @@ REG_OP(RefNextIteration) .OP_END_FACTORY_REG(RefNextIteration) /** - *@brief Exits the current frame to its parent frame. + *@brief Exits the current frame to its parent frame . \n *@par Inputs: - *x: The tensor to be made available to the parent frame. \n - * Must be one of the following types: float16, float32, float64, int8, \n - * int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *x: The tensor to be made available to the parent frame. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n *@par Outputs: - *y: A Tensor. Has the same type as "x". + *y: A Tensor. Has the same type as "x" . \n *@see Enter() @@ -347,15 +347,15 @@ REG_OP(Exit) .OP_END_FACTORY_REG(Exit) /** - *@brief Exits the current frame to its parent frame. + *@brief Exits the current frame to its parent frame . \n *@par Inputs: - *x: The tensor to be made available to the parent frame. \n - * Must be one of the following types: float16, float32, float64, int8, \n - * int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *x: The tensor to be made available to the parent frame. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n *@par Outputs: - *y: A tensor. Has the same type as "x". + *y: A tensor. Has the same type as "x" . 
\n *@see Enter() | Exit() @@ -372,9 +372,9 @@ REG_OP(RefExit) .OP_END_FACTORY_REG(RefExit) /** - *@brief Only useful as a placeholder for control edges. \n - * It is similar to a no-op that always produces a live control output \n - * even when some control inputs are dead. + *@brief Only useful as a placeholder for control edges. + * It is similar to a no-op that always produces a live control output + * even when some control inputs are dead . \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator ControlTrigger. @@ -389,7 +389,7 @@ REG_OP(ControlTrigger) * Three inputs, including: *@li x: One dimensional tensore of type int32, specifying queried shape, max size is 8. *@li data_seq: One dimensional tensore of type int32, specifying the mapped table is queried. -*@li level_index: One dimensional tensore of type int32, specifying secondary index. +*@li level_index: One dimensional tensor of type int32, specifying secondary index. \n *@par Outputs: *@li y: A Tensor with shape [batch, 8], of type int32, specifying index of shape in the map. diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h index eaf6f9e9..383568dc 100644 --- a/third_party/fwkacllib/inc/ops/ctc_ops.h +++ b/third_party/fwkacllib/inc/ops/ctc_ops.h @@ -27,29 +27,29 @@ namespace ge { /** -*@brief Calculates the CTC Loss (log probability) for each batch entry. \n -Also calculates the gradient. +*@brief Calculates the CTC Loss (log probability) for each batch entry. +Also calculates the gradient. \n *@par Inputs: *@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. -*@li labels_indices: The indices of a `SparseTensor`. \n -`labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for \n +*@li labels_indices: The indices of a `SparseTensor`. +`labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for `(batch b, time t)`. 
*@li labels_values: The values (labels) associated with the given batch and time. -*@li sequence_length: A vector containing sequence lengths (batch). +*@li sequence_length: A vector containing sequence lengths (batch). \n *@par Outputs: *@li loss: A vector (batch) containing log-probabilities. -*@li gradient: The gradient of `loss`. 3-D, shape: `(max_time x \n -batch_size x num_classes)`. +*@li gradient: The gradient of `loss`. 3-D, shape: `(max_time x +batch_size x num_classes)`. \n *@par Attributes: -*@li preprocess_collapse_repeated: Scalar, if true then repeated labels are collapsed prior to \n +*@li preprocess_collapse_repeated: Scalar, if true then repeated labels are collapsed prior to the CTC calculation.If not specified, defaults to false -*@li ctc_merge_repeated: Scalar. If set to false, *during* CTC calculation \n -repeated non-blank labels will not be merged and are interpreted as \n -individual labels. This is a simplified version of CTC. \n -If not specified, defaults to true +*@li ctc_merge_repeated: Scalar. If set to false, *during* CTC calculation +repeated non-blank labels will not be merged and are interpreted as +individual labels. This is a simplified version of CTC. +If not specified, defaults to true. \n *@par Third-party framework compatibility * Compatible with TensorFlow CTCLoss operator. @@ -67,24 +67,24 @@ REG_OP(CTCLoss) .OP_END_FACTORY_REG(CTCLoss) /** -*@brief Performs greedy decoding on the logits given in inputs. +*@brief Performs greedy decoding on the logits given in inputs. \n *@par Inputs: *@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. -*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. +*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. \n *@par Attributes: -*@li merge_repeated: If True, merge repeated classes in output. +*@li merge_repeated: If True, merge repeated classes in output. 
\n *@par Outputs: -*@li decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`,\n +*@li decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`, of a `SparseTensor`. The rows store: [batch, time]. -*@li decoded_values: Values vector, size: `(total_decoded_outputs)`,\n +*@li decoded_values: Values vector, size: `(total_decoded_outputs)`, of a `SparseTensor`. The vector stores the decoded classes. -*@li decoded_shape: Shape vector, size `(2)`, of the decoded SparseTensor.\n +*@li decoded_shape: Shape vector, size `(2)`, of the decoded SparseTensor. Values are: `[batch_size, max_decoded_length]`. -*@li log_probability: Matrix, size `(batch_size x 1)`, containing sequence\n -log-probabilities. +*@li log_probability: Matrix, size `(batch_size x 1)`, containing sequence +log-probabilities. \n *@par Third-party framework compatibility * Compatible with TensorFlow CTCGreedyDecoder operator. @@ -100,27 +100,27 @@ REG_OP(CTCGreedyDecoder) .OP_END_FACTORY_REG(CTCGreedyDecoder) /** -*@brief Performs beam search decoding on the logits given in input. +*@brief Performs beam search decoding on the logits given in input. \n *@par Inputs: *@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. -*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. +*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. \n *@par Attributes: -*@li merge_repeated: If True, merge repeated classes in output. +*@li merge_repeated: If True, merge repeated classes in output. \n *@par Outputs: -*@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j,\n -size `(total_decoded_outputs[j] x 2)`, has indices of a\n +*@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j, +size `(total_decoded_outputs[j] x 2)`, has indices of a `SparseTensor`. The rows store: [batch, time]. -*@li decoded_values: A list (length: top_paths) of values vectors. 
Vector j,\n -size `(length total_decoded_outputs[j])`, has the values of a\n +*@li decoded_values: A list (length: top_paths) of values vectors. Vector j, +size `(length total_decoded_outputs[j])`, has the values of a `SparseTensor`. The vector stores the decoded classes for beam j. -*@li decoded_shape: A list (length: top_paths) of shape vector. Vector j,\n -size `(2)`, stores the shape of the decoded `SparseTensor[j]`.\n +*@li decoded_shape: A list (length: top_paths) of shape vector. Vector j, +size `(2)`, stores the shape of the decoded `SparseTensor[j]`. Its values are: `[batch_size, max_decoded_length[j]]`. -*@li log_probability: A matrix, shaped: `(batch_size x top_paths)`. The\n -sequence log-probabilities. +*@li log_probability: A matrix, shaped: `(batch_size x top_paths)`. The +sequence log-probabilities. \n *@par Third-party framework compatibility * Compatible with TensorFlow CTCBeamSearchDecoder operator. diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index d407c4cd..3bfcfe01 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -28,15 +28,15 @@ namespace ge { /** -*@brief This operation returns true if the queue is closed and false if \n -the queue is open. +*@brief This operation returns true if the queue is closed and false if +the queue is open. \n *@par Inputs: -*The input handle must have the resource type. Inputs include: \n -*handle:A Tensor of type resource. The handle to a queue. +*The input handle must have the resource type. Inputs include: +*handle:A Tensor of type resource. The handle to a queue. \n *@par Outputs: -*is_closed:A Tensor of type bool. +*is_closed:A Tensor of type bool. \n *@par Third-party framework compatibility *Compatible with tensorflow QueueIsClosed operator. @@ -48,14 +48,14 @@ REG_OP(QueueIsClosed) .OP_END_FACTORY_REG(QueueIsClosed) /** -*@brief Computes the number of elements in the given queue. 
+*@brief Computes the number of elements in the given queue. \n *@par Inputs: -*The input handle must have the resource type. Inputs include: \n -*handle:A Tensor of type mutable resource. The handle to a queue. +*The input handle must have the resource type. Inputs include: +*handle:A Tensor of type mutable resource. The handle to a queue. \n *@par Outputs: -*size:A Tensor of type int32. +*size:A Tensor of type int32. \n *@par Third-party framework compatibility *Compatible with tensorflow QueueSize operator. @@ -67,22 +67,22 @@ REG_OP(QueueSize) .OP_END_FACTORY_REG(QueueSize) /** -*@brief A queue that produces elements in first-in first-out order. +*@brief A queue that produces elements in first-in first-out order. \n *@par Attributes: -*@li component_types: A list of DType objects. The length of component_types \n +*@li component_types: A list of DType objects. The length of component_types must equal the number of tensors in each queue element. -*@li shapes:(Optional.) A list of fully-defined TensorShape objects with the \n +*@li shapes:(Optional.) A list of fully-defined TensorShape objects with the same length as dtypes, or None. -*@li capacity:An integer. The upper bound on the number of elements that may \n +*@li capacity:An integer. The upper bound on the number of elements that may be stored in this queue. -*@li container: An optional string. Defaults to "". If non-empty, this queue \n +*@li container: An optional string. Defaults to "". If non-empty, this queue is placed in the given container. Otherwise, a default container is used. -*@li shared_name:(Optional.) If non-empty, this queue will be shared under \n -the given name across multiple sessions. +*@li shared_name:(Optional.) If non-empty, this queue will be shared under +the given name across multiple sessions. \n *@par Outputs: -*handle:A Tensor of type mutable resource. The handle to a queue. +*handle:A Tensor of type mutable resource. The handle to a queue. 
\n *@par Third-party framework compatibility *Compatible with tensorflow FIFOQueue operator. @@ -98,18 +98,18 @@ REG_OP(FIFOQueue) .OP_END_FACTORY_REG(FIFOQueue) /** -*@brief Enqueues a tuple of one or more tensors in the given queue. +*@brief Enqueues a tuple of one or more tensors in the given queue. \n *@par Inputs: -*The input handle must have the resource type. Inputs include: \n +*The input handle must have the resource type. Inputs include: *@li handle:A Tensor of type mutable resource. The handle to a queue. -*@li components: A list of Tensor objects. One or more tensors from which \n -the enqueued tensors should be taken. +*@li components: A list of Tensor objects. One or more tensors from which +the enqueued tensors should be taken. It's a dynamic input. \n *@par Attributes: -*timeout_ms: An optional int. Defaults to -1. If the queue is full, this \n -operation will block for up to timeout_ms milliseconds. Note: This option \n -is not supported yet. +*timeout_ms: An optional int. Defaults to -1. If the queue is full, this +operation will block for up to timeout_ms milliseconds. Note: This option +is not supported yet. \n *@par Third-party framework compatibility *Compatible with tensorflow QueueEnqueue operator. @@ -126,18 +126,18 @@ REG_OP(QueueEnqueue) .OP_END_FACTORY_REG(QueueEnqueue) /** -*@brief Enqueues zero or more tuples of one or more tensors in the given queue. +*@brief Enqueues zero or more tuples of one or more tensors in the given queue. \n *@par Inputs: -*The input handle must have the resource type. Inputs include: \n +*The input handle must have the resource type. Inputs include: *@li handle:A Tensor of type mutable resource. The handle to a queue. -*@li components: A list of Tensor objects. One or more tensors from which \n -the enqueued tensors should be taken. +*@li components: A list of Tensor objects. One or more tensors from which +the enqueued tensors should be taken. It's a dynamic input. 
\n *@par Attributes: -*timeout_ms: An optional int. Defaults to -1. If the queue is full, this \n -operation will block for up to timeout_ms milliseconds. Note: This option \n -is not supported yet. +*timeout_ms: An optional int. Defaults to -1. If the queue is full, this +operation will block for up to timeout_ms milliseconds. Note: This option +is not supported yet. \n *@par Third-party framework compatibility *Compatible with tensorflow QueueEnqueueMany operator. @@ -154,21 +154,21 @@ REG_OP(QueueEnqueueMany) .OP_END_FACTORY_REG(QueueEnqueueMany) /** -*@brief Dequeues n tuples of one or more tensors from the given queue. +*@brief Dequeues n tuples of one or more tensors from the given queue. \n *@par Inputs: -*The input handle must have the resource type. Inputs include: \n -*handle:A Tensor of type mutable resource. The handle to a queue. +*The input handle must have the resource type. Inputs include: +*handle:A Tensor of type mutable resource. The handle to a queue. \n *@par Attributes: -*@li timeout_ms: An optional int. Defaults to -1. If the queue is empty, this \n -operation will block for up to timeout_ms milliseconds. Note: This option is \n +*@li timeout_ms: An optional int. Defaults to -1. If the queue is empty, this +operation will block for up to timeout_ms milliseconds. Note: This option is not supported yet. -*@li component_types: A list of DTypes that has length >= 1. The type of each \n -component in a tuple. +*@li component_types: A list of DTypes that has length >= 1. The type of each +component in a tuple. \n *@par Outputs: -*components:A list of Tensor objects of type component_types. +*components:A list of Tensor objects of type component_types. \n *@par Third-party framework compatibility *Compatible with tensorflow QueueDequeue operator. @@ -186,22 +186,22 @@ REG_OP(QueueDequeue) .OP_END_FACTORY_REG(QueueDequeue) /** -*@brief Dequeues n tuples of one or more tensors from the given queue. 
+*@brief Dequeues n tuples of one or more tensors from the given queue. \n *@par Inputs: -*The input handle must have the resource type. Inputs include: \n +*The input handle must have the resource type. Inputs include: *@li handle:A Tensor of type mutable resource. The handle to a queue. -*@li n: A Tensor of type int32. The number of tuples to dequeue. +*@li n: A Tensor of type int32. The number of tuples to dequeue. \n *@par Attributes: -*@li timeout_ms: An optional int. Defaults to -1. If the queue has fewer than \n -n elements, this operation will block for up to timeout_ms milliseconds. \n +*@li timeout_ms: An optional int. Defaults to -1. If the queue has fewer than +n elements, this operation will block for up to timeout_ms milliseconds. Note: This option is not supported yet. -*@li component_types: A list of DTypes that has length >= 1. The type of each \n -component in a tuple. +*@li component_types: A list of DTypes that has length >= 1. The type of each +component in a tuple. \n *@par Outputs: -*components:A list of Tensor objects of type component_types. +*components:A list of Tensor objects of type component_types. \n *@par Third-party framework compatibility *Compatible with tensorflow QueueDequeueMany operator. @@ -220,22 +220,22 @@ REG_OP(QueueDequeueMany) .OP_END_FACTORY_REG(QueueDequeueMany) /** -*@brief Dequeues n tuples of one or more tensors from the given queue. +*@brief Dequeues n tuples of one or more tensors from the given queue. \n *@par Inputs: -*The input handle must have the resource type. Inputs include: \n +*The input handle must have the resource type. Inputs include: *@li handle:A Tensor of type mutable resource. The handle to a queue. -*@li n: A Tensor of type int32. The number of tuples to dequeue. +*@li n: A Tensor of type int32. The number of tuples to dequeue. \n *@par Attributes: -*@li timeout_ms: An optional int. Defaults to -1. 
If the queue has fewer than \n -n elements, this operation will block for up to timeout_ms milliseconds. \n +*@li timeout_ms: An optional int. Defaults to -1. If the queue has fewer than +n elements, this operation will block for up to timeout_ms milliseconds. Note: This option is not supported yet. -*@li component_types: A list of DTypes that has length >= 1. The type of each \n -component in a tuple. +*@li component_types: A list of DTypes that has length >= 1. The type of each +component in a tuple. \n *@par Outputs: -*components:A list of Tensor objects of type component_types. +*components:A list of Tensor objects of type component_types. \n *@par Third-party framework compatibility *Compatible with tensorflow QueueDequeueUpTo operator. @@ -254,24 +254,24 @@ REG_OP(QueueDequeueUpTo) .OP_END_FACTORY_REG(QueueDequeueUpTo) /** -*@brief Stage values similar to a lightweight Enqueue. +*@brief Stage values similar to a lightweight Enqueue. \n *@par Inputs: -*The input values must be a list of Tensor objects. Inputs include: \n -*values: A list of Tensor objects. A list of data types that inserted values \n -should adhere to. +*The input values must be a list of Tensor objects. Inputs include: +*values: A list of Tensor objects. A list of data types that inserted values +should adhere to. It's a dynamic input. \n *@par Attributes: -*@li capacity: An optional int that is >= 0. Defaults to 0. Maximum number of \n -elements in the Staging Area. If > 0, inserts on the container will block \n +*@li capacity: An optional int that is >= 0. Defaults to 0. Maximum number of +elements in the Staging Area. If > 0, inserts on the container will block when the capacity is reached. -*@li memory_limit: An optional int that is >= 0. Defaults to 0. The maximum \n -number of bytes allowed for Tensors in the Staging Area. If > 0, inserts will \n +*@li memory_limit: An optional int that is >= 0. Defaults to 0. The maximum +number of bytes allowed for Tensors in the Staging Area. 
If > 0, inserts will block until sufficient space is available. -*@li container: An optional string. Defaults to "". If non-empty, this queue \n +*@li container: An optional string. Defaults to "". If non-empty, this queue is placed in the given container. Otherwise, a default container is used. -*@li shared_name: An optional string. Defaults to "". It is necessary to \n -match this name to the matching Unstage Op. +*@li shared_name: An optional string. Defaults to "". It is necessary to +match this name to the matching Unstage Op. \n *@see Unstage @@ -290,14 +290,14 @@ REG_OP(Stage) .OP_END_FACTORY_REG(Stage) /** -*@brief Op removes all elements in the underlying container. +*@brief Op removes all elements in the underlying container. \n *@par Attributes: *@li capacity: A list of DTypes *@li memory_limit: An optional int that is >= 0. Defaults to 0. *@li container: An optional string. Defaults to "". *@li shared_name: An optional string. Defaults to "". -*@li dtypes: A list of DTypes. +*@li dtypes: A list of DTypes. \n *@see Stage @@ -314,22 +314,22 @@ REG_OP(StageClear) .OP_END_FACTORY_REG(StageClear) /** -*@brief Op peeks at the values at the specified index. If the underlying \n -container does not contain sufficient elements this op will block until it does. +*@brief Op peeks at the values at the specified index. If the underlying +container does not contain sufficient elements this op will block until it does. \n *@par Inputs: -*The input values must be type int32. Inputs include: \n -*values: A Tensor of type int32. +*The input values must be type int32. Inputs include: +*values: A Tensor of type int32. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to 0. *@li memory_limit: An optional int that is >= 0. Defaults to 0. *@li container: An optional string. Defaults to "". *@li shared_name: An optional string. Defaults to "". -*@li dtypes: A list of DTypes that has length >= 1. +*@li dtypes: A list of DTypes that has length >= 1. 
\n *@par Outputs: -*y:A list of Tensor objects of type dtypes. +*y:A list of Tensor objects of type dtypes. \n *@par Third-party framework compatibility *Compatible with tensorflow StagePeek operator. @@ -348,17 +348,17 @@ REG_OP(StagePeek) .OP_END_FACTORY_REG(StagePeek) /** -*@brief Op returns the number of elements in the underlying container. +*@brief Op returns the number of elements in the underlying container. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to 0. *@li memory_limit: An optional int that is >= 0. Defaults to 0. *@li container: An optional string. Defaults to "". *@li shared_name: An optional string. Defaults to "". -*@li dtypes: A list of DTypes that has length >= 1. +*@li dtypes: A list of DTypes that has length >= 1. \n *@par Outputs: -*size:A Tensor of type int32. +*size:A Tensor of type int32. \n *@par Third-party framework compatibility *Compatible with tensorflow StageSize operator. @@ -374,17 +374,17 @@ REG_OP(StageSize) .OP_END_FACTORY_REG(StageSize) /** -*@brief Pop the element at the top of the stack. +*@brief Pop the element at the top of the stack. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n -*handle: A Tensor of type resource. The handle to a stack. +*The input handle must be type resource. Inputs include: +*handle: A Tensor of type resource. The handle to a stack. \n *@par Attributes: -*elem_type: A DType. The type of the elem that is popped. +*elem_type: A DType. The type of the elem that is popped. \n *@par Outputs: -*element:A Tensor of type elem_type. +*element:A Tensor of type elem_type. \n *@par Third-party framework compatibility *Compatible with tensorflow StackPop operator. @@ -399,19 +399,19 @@ REG_OP(StackPop) .OP_END_FACTORY_REG(StackPop) /** -*@brief Push an element onto the stack. +*@brief Push an element onto the stack. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n +*The input handle must be type resource. 
Inputs include: *@li handle: A Tensor of type resource. The handle to a stack. -*@li elem: A Tensor. The tensor to be pushed onto the stack. +*@li elem: A Tensor. The tensor to be pushed onto the stack. \n *@par Attributes: -*swap_memory: An optional bool. Defaults to False. Swap elem to CPU. Default \n -to false. +*swap_memory: An optional bool. Defaults to False. Swap elem to CPU. Default +to false. \n *@par Outputs: -*y:A Tensor. Has the same type as elem. +*y:A Tensor. Has the same type as elem. \n *@par Third-party framework compatibility *Compatible with tensorflow StackPush operator. @@ -429,11 +429,11 @@ REG_OP(StackPush) .OP_END_FACTORY_REG(StackPush) /** -*@brief Close the stack. +*@brief Close the stack. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n -*handle: A Tensor of type resource. The handle to a stack. +*The input handle must be type resource. Inputs include: +*handle: A Tensor of type resource. The handle to a stack. \n *@par Third-party framework compatibility *Compatible with tensorflow StackClose operator. @@ -444,18 +444,18 @@ REG_OP(StackClose) .OP_END_FACTORY_REG(StackClose) /** -*@brief Create a stack. +*@brief Create a stack. \n *@par Inputs: -*The input max_size must be type int32. Inputs include: \n -*max_size: A Tensor of type int32. The number of elements of a stack. +*The input max_size must be type int32. Inputs include: +*max_size: A Tensor of type int32. The number of elements of a stack. \n *@par Attributes: *@li stack_name: An optional string. Defaults to "". -*@li elem_type: The elements type of the created Stack. +*@li elem_type: The elements type of the created Stack. \n *@par Outputs: -*handle: A Tensor of type resource. The handle to a stack. +*handle: A Tensor of type resource. The handle to a stack. \n *@par Third-party framework compatibility *Compatible with tensorflow Stack operator. 
@@ -469,27 +469,27 @@ REG_OP(Stack) .OP_END_FACTORY_REG(Stack) /** -*@brief Partitions "x" into "num_partitions" tensors using indices from "partitions". +*@brief Partitions "x" into "num_partitions" tensors using indices from "partitions". \n *@par Inputs: -*Including: \n -* @li x: The Tensor to be sliced. Must be one of the following types: \n -DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \n +*Including: +* @li x: The Tensor to be sliced. Must be one of the following types: +DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING. -* @li partitions: A Tensor of type DT_INT32, with any shape. The indices. +* @li partitions: A Tensor of type DT_INT32, with any shape. The indices. \n *@par Attributes: -*num_partitions: The number of partitions to output. +*num_partitions: The number of partitions to output. \n *@par Outputs: -*y: A list of tensors of type DT_INT32. +*y: A list of tensors of type DT_INT32. \n -*@attention Constraints:\n -*DynamicPartition runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*DynamicPartition runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator DynamicPartition. +*Compatible with the TensorFlow operator DynamicPartition. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -506,27 +506,27 @@ REG_OP(DynamicPartition) .OP_END_FACTORY_REG(DynamicPartition) /** -*@brief Interleaves the values from the "x" tensors into a single tensor. +*@brief Interleaves the values from the "x" tensors into a single tensor. \n *@par Inputs: -*Including: \n -* @li indices: A list of at least 1 Tensor objects with type DT_INT32. -* @li x: A list with the same length as "indices" of Tensor objects. 
\n -Must be one of the following types: DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \n -DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_QINT32, \n -DT_QUINT8, DT_QINT8, DT_STRING, DT_COMPLEX64, DT_COMPLEX128. +*Including: +* @li indices: A list of at least 1 Tensor objects with type DT_INT32. It's a dynamic input. +* @li x: A list with the same length as "indices" of Tensor objects. +Must be one of the following types: DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, +DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_QINT32, +DT_QUINT8, DT_QINT8, DT_STRING, DT_COMPLEX64, DT_COMPLEX128. It's a dynamic input. \n *@par Attributes: -*N: An int that is >= 1. Defaults to "1". +*N: An int that is >= 1. Defaults to "1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". \n -*@attention Constraints:\n -*DynamicStitch runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*DynamicStitch runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator DynamicStitch. +*Compatible with the TensorFlow operator DynamicStitch. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -545,24 +545,24 @@ REG_OP(DynamicStitch) .OP_END_FACTORY_REG(DynamicStitch) /** -*@brief Interleaves the values from the "x" tensors into a single tensor. +*@brief Interleaves the values from the "x" tensors into a single tensor. \n *@par Inputs: -*Including: \n -* @li indices: A list of at least 1 Tensor objects with type DT_INT32. -* @li x: A list with the same length as "indices" of Tensor objects. \n -Must be one of the following types: DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \n -DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING, \n -DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT32. 
+*Including: +* @li indices: A list of at least 1 Tensor objects with type DT_INT32. It's a dynamic input. +* @li x: A list with the same length as "indices" of Tensor objects. It's a dynamic input. +Must be one of the following types: DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, +DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING, +DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT32. \n *@par Attributes: -*N: An int that is >= 1. Defaults to "1". +*N: An int that is >= 1. Defaults to "1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". \n -*@attention Constraints:\n -*ParallelDynamicStitch runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*ParallelDynamicStitch runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator ParallelDynamicStitch. @@ -582,17 +582,17 @@ REG_OP(ParallelDynamicStitch) .OP_END_FACTORY_REG(ParallelDynamicStitch) /** -*@brief Removes all elements in the underlying container. +*@brief Removes all elements in the underlying container. \n *@par Attributes:An optional int that is >= 0. Defaults to "0". *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of tf.DTypes. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n -*@attention Constraints:\n -*MapClear runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*MapClear runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator MapClear. 
@@ -607,20 +607,20 @@ REG_OP(MapClear) .OP_END_FACTORY_REG(MapClear) /** -*@brief Returns the number of incomplete elements in the underlying container. +*@brief Returns the number of incomplete elements in the underlying container. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of tf.DTypes. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: -*size: A Tensor of type DT_INT32. +*size: A Tensor of type DT_INT32. \n -*@attention Constraints:\n -*MapIncompleteSize runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*MapIncompleteSize runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator MapIncompleteSize. @@ -636,17 +636,17 @@ REG_OP(MapIncompleteSize) .OP_END_FACTORY_REG(MapIncompleteSize) /** -*@brief Unstage Op is similar to a lightweight Dequeue. +*@brief Unstage Op is similar to a lightweight Dequeue. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to 0. *@li memory_limit: An optional int that is >= 0. Defaults to 0. *@li container: An optional string. Defaults to "". *@li shared_name: An optional string. Defaults to "". -*@li dtypes: A list of DTypes that has length >= 1. +*@li dtypes: A list of DTypes that has length >= 1. \n *@par Outputs: -*y: A list of Tensor objects of type dtypes. +*y: A list of Tensor objects of type dtypes. \n *@par Third-party framework compatibility *Compatible with tensorflow Unstage operator. @@ -664,34 +664,35 @@ REG_OP(Unstage) .OP_END_FACTORY_REG(Unstage) /** -*@brief Stage (key, values) in the underlying container which behaves like a hashtable. +*@brief Stage (key, values) in the underlying container which behaves like a hashtable. 
\n *@par Inputs: -*Including: \n +*Including: * @li key: A Tensor of type DT_INT64. * @li indices: A Tensor of type DT_INT32. -* @li values: A list of Tensor objects for tensor dtypes. \n -A list of data types that inserted values should adhere to of. \n -Must be one of the following types: DT_FLOAT, DT_FLOAT16, \n -DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, \n -DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, \n -DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \n +* @li values: A list of Tensor objects for tensor dtypes. +A list of data types that inserted values should adhere to of. +Must be one of the following types: DT_FLOAT, DT_FLOAT16, +DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, +DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, +DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. +It's a dynamic input. \n *@par Attributes: -*@li capacity: An optional int that is >= 0. Defaults to "0". \n -Maximum number of elements in the Staging Area. If > 0, \n +*@li capacity: An optional int that is >= 0. Defaults to "0". +Maximum number of elements in the Staging Area. If > 0, inserts on the container will block when the capacity is reached. *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of tf.DTypes. -*@li container: An optional string. Defaults to "". \n -If non-empty, this queue is placed in the given container. \n +*@li container: An optional string. Defaults to "". +If non-empty, this queue is placed in the given container. Otherwise, a default container is used. -*@li shared_name: An optional string. Defaults to "". \n -It is necessary to match this name to the matching Unstage Op. +*@li shared_name: An optional string. Defaults to "". +It is necessary to match this name to the matching Unstage Op. 
\n -*@attention Constraints:\n -*MapStage runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*MapStage runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator MapStage. @@ -713,29 +714,29 @@ REG_OP(MapStage) .OP_END_FACTORY_REG(MapStage) /** -*@brief Removes and returns the values associated with the key. +*@brief Removes and returns the values associated with the key. \n *@par Inputs: -*Including: \n +*Including: * @li key: A Tensor of type DT_INT64. -* @li indices: A Tensor of type DT_INT32. +* @li indices: A Tensor of type DT_INT32. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of DTypes that has length >= 1. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: -*values: A list of Tensor objects. Must be one of the following types: \n -DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, \n -DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, \n -DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, \n -DT_QINT16, DT_QUINT16, DT_QINT32. +*values: A list of Tensor objects. Must be one of the following types: +DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, +DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, +DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, +DT_QINT16, DT_QUINT16, DT_QINT32. \n -*@attention Constraints:\n -*MapUnstage runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*MapUnstage runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator MapUnstage. 
@@ -757,29 +758,29 @@ REG_OP(MapUnstage) .OP_END_FACTORY_REG(MapUnstage) /** -*@brief Removes and returns a random (key, value). +*@brief Removes and returns a random (key, value). \n *@par Inputs: -*Including: \n -*indices: A Tensor of type DT_INT32. +*Including: +*indices: A Tensor of type DT_INT32. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of DTypes that has length >= 1. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: *@li key: A Tensor of type DT_INT64. -*@li values: A list of Tensor objects. \n -Must be one of the following types: DT_FLOAT, DT_FLOAT16, DT_INT8, \n -DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \n -DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \n -DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. +*@li values: A list of Tensor objects. +Must be one of the following types: DT_FLOAT, DT_FLOAT16, DT_INT8, +DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, +DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, +DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. \n -*@attention Constraints:\n -*MapUnstageNoKey runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*MapUnstageNoKey runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator MapUnstageNoKey. @@ -801,29 +802,29 @@ REG_OP(MapUnstageNoKey) .OP_END_FACTORY_REG(MapUnstageNoKey) /** -*@brief Peeks at the values at the specified key. +*@brief Peeks at the values at the specified key. \n *@par Inputs: -*Including: \n +*Including: * @li key: A Tensor of type DT_INT64. -* @li indices: A Tensor of type DT_INT32. 
+* @li indices: A Tensor of type DT_INT32. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of tf.DTypes that has length >= 1. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: -*values: A list of Tensor objects of type "dtypes". \n -Must be one of the following types: DT_FLOAT, DT_FLOAT16, DT_INT8, \n -DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, \n -DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, \n -DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. +*values: A list of Tensor objects of type "dtypes". +Must be one of the following types: DT_FLOAT, DT_FLOAT16, DT_INT8, +DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, +DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, +DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. \n -*@attention Constraints:\n -*MapPeek runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*MapPeek runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator MapPeek. @@ -845,20 +846,20 @@ REG_OP(MapPeek) .OP_END_FACTORY_REG(MapPeek) /** -*@brief Returns the number of elements in the underlying container. +*@brief Returns the number of elements in the underlying container. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of tf.DTypes. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: -*size: A Tensor of type DT_INT32. 
+*size: A Tensor of type DT_INT32. \n -*@attention Constraints:\n -*MatMul runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*MatMul runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator MapSize. @@ -874,29 +875,29 @@ REG_OP(MapSize) .OP_END_FACTORY_REG(MapSize) /** -*@brief Class wrapping dynamic-sized, per-time-step, write-once Tensor arrays. +*@brief Class wrapping dynamic-sized, per-time-step, write-once Tensor arrays. \n *@par Inputs: -*The input size must be type int32. Inputs include: \n -*@li size: int32 scalar Tensor: the size of the TensorArray. Required if \n -handle is not provided. +*The input size must be type int32. Inputs include: +*@li size: int32 scalar Tensor: the size of the TensorArray. Required if +handle is not provided. \n *@par Attributes: *@li dtype: The data type of this TensorArray. *@li element_shape: The TensorShape of elements in this TensorArray. -*@li dynamic_size: A boolean that determines whether writes to the \n +*@li dynamic_size: A boolean that determines whether writes to the TensorArray are allowed to grow the size. -*@li clear_after_read: Boolean (optional, default: True). If True, clear \n -TensorArray values \n -after reading them. This disables read-many semantics, but allows early \n +*@li clear_after_read: Boolean (optional, default: True). If True, clear +TensorArray values +after reading them. This disables read-many semantics, but allows early release of memory. -*@li identical_element_shapes: If true (default is false), then all elements \n +*@li identical_element_shapes: If true (default is false), then all elements in the TensorArray will be expected to have have identical shapes. -*@li tensor_array_name: String: the name of the TensorArray. +*@li tensor_array_name: String: the name of the TensorArray. \n *@par Outputs: *@li handle: The handle to the TensorArray. 
-*@li flow: A scalar used to control gradient flow. +*@li flow: A scalar used to control gradient flow. \n *@par Third-party framework compatibility *Compatible with tensorflow TensorArray operator. @@ -915,12 +916,12 @@ REG_OP(TensorArray) .OP_END_FACTORY_REG(TensorArray) /** -*@brief Delete the TensorArray from its resource container. +*@brief Delete the TensorArray from its resource container. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n -*handle: A Tensor of type resource. The handle to a TensorArray \n -(output of TensorArray or TensorArrayGrad). +*The input handle must be type resource. Inputs include: +*handle: A Tensor of type resource. The handle to a TensorArray +(output of TensorArray or TensorArrayGrad). \n *@par Third-party framework compatibility *Compatible with tensorflow TensorArrayClose operator. @@ -931,23 +932,23 @@ REG_OP(TensorArrayClose) .OP_END_FACTORY_REG(TensorArrayClose) /** -*@brief Concat the elements from the TensorArray into value value. +*@brief Concat the elements from the TensorArray into value value. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n +*The input handle must be type resource. Inputs include: *@li handle: The handle to a TensorArray. -*@li flow_in: A float scalar that enforces proper chaining of operations. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n *@par Attributes: *@li dtype: The type of the elem that is returned. -*@li element_shape_except0: The expected shape of an element, if known, \n -excluding the first dimension. +*@li element_shape_except0: The expected shape of an element, if known, +excluding the first dimension. \n *@par Outputs: -*@li value: All of the elements in the TensorArray, concatenated along \n +*@li value: All of the elements in the TensorArray, concatenated along the first axis. -*@li lengths: A vector of the row sizes of the original T elements in the \n -value output. 
+*@li lengths: A vector of the row sizes of the original T elements in the +value output. \n *@par Third-party framework compatibility *Compatible with tensorflow TensorArrayConcat operator. @@ -966,24 +967,24 @@ REG_OP(TensorArrayConcat) .OP_END_FACTORY_REG(TensorArrayConcat) /** -*@brief All elements selected by indices must have the same shape. +*@brief All elements selected by indices must have the same shape. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n +*The input handle must be type resource. Inputs include: *@li handle: The handle to a TensorArray. -*@li indices: The locations in the TensorArray from which to read tensor \n +*@li indices: The locations in the TensorArray from which to read tensor elements. -*@li flow_in: A float scalar that enforces proper chaining of operations. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n *@par Attributes: *@li dtype: The type of the elem that is returned. -*@li element_shape: The expected shape of an element, if known. Used to \n -validate the shapes of TensorArray elements. If this shape is not fully \n -specified, gathering zero-size TensorArrays is an error. +*@li element_shape: The expected shape of an element, if known. Used to +validate the shapes of TensorArray elements. If this shape is not fully +specified, gathering zero-size TensorArrays is an error. \n *@par Outputs: -*value: All of the elements in the TensorArray, concatenated along a new \n -axis (the new dimension 0). +*value: All of the elements in the TensorArray, concatenated along a new +axis (the new dimension 0). \n *@par Third-party framework compatibility *Compatible with tensorflow TensorArrayGather operator. @@ -1002,21 +1003,21 @@ REG_OP(TensorArrayGather) .OP_END_FACTORY_REG(TensorArrayGather) /** -*@brief Creates a TensorArray for storing the gradients of values in the \n -given handle. +*@brief Creates a TensorArray for storing the gradients of values in the +given handle. 
\n *@par Inputs: -*The input handle must be type resource. Inputs include: \n +*The input handle must be type resource. Inputs include: *@li handle: The handle to a TensorArray. -*@li flow_in: A float scalar that enforces proper chaining of operations. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n *@par Attributes: -*source: The gradient source string, used to decide which gradient \n -TensorArray to return. +*source: The gradient source string, used to decide which gradient +TensorArray to return. \n *@par Outputs: *@li grad_handle: A Tensor of type resource. -*@li flow_out: A Tensor of type float. +*@li flow_out: A Tensor of type float. \n *@par Third-party framework compatibility *Compatible with tensorflow TensorArrayGrad operator. @@ -1031,17 +1032,17 @@ REG_OP(TensorArrayGrad) .OP_END_FACTORY_REG(TensorArrayGrad) /** -*@brief Push an element onto the tensor_array. +*@brief Push an element onto the tensor_array. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n +*The input handle must be type resource. Inputs include: *@li handle: The handle to a TensorArray. *@li index: The position to write to inside the TensorArray. *@li value: The tensor to write to the TensorArray. -*@li flow_in: A float scalar that enforces proper chaining of operations. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n *@par Outputs: -*flow_out: A float scalar that enforces proper chaining of operations. +*flow_out: A float scalar that enforces proper chaining of operations. \n *@par Third-party framework compatibility *Compatible with tensorflow TensorArrayWrite operator. @@ -1058,24 +1059,24 @@ REG_OP(TensorArrayWrite) .OP_END_FACTORY_REG(TensorArrayWrite) /** -*@brief Creates a TensorArray for storing multiple gradients of values in \n -the given handle. +*@brief Creates a TensorArray for storing multiple gradients of values in +the given handle. 
\n *@par Inputs: -*The input handle must be type resource. Inputs include: \n +*The input handle must be type resource. Inputs include: *@li handle: A Tensor of type resource. The handle to the forward TensorArray. -*@li flow_in: A Tensor of type float. A float scalar that enforces proper \n +*@li flow_in: A Tensor of type float. A float scalar that enforces proper chaining of operations. -*@li shape_to_prepend: A Tensor of type int32. An int32 vector representing \n -a shape. +*@li shape_to_prepend: A Tensor of type int32. An int32 vector representing +a shape. \n *@par Attributes: -*source: A string. The gradient source string, used to decide which gradient \n -TensorArray to return. +*source: A string. The gradient source string, used to decide which gradient +TensorArray to return. \n *@par Outputs: *@li grad_handle: A Tensor of type resource. -*@li flow_out: A Tensor of type float. +*@li flow_out: A Tensor of type float. \n *@par Third-party framework compatibility *Compatible with tensorflow TensorArrayGradWithShape operator. @@ -1091,19 +1092,19 @@ REG_OP(TensorArrayGradWithShape) .OP_END_FACTORY_REG(TensorArrayGradWithShape) /** -*@brief Read an element from the TensorArray into output value. +*@brief Read an element from the TensorArray into output value. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n +*The input handle must be type resource. Inputs include: *@li handle: A Tensor of type resource. The handle to a TensorArray. *@li index: A Tensor of type int32. -*@li flow_in: A Tensor of type float. +*@li flow_in: A Tensor of type float. \n *@par Attributes: -*dtype: A DType. +*dtype: A DType. \n *@par Outputs: -*y: A Tensor of type dtype. +*y: A Tensor of type dtype. \n *@par Third-party framework compatibility *Compatible with tensorflow TensorArrayRead operator. 
@@ -1120,18 +1121,18 @@ REG_OP(TensorArrayRead) .OP_END_FACTORY_REG(TensorArrayRead) /** -*@brief Scatter the data from the input value into specific TensorArray \n -elements. +*@brief Scatter the data from the input value into specific TensorArray +elements. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n +*The input handle must be type resource. Inputs include: *@li handle: The handle to a TensorArray. *@li indices: The locations at which to write the tensor elements. *@li value: The concatenated tensor to write to the TensorArray. -*@li flow_in: A float scalar that enforces proper chaining of operations. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n *@par Outputs: -*flow_out: A float scalar that enforces proper chaining of operations. +*flow_out: A float scalar that enforces proper chaining of operations. \n *@par Third-party framework compatibility *Compatible with tensorflow TensorArrayScatter operator. @@ -1148,18 +1149,18 @@ REG_OP(TensorArrayScatter) .OP_END_FACTORY_REG(TensorArrayScatter) /** -*@brief Split the data from the input value into TensorArray elements. +*@brief Split the data from the input value into TensorArray elements. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n +*The input handle must be type resource. Inputs include: *@li handle: The handle to a TensorArray. *@li value: The concatenated tensor to write to the TensorArray. -*@li lengths: The vector of lengths, how to split the rows of value into \n +*@li lengths: The vector of lengths, how to split the rows of value into the TensorArray. -*@li flow_in: A float scalar that enforces proper chaining of operations. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n *@par Outputs: -*flow_out: A float scalar that enforces proper chaining of operations. +*flow_out: A float scalar that enforces proper chaining of operations. 
\n *@par Third-party framework compatibility *Compatible with tensorflow TensorArraySplit operator. @@ -1176,15 +1177,15 @@ REG_OP(TensorArraySplit) .OP_END_FACTORY_REG(TensorArraySplit) /** -*@brief Return the number of elements in a TensorArray. +*@brief Return the number of elements in a TensorArray. \n *@par Inputs: -*The input handle must be type resource. Inputs include: \n +*The input handle must be type resource. Inputs include: *@li handle: The handle to a TensorArray. -*@li flow_in: A float scalar that enforces proper chaining of operations. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n *@par Outputs: -*size: The number of elements in a TensorArray.. +*size: The number of elements in a TensorArray.. \n *@par Third-party framework compatibility *Compatible with tensorflow TensorArraySize operator. @@ -1197,21 +1198,21 @@ REG_OP(TensorArraySize) .OP_END_FACTORY_REG(TensorArraySize) /** -*@brief A queue implementation that dequeues elements in a random order. +*@brief A queue implementation that dequeues elements in a random order. \n *@par Attributes: -*@li shapes: (Optional.) A list of fully-defined TensorShape objects with \n +*@li shapes: (Optional.) A list of fully-defined TensorShape objects with the same length as dtypes, or None. -*@li capacity: An integer. The upper bound on the number of elements that may \n +*@li capacity: An integer. The upper bound on the number of elements that may be stored in this queue. *@li min_after_dequeue: An integer (described above). *@li seed: An integer. Used to create a random seed. *@li seed2: An integer. Used to create a random seed. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: -*handle: A Tensor of type resource. The handle to a stack. +*handle: A Tensor of type resource. The handle to a stack. 
\n *@par Third-party framework compatibility *Compatible with tensorflow RandomShuffleQueue operator. @@ -1230,30 +1231,30 @@ REG_OP(RandomShuffleQueue) .OP_END_FACTORY_REG(RandomShuffleQueue) /** -*@brief A queue that produces elements in first-in first-out order. +*@brief A queue that produces elements in first-in first-out order. \n *@par Attributes: -*@li shapes: An optional list of shapes for each component of \n -a queue element. Defaults to {}. The length of this attr must be \n -either 0 or the same as the length of "component_types". Shapes of fixed \n -rank but variable size are allowed by setting any shape dimension to "-1". \n -In this case, the inputs' shape may vary along the given dimension, \n -and DequeueMany will pad the given dimension with zeros up to the maximum \n -shape of all elements in the given batch. If the length of this attr is "0", \n -different queue elements may have different ranks and shapes, but only one \n +*@li shapes: An optional list of shapes for each component of +a queue element. Defaults to {}. The length of this attr must be +either 0 or the same as the length of "component_types". Shapes of fixed +rank but variable size are allowed by setting any shape dimension to "-1". +In this case, the inputs' shape may vary along the given dimension, +and DequeueMany will pad the given dimension with zeros up to the maximum +shape of all elements in the given batch. If the length of this attr is "0", +different queue elements may have different ranks and shapes, but only one element may be dequeued at a time. -*@li capacity: An optional int. Defaults to "-1". The upper bound on the number \n +*@li capacity: An optional int. Defaults to "-1". The upper bound on the number of elements in this queue. Negative numbers mean no limit. -*@li container: An optional string. Defaults to "". If non-empty, this queue \n +*@li container: An optional string. Defaults to "". If non-empty, this queue is placed in the given container. 
Otherwise, a default container is used. -*@li shared_name: An optional string. Defaults to "". If non-empty, this queue \n -will be shared under the given name across multiple sessions. +*@li shared_name: An optional string. Defaults to "". If non-empty, this queue +will be shared under the given name across multiple sessions. \n *@par Outputs: -*handle: A Tensor of type DT_RESOURCE. +*handle: A Tensor of type DT_RESOURCE. \n -*@attention Constraints:\n -*PaddingFIFOQueue runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*PaddingFIFOQueue runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator PaddingFIFOQueue. @@ -1269,25 +1270,25 @@ REG_OP(PaddingFIFOQueue) .OP_END_FACTORY_REG(PaddingFIFOQueue) /** -*@brief A queue that produces elements sorted by the first component value. +*@brief A queue that produces elements sorted by the first component value. \n *@par Attributes: -*@li component_types: An optional list of tf.DTypes. Defaults to {}. \n +*@li component_types: An optional list of tf.DTypes. Defaults to {}. The type of each component in a value. *@li shapes: A list of shapes for each component of a queue element. -The length of this attr must be either 0 or the same as the length of \n -"component_types". If the length of this attr is 0, the shapes of queue \n +The length of this attr must be either 0 or the same as the length of +"component_types". If the length of this attr is 0, the shapes of queue elements are not constrained, and only one element may be dequeued at a time. -*@li container: An optional string. Defaults to "". If non-empty, this queue \n +*@li container: An optional string. Defaults to "". If non-empty, this queue is placed in the given container. Otherwise, a default container is used. -*@li shared_name: An optional string. Defaults to "". 
If non-empty, this \n -queue will be shared under the given name across multiple sessions. +*@li shared_name: An optional string. Defaults to "". If non-empty, this +queue will be shared under the given name across multiple sessions. \n *@par Outputs: -*handle: A Tensor of type DT_RESOURCE. +*handle: A Tensor of type DT_RESOURCE. \n -*@attention Constraints:\n -*PriorityQueue runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*PriorityQueue runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator PriorityQueue. @@ -1303,19 +1304,19 @@ REG_OP(PriorityQueue) .OP_END_FACTORY_REG(PriorityQueue) /** -*@brief Multiplies the matrix "x1" by the matrix "x2". +*@brief Closes the given queue. \n *@par Inputs: -*Including: \n -*handle: A Tensor of type DT_RESOURCE. The handle to a queue. +*Including: +*handle: A Tensor of type DT_RESOURCE. The handle to a queue. \n *@par Attributes: -*cancel_pending_enqueues: An optional bool. Defaults to "False". \n -If true, all pending enqueue requests that are blocked on \n -the given queue will be canceled. +*cancel_pending_enqueues: An optional bool. Defaults to "False". +If true, all pending enqueue requests that are blocked on +the given queue will be canceled. \n -*@attention Constraints:\n -*QueueClose runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*QueueClose runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator QueueClose. @@ -1327,33 +1328,34 @@ REG_OP(QueueClose) .OP_END_FACTORY_REG(QueueClose) /** -*@brief Stage (key, values) in the underlying container which behaves like an ordered associative container. +*@brief Stage (key, values) in the underlying container which behaves like an ordered associative container. 
\n *@par Inputs: -*Including: \n +*Including: * @li key: A Tensor of type DT_INT64. * @li indices: A Tensor of type DT_INT32. -* @li values: A list of Must be one of the following types: \n -DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \n -DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, \n -DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, \n -DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32 that inserted values should adhere to. +* @li values: A list of tensors. Must be one of the following types: +DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, +DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, +DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, +DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32 that inserted +values should adhere to. It's a dynamic input. \n *@par Attributes: -*@li capacity: An optional int that is >= 0. Defaults to "0". \n -Maximum number of elements in the Staging Area. \n -If > 0, inserts on the container will block \n +*@li capacity: An optional int that is >= 0. Defaults to "0". +Maximum number of elements in the Staging Area. +If > 0, inserts on the container will block when the capacity is reached. *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of DTypes. -*@li container: An optional string. Defaults to "". \n -If non-empty, this queue is placed in the given container. \n +*@li container: An optional string. Defaults to "". +If non-empty, this queue is placed in the given container. Otherwise, a default container is used. -*@li shared_name: An optional string. Defaults to "". \n -It is necessary to match this name to the matching Unstage Op. +*@li shared_name: An optional string. Defaults to "". +It is necessary to match this name to the matching Unstage Op. 
\n -*@attention Constraints:\n -*OrderedMapStage runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*OrderedMapStage runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator OrderedMapStage. @@ -1375,20 +1377,20 @@ REG_OP(OrderedMapStage) .OP_END_FACTORY_REG(OrderedMapStage) /** -*@brief Returns the number of elements in the underlying container. +*@brief Returns the number of elements in the underlying container. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of DTypes. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: -*size: A Tensor of type DT_INT32. +*size: A Tensor of type DT_INT32. \n -*@attention Constraints:\n -*OrderedMapSize runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*OrderedMapSize runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator OrderedMapSize. @@ -1404,17 +1406,17 @@ REG_OP(OrderedMapSize) .OP_END_FACTORY_REG(OrderedMapSize) /** -*@brief Removes all elements in the underlying container. +*@brief Removes all elements in the underlying container. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of DTypes. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". 
\n -*@attention Constraints:\n -*OrderedMapClear runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*OrderedMapClear runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator OrderedMapClear. @@ -1429,21 +1431,21 @@ REG_OP(OrderedMapClear) .OP_END_FACTORY_REG(OrderedMapClear) /** -*@brief Returns the number of incomplete elements in the underlying container. +*@brief Returns the number of incomplete elements in the underlying container. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of DTypes. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: -*size: A Tensor of type DT_INT32. +*size: A Tensor of type DT_INT32. \n -*@attention Constraints:\n -*OrderedMapIncompleteSize runs on the Ascend AI CPU, \n -which delivers poor performance.\n +*@attention Constraints: +*OrderedMapIncompleteSize runs on the Ascend AI CPU, +which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator OrderedMapIncompleteSize. @@ -1459,28 +1461,28 @@ REG_OP(OrderedMapIncompleteSize) .OP_END_FACTORY_REG(OrderedMapIncompleteSize) /** -*@brief Peeks at the values at the specified key. +*@brief Peeks at the values at the specified key. \n *@par Inputs: -*Including: \n +*Including: * @li key: A Tensor of type DT_INT64. -* @li indices: A Tensor of type DT_INT32. +* @li indices: A Tensor of type DT_INT32. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of DTypes that has length >= 1. *@li container: An optional string. Defaults to "". 
-*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: -*values: A list of Tensor objects. Must be one of the following types: \n -DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, \n -DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, \n -DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. +*values: A list of Tensor objects. Must be one of the following types: +DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, +DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, +DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. \n -*@attention Constraints:\n -*OrderedMapPeek runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*OrderedMapPeek runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator OrderedMapPeek. @@ -1502,29 +1504,29 @@ REG_OP(OrderedMapPeek) .OP_END_FACTORY_REG(OrderedMapPeek) /** -*@brief Removes and returns the (key, value) element with the smallest. +*@brief Removes and returns the (key, value) element with the smallest key. \n *@par Inputs: -*Including: \n -* @li indices: A Tensor of type DT_INT32. +*Including: +* @li indices: A Tensor of type DT_INT32. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of DTypes that has length >= 1. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: *@li key: A Tensor of type DT_INT64. -*@li values: A list of Tensor objects. 
Must be one of the following types: \n -DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, \n -DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, \n -DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. +*@li values: A list of Tensor objects. Must be one of the following types: +DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, +DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, +DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. \n -*@attention Constraints:\n -*OrderedMapUnstageNoKey runs on the Ascend AI CPU, \n -which delivers poor performance.\n +*@attention Constraints: +*OrderedMapUnstageNoKey runs on the Ascend AI CPU, +which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator OrderedMapUnstageNoKey. @@ -1546,27 +1548,27 @@ REG_OP(OrderedMapUnstageNoKey) .OP_END_FACTORY_REG(OrderedMapUnstageNoKey) /** -*@brief Removes and returns the values associated with the key. +*@brief Removes and returns the values associated with the key. \n *@par Inputs: -*Including: \n +*Including: * @li key: A Tensor of type DT_INT64. -* @li indices: A Tensor of type DT_INT32. +* @li indices: A Tensor of type DT_INT32. \n *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". *@li dtypes: A list of tf.DTypes that has length >= 1. *@li container: An optional string. Defaults to "". -*@li shared_name: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n *@par Outputs: -*values: A list of Tensor objects. Must be one of the following types: \n -DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, \n -DT_FLOAT16, DT_DOUBLE, DT_BOOL, DT_UINT32, DT_UINT64. +*values: A list of Tensor objects. 
Must be one of the following types: +DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, +DT_FLOAT16, DT_DOUBLE, DT_BOOL, DT_UINT32, DT_UINT64. \n -*@attention Constraints:\n -*OrderedMapUnstage runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*OrderedMapUnstage runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility *Compatible with the TensorFlow operator OrderedMapUnstage. @@ -1586,31 +1588,31 @@ REG_OP(OrderedMapUnstage) .OP_END_FACTORY_REG(OrderedMapUnstage) /** -*@brief A barrier represents a key-value map, where each key is a string, \n -and each value is a tuple of tensors. +*@brief A barrier represents a key-value map, where each key is a string, +and each value is a tuple of tensors. \n *@par Attributes: *@li component_types: The type of each component in a value. *@li shapes: A list of shapes for each component of a queue element. -Each shape must be 1 in the first dimension. \n -The length of this attr must be the same as \n +Each shape must be 1 in the first dimension. +The length of this attr must be the same as the length of "component_types". -*@li capacity: The capacity of the barrier. \n -The default capacity is MAX_INT32, \n +*@li capacity: The capacity of the barrier. +The default capacity is MAX_INT32, which is the largest capacity of the underlying queue. -*@li container: If non-empty, this barrier is placed in the given container. \n +*@li container: If non-empty, this barrier is placed in the given container. Otherwise, a default container is used. -*@li shared_name: If non-empty, this barrier will be shared under \n -the given name across multiple sessions. +*@li shared_name: If non-empty, this barrier will be shared under +the given name across multiple sessions. \n *@par Outputs: -*handle: A Tensor of type DT_STRING_REF. The handle to the barrier. +*handle: A Tensor of type DT_STRING_REF. The handle to the barrier. 
\n -*@attention Constraints:\n -*Barrier runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*Barrier runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator Barrier. +*Compatible with the TensorFlow operator Barrier. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1625,26 +1627,26 @@ REG_OP(Barrier) .OP_END_FACTORY_REG(Barrier) /** -*@brief For each key, assigns the respective value to the specified component. +*@brief For each key, assigns the respective value to the specified component. \n *@par Inputs: -*Including: \n +*Including: * @li handle: A Tensor of type DT_STRING_REF. The handle to a barrier. * @li keys: A Tensor of type DT_STRING. A 1D tensor of keys. -* @li values: An any-dimensional tensor of values, which are associated \n -with the respective keys. The 0th dimension must have length n \n -Must be one of the following types: DT_FLOAT, DT_FLOAT16, DT_INT8, \n -DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, \n -DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING. +* @li values: An any-dimensional tensor of values, which are associated +with the respective keys. The 0th dimension must have length n +Must be one of the following types: DT_FLOAT, DT_FLOAT16, DT_INT8, +DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, +DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING. \n *@par Attributes: -*component_index: The component of the barrier elements that is being assigned. +*component_index: The component of the barrier elements that is being assigned. \n -*@attention Constraints:\n -*BarrierInsertMany runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*BarrierInsertMany runs on the Ascend AI CPU, which delivers poor performance. 
*@par Third-party framework compatibility -*Compatible with the TensorFlow operator BarrierInsertMany. +*Compatible with the TensorFlow operator BarrierInsertMany. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1660,41 +1662,41 @@ REG_OP(BarrierInsertMany) .OP_END_FACTORY_REG(BarrierInsertMany) /** -*@brief Takes the given number of completed elements from a barrier. +*@brief Takes the given number of completed elements from a barrier. \n *@par Inputs: -*Including: \n +*Including: * @li handle: A Tensor of type DT_STRING_REF. The handle to a barrier. -* @li num_elements: A Tensor of type DT_INT32. \n -A single-element tensor containing the number of elements to take. +* @li num_elements: A Tensor of type DT_INT32. +A single-element tensor containing the number of elements to take. \n *@par Attributes: *@li component_types: The type of each component in a value. -*@li allow_small_batch: Allow to return less than "num_elements" \n +*@li allow_small_batch: Allow to return less than "num_elements" items if barrier is already closed. -*@li wait_for_incomplete: An any-dimensional tensor \n +*@li wait_for_incomplete: An any-dimensional tensor for each component in the barrier element. -*@li timeout_ms: If the queue is empty, this operation will block for up to \n -"timeout_ms" milliseconds. Note: This option is not supported yet. +*@li timeout_ms: If the queue is empty, this operation will block for up to +"timeout_ms" milliseconds. Note: This option is not supported yet. \n *@par Outputs: -*@li indices: A 1D tensor of type DT_INT64. The indices, with length "num_elems". \n -These indices refer to the batch in which the values were \n +*@li indices: A 1D tensor of type DT_INT64. The indices, with length "num_elems". +These indices refer to the batch in which the values were placed into the barrier. -*@li keys: A 1D tensor of keys, \n +*@li keys: A 1D tensor of keys, with length "num_elements" of type DT_STRING. 
-*@li values: A 1D tensor per component in a barrier element. \n -All values have length "num_elements" along the 0th dimension. \n -Must be one of the following types: \n -DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \n -DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, \n -DT_RESOURCE, DT_STRING. +*@li values: A 1D tensor per component in a barrier element. +All values have length "num_elements" along the 0th dimension. +Must be one of the following types: +DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, +DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, +DT_RESOURCE, DT_STRING. \n -*@attention Constraints:\n -*BarrierTakeMany runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*BarrierTakeMany runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator BarrierTakeMany. +*Compatible with the TensorFlow operator BarrierTakeMany. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1715,23 +1717,23 @@ REG_OP(BarrierTakeMany) .OP_END_FACTORY_REG(BarrierTakeMany) /** -*@brief Closes the given barrier. +*@brief Closes the given barrier. \n *@par Inputs: -*Including: \n -*handle: A Tensor of type DT_STRING_REF. The handle to a barrier. +*Including: +*handle: A Tensor of type DT_STRING_REF. The handle to a barrier. \n *@par Attributes: -*cancel_pending_enqueues: If true, all pending enqueue requests \n -that are blocked on the barrier's queue will \n -be canceled. InsertMany will fail, \n -even if no new key is introduced. +*cancel_pending_enqueues: If true, all pending enqueue requests +that are blocked on the barrier's queue will +be canceled. InsertMany will fail, +even if no new key is introduced. 
\n -*@attention Constraints:\n -*BarrierClose runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*BarrierClose runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator BarrierClose. +*Compatible with the TensorFlow operator BarrierClose. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1742,20 +1744,20 @@ REG_OP(BarrierClose) .OP_END_FACTORY_REG(BarrierClose) /** -*@brief Computes the number of complete elements in the given barrier. +*@brief Computes the number of complete elements in the given barrier. \n *@par Inputs: -*Including: \n -*handle: A Tensor of type DT_STRING_REF. The handle to a barrier. +*Including: +*handle: A Tensor of type DT_STRING_REF. The handle to a barrier. \n *@par Outputs: -*size: A Tensor of type DT_INT32. The number of complete elements. +*size: A Tensor of type DT_INT32. The number of complete elements. \n -*@attention Constraints:\n -*BarrierReadySize runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*BarrierReadySize runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator BarrierReadySize. +*Compatible with the TensorFlow operator BarrierReadySize. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1766,20 +1768,20 @@ REG_OP(BarrierReadySize) .OP_END_FACTORY_REG(BarrierReadySize) /** -*@brief Computes the number of incomplete elements in the given barrier. +*@brief Computes the number of incomplete elements in the given barrier. \n *@par Inputs: -*Including: \n -*handle: A Tensor of type DT_STRING_REF. The handle to a barrier. +*Including: +*handle: A Tensor of type DT_STRING_REF. The handle to a barrier. \n *@par Outputs: -*size: A Tensor of type DT_INT32. The number of incomplete elements in the barrier. 
+*size: A Tensor of type DT_INT32. The number of incomplete elements in the barrier. \n -*@attention Constraints:\n -*BarrierIncompleteSize runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*BarrierIncompleteSize runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator BarrierIncompleteSize. +*Compatible with the TensorFlow operator BarrierIncompleteSize. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1790,24 +1792,24 @@ REG_OP(BarrierIncompleteSize) .OP_END_FACTORY_REG(BarrierIncompleteSize) /** -*@brief Emits randomized records. +*@brief Emits randomized records. \n *@par Attributes: *@li file_pattern: A string. Glob pattern for the data files. -*@li file_random_seed: An optional int. Defaults to 301. Random seeds used to \n +*@li file_random_seed: An optional int. Defaults to 301. Random seeds used to produce randomized records. -*@li file_shuffle_shift_ratio: An optional float. Defaults to 0. Shifts the \n +*@li file_shuffle_shift_ratio: An optional float. Defaults to 0. Shifts the list of files after the list is randomly shuffled. -*@li file_buffer_size: An optional int. Defaults to 10000. The randomization \n +*@li file_buffer_size: An optional int. Defaults to 10000. The randomization shuffling buffer. -*@li file_parallelism: An optional int. Defaults to 16. How many sstables are \n +*@li file_parallelism: An optional int. Defaults to 16. How many sstables are opened and concurrently iterated over. *@li batch_size: An optional int. Defaults to 32. The batch size. -*@li compression_type: An optional string. Defaults to "". The type of \n -compression for the file. Currently ZLIB and GZIP are supported. +*@li compression_type: An optional string. Defaults to "". The type of +compression for the file. Currently ZLIB and GZIP are supported. \n *@par Outputs: -*records: A Tensor of type string. 
+*records: A Tensor of type string. \n *@par Third-party framework compatibility *Compatible with tensorflow RecordInput operator. @@ -1825,25 +1827,25 @@ REG_OP(RecordInput) .OP_END_FACTORY_REG(RecordInput) /** -*@brief A conditional accumulator for aggregating gradients. +*@brief A conditional accumulator for aggregating gradients. \n *@par Attributes: *@li dtype: The type of the value being accumulated. *@li shape: The shape of the values, can be [], in which case shape is unknown. -*@li container: If non-empty, this accumulator is placed in the given container. \n +*@li container: If non-empty, this accumulator is placed in the given container. Otherwise, a default container is used. -*@li shared_name: If non-empty, this accumulator will be shared under the given \n +*@li shared_name: If non-empty, this accumulator will be shared under the given name across multiple sessions. -*@li reduction_type: reduction operator type, default "MEAN". +*@li reduction_type: reduction operator type, default "MEAN". \n *@par Outputs: -*handle: A Tensor of type DT_STRING_REF. The handle to the accumulator. +*handle: A Tensor of type DT_STRING_REF. The handle to the accumulator. \n -*@attention Constraints:\n -*ConditionalAccumulator runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*ConditionalAccumulator runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator ConditionalAccumulator. +*Compatible with the TensorFlow operator ConditionalAccumulator. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1858,28 +1860,28 @@ REG_OP(ConditionalAccumulator) .OP_END_FACTORY_REG(ConditionalAccumulator) /** -*@brief Applies a gradient to a given accumulator. +*@brief Applies a gradient to a given accumulator. \n *@par Inputs: -*Does not add if "local_step" is lesser than the accumulator's "global_step". 
\n +*Does not add if "local_step" is less than the accumulator's "global_step". * @li handle: A Tensor of type DT_STRING_REF. The handle to an accumulator. -* @li local_step: A Tensor of type DT_INT64. \n -The "local_step" value at which the gradient was computed. +* @li local_step: A Tensor of type DT_INT64. +The "local_step" value at which the gradient was computed. \n -* @li gradient: A tensor of the gradient to be accumulated. \n -Must be one of the following types: \n +* @li gradient: A tensor of the gradient to be accumulated. +Must be one of the following types: DT_FLOAT16, DT_FLOAT, DT_DOUBLE *@par Attributes: -*dtype: Must be one of the following types: \n +*dtype: Must be one of the following types: DT_FLOAT16, DT_FLOAT, DT_DOUBLE -*@attention Constraints:\n -*AccumulatorApplyGradient runs on the Ascend AI CPU, \n -which delivers poor performance.\n +*@attention Constraints: +*AccumulatorApplyGradient runs on the Ascend AI CPU, +which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator AccumulatorApplyGradient. +*Compatible with the TensorFlow operator AccumulatorApplyGradient. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1892,22 +1894,22 @@ REG_OP(AccumulatorApplyGradient) .OP_END_FACTORY_REG(AccumulatorApplyGradient) /** -*@brief Returns the number of gradients aggregated in the given accumulators. +*@brief Returns the number of gradients aggregated in the given accumulators. \n *@par Inputs: -*Including: \n -*handle: A Tensor of type DT_STRING_REF. The handle to an accumulator. +*Including: +*handle: A Tensor of type DT_STRING_REF. The handle to an accumulator. \n *@par Outputs: -*y: A Tensor of type DT_INT32. The number of gradients aggregated \n -in the given accumulator. +*y: A Tensor of type DT_INT32. The number of gradients aggregated +in the given accumulator. 
\n -*@attention Constraints:\n -*AccumulatorNumAccumulated runs on the Ascend AI CPU, \n -which delivers poor performance.\n +*@attention Constraints: +*AccumulatorNumAccumulated runs on the Ascend AI CPU, +which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator AccumulatorNumAccumulated. +*Compatible with the TensorFlow operator AccumulatorNumAccumulated. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1918,18 +1920,18 @@ REG_OP(AccumulatorNumAccumulated) .OP_END_FACTORY_REG(AccumulatorNumAccumulated) /** -*@brief Updates the accumulator with a new value for "global_step". +*@brief Updates the accumulator with a new value for "global_step". \n *@par Inputs: -*Input "new_global_step" is a scalar. \n +*Input "new_global_step" is a scalar. * @li handle: A Tensor of type DT_STRING_REF. The handle to an accumulator. -* @li new_global_step: The new "global_step" value to set A Tensor of type DT_INT64. +* @li new_global_step: The new "global_step" value to set A Tensor of type DT_INT64. \n -*@attention Constraints:\n -*AccumulatorSetGlobalStep runs on the Ascend AI CPU, which delivers poor performance.\n +*@attention Constraints: +*AccumulatorSetGlobalStep runs on the Ascend AI CPU, which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator AccumulatorSetGlobalStep. +*Compatible with the TensorFlow operator AccumulatorSetGlobalStep. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1940,29 +1942,29 @@ REG_OP(AccumulatorSetGlobalStep) .OP_END_FACTORY_REG(AccumulatorSetGlobalStep) /** -*@brief Extracts the average gradient in the given ConditionalAccumulator. +*@brief Extracts the average gradient in the given ConditionalAccumulator. \n *@par Inputs: -* Input "num_required" is a scalar. \n +* Input "num_required" is a scalar. * @li handle: A Tensor of type DT_STRING_REF. 
The handle to an accumulator. -* @li num_required: A Tensor of type DT_INT32. \n -Number of gradients required before an aggregate is returned. +* @li num_required: A Tensor of type DT_INT32. +Number of gradients required before an aggregate is returned. \n *@par Attributes: -*dtype: The data type of accumulated gradients. \n -Needs to correspond to the type of the accumulator. +*dtype: The data type of accumulated gradients. +Needs to correspond to the type of the accumulator. \n *@par Outputs: -*y: The average of the accumulated gradients. \n +*y: The average of the accumulated gradients. Must be one of the following types: -DT_FLOAT16, DT_FLOAT, DT_DOUBLE. +DT_FLOAT16, DT_FLOAT, DT_DOUBLE. \n -*@attention Constraints:\n +*@attention Constraints: *AccumulatorTakeGradient runs on the Ascend AI CPU, -\nwhich delivers poor performance.\n + which delivers poor performance. *@par Third-party framework compatibility -*Compatible with the TensorFlow operator AccumulatorTakeGradient. +*Compatible with the TensorFlow operator AccumulatorTakeGradient. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -1975,23 +1977,23 @@ REG_OP(AccumulatorTakeGradient) .OP_END_FACTORY_REG(AccumulatorTakeGradient) /** -*@brief A conditional accumulator for aggregating sparse gradients. +*@brief A conditional accumulator for aggregating sparse gradients. \n *@par Attributes: *@li shape: The shape of the values. *@li dtype: The type of the value being accumulated. -*@li container: If non-empty, this accumulator is placed in the given \n +*@li container: If non-empty, this accumulator is placed in the given container. Otherwise, a default container is used. -*@li shared_name: If non-empty, this accumulator will be shared under the \n +*@li shared_name: If non-empty, this accumulator will be shared under the given name across multiple sessions. -*@li reduction_type: The reduction method whose type is string, \n -default is "MEAN". 
+*@li reduction_type: The reduction method whose type is string, +default is "MEAN". \n *@par Outputs: -*handle: The handle to the accumulator. +*handle: The handle to the accumulator. \n *@par Third-party framework compatibility -*Compatible with tensorflow SparseConditionalAccumulator operator. +*Compatible with tensorflow SparseConditionalAccumulator operator. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -2006,28 +2008,28 @@ REG_OP(SparseConditionalAccumulator) .OP_END_FACTORY_REG(SparseConditionalAccumulator) /** -*@brief Applies a sparse gradient to a given accumulator. +*@brief Applies a sparse gradient to a given accumulator. \n *@par Inputs: -*The input handle must be type string_ref. Inputs include: \n +*The input handle must be type string_ref. Inputs include: *@li handle: A Tensor of type mutable string. The handle to a accumulator. -*@li local_step: A Tensor of type int64. The local_step value at which the \n +*@li local_step: A Tensor of type int64. The local_step value at which the sparse gradient was computed. -*@li indices: A Tensor of type int64. Indices of the sparse gradient to be \n +*@li indices: A Tensor of type int64. Indices of the sparse gradient to be accumulated. Must be a vector. -*@li values: A Tensor. Values are the non-zero slices of the gradient, \n -and must have the same first dimension as indices, i.e., the nnz represented \n +*@li values: A Tensor. Values are the non-zero slices of the gradient, +and must have the same first dimension as indices, i.e., the nnz represented by indices and values must be consistent. -*@li shape: A Tensor of type int64. +*@li shape: A Tensor of type int64. \n *@par Attributes: -*@li has_known_shape: A bool. Boolean indicating whether gradient_shape is \n +*@li has_known_shape: A bool. Boolean indicating whether gradient_shape is unknown, in which case the input is ignored during validation. -*@li dtype: The data type of accumulated gradients. 
Needs to correspond to \n -the type of the accumulator. +*@li dtype: The data type of accumulated gradients. Needs to correspond to +the type of the accumulator. \n *@par Third-party framework compatibility -*Compatible with tensorflow SparseAccumulatorApplyGradient operator. +*Compatible with tensorflow SparseAccumulatorApplyGradient operator. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -2046,24 +2048,24 @@ REG_OP(SparseAccumulatorApplyGradient) .OP_END_FACTORY_REG(SparseAccumulatorApplyGradient) /** -*@brief Extracts the average sparse gradient in a SparseConditionalAccumulator. +*@brief Extracts the average sparse gradient in a SparseConditionalAccumulator. \n *@par Inputs: -*The input handle must be type string_ref. Inputs include: \n +*The input handle must be type string_ref. Inputs include: *@li handle: The handle to a SparseConditionalAccumulator. -*@li num_required: Number of gradients required before we return an aggregate. +*@li num_required: Number of gradients required before we return an aggregate. \n *@par Attributes: -*dtype: The data type of accumulated gradients. Needs to correspond to the \n -type of the accumulator. +*dtype: The data type of accumulated gradients. Needs to correspond to the +type of the accumulator. \n *@par Outputs: *@li indices: Indices of the average of the accumulated sparse gradients. *@li values: Values of the average of the accumulated sparse gradients. -*@li shape: Shape of the average of the accumulated sparse gradients. +*@li shape: Shape of the average of the accumulated sparse gradients. \n *@par Third-party framework compatibility -*Compatible with tensorflow SparseAccumulatorTakeGradient operator. +*Compatible with tensorflow SparseAccumulatorTakeGradient operator. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
@@ -2079,25 +2081,25 @@ REG_OP(SparseAccumulatorTakeGradient) .OP_END_FACTORY_REG(SparseAccumulatorTakeGradient) /** -*@brief A conditional accumulator for aggregating gradients. +*@brief A conditional accumulator for aggregating gradients. \n *@par Attributes: * @li dtype: The type of the value being accumulated. * @li shape: The shape of the values, can be [], in which case shape is unknown. -* @li container: If non-empty, this accumulator is placed in the given container. \n +* @li container: If non-empty, this accumulator is placed in the given container. Otherwise, a default container is used. -* @li shared_name: If non-empty, this accumulator will be shared under the given \n +* @li shared_name: If non-empty, this accumulator will be shared under the given name across multiple sessions. -* @li reduction_type: reduction operator type, default "MEAN". +* @li reduction_type: reduction operator type, default "MEAN". \n *@par Outputs: -*handle: A Tensor of type DT_RESOURCE. The handle to the accumulator. +*handle: A Tensor of type DT_RESOURCE. The handle to the accumulator. \n *@attention Constraints: -*ResourceConditionalAccumulator runs on the Ascend AI CPU, which delivers poor performance. +*ResourceConditionalAccumulator runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator ResourceConditionalAccumulator. +*Compatible with the TensorFlow operator ResourceConditionalAccumulator. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -2112,21 +2114,21 @@ REG_OP(ResourceConditionalAccumulator) .OP_END_FACTORY_REG(ResourceConditionalAccumulator) /** -*@brief Applies a gradient to a given accumulator. \n -Does not add if "local_step" is lesser than the accumulator's "global_step". +*@brief Applies a gradient to a given accumulator. +Does not add if "local_step" is less than the accumulator's "global_step". 
\n *@par Inputs: * @li handle: The handle to an accumulator. * @li local_step: The "local_step" value at which the gradient was computed. -* @li gradient: A tensor of the gradient to be accumulated. \n -Must be one of the following types: \n +* @li gradient: A tensor of the gradient to be accumulated. +Must be one of the following types: DT_FLOAT16, DT_FLOAT, DT_DOUBLE *@attention Constraints: -*ResourceAccumulatorApplyGradient runs on the Ascend AI CPU, which delivers poor performance. +*ResourceAccumulatorApplyGradient runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator ResourceAccumulatorApplyGradient. +*Compatible with the TensorFlow operator ResourceAccumulatorApplyGradient. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -2138,19 +2140,19 @@ REG_OP(ResourceAccumulatorApplyGradient) .OP_END_FACTORY_REG(ResourceAccumulatorApplyGradient) /** -*@brief Returns the number of gradients aggregated in the given accumulators. +*@brief Returns the number of gradients aggregated in the given accumulators. \n *@par Inputs: -*handle: The handle to an accumulator. +*handle: The handle to an accumulator. \n *@par Outputs: -*num_accumulated: The number of gradients aggregated in the given accumulator. +*num_accumulated: The number of gradients aggregated in the given accumulator. \n *@attention Constraints: -*ResourceAccumulatorNumAccumulated runs on the Ascend AI CPU, which delivers poor performance. +*ResourceAccumulatorNumAccumulated runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator ResourceAccumulatorNumAccumulated. +*Compatible with the TensorFlow operator ResourceAccumulatorNumAccumulated. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
@@ -2161,17 +2163,17 @@ REG_OP(ResourceAccumulatorNumAccumulated) .OP_END_FACTORY_REG(ResourceAccumulatorNumAccumulated) /** -*@brief Updates the accumulator with a new value for "global_step". +*@brief Updates the accumulator with a new value for "global_step". \n *@par Inputs: * @li handle: The handle to an accumulator. -* @li new_global_step: The new "global_step" value to set. +* @li new_global_step: The new "global_step" value to set. \n *@attention Constraints: -*ResourceAccumulatorSetGlobalStep runs on the Ascend AI CPU, which delivers poor performance. +*ResourceAccumulatorSetGlobalStep runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator ResourceAccumulatorSetGlobalStep. +*Compatible with the TensorFlow operator ResourceAccumulatorSetGlobalStep. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -2182,26 +2184,26 @@ REG_OP(ResourceAccumulatorSetGlobalStep) .OP_END_FACTORY_REG(ResourceAccumulatorSetGlobalStep) /** -*@brief Extracts the average gradient in the given ConditionalAccumulator. +*@brief Extracts the average gradient in the given ConditionalAccumulator. \n *@par Inputs: * @li handle: The handle to an accumulator. -* @li num_required: Number of gradients required before an aggregate is returned. +* @li num_required: Number of gradients required before an aggregate is returned. \n *@par Attributes: -*dtype: The data type of accumulated gradients. \n -Needs to correspond to the type of the accumulator. +*dtype: The data type of accumulated gradients. +Needs to correspond to the type of the accumulator. \n *@par Outputs: -*average: The average of the accumulated gradients. \n -Must be one of the following types: \n -DT_FLOAT16, DT_FLOAT, DT_DOUBLE. +*average: The average of the accumulated gradients. +Must be one of the following types: +DT_FLOAT16, DT_FLOAT, DT_DOUBLE. 
\n *@attention Constraints: -*ResourceAccumulatorTakeGradient runs on the Ascend AI CPU, which delivers poor performance. +*ResourceAccumulatorTakeGradient runs on the Ascend AI CPU, which delivers poor performance. \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator ResourceAccumulatorTakeGradient. +*Compatible with the TensorFlow operator ResourceAccumulatorTakeGradient. \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -2214,19 +2216,19 @@ REG_OP(ResourceAccumulatorTakeGradient) .OP_END_FACTORY_REG(ResourceAccumulatorTakeGradient) /** -*@brief Enqueue a Tensor on the computation outfeed. +*@brief Enqueue a Tensor on the computation outfeed. \n *@par Inputs: -*Inputs include: \n -*x: A Tensor. Must be one of the following types: float16, float32, \n -float64, int8, int16, uint16, uint8, int32, int64, uint32, uint64, \n -bool, double, string. +*Inputs include: +*x: A Tensor. Must be one of the following types: float16, float32, +float64, int8, int16, uint16, uint8, int32, int64, uint32, uint64, +bool, double, string. It's a dynamic input. \n *@par Attributes: -*channel_name: name of operator channel, default "". +*channel_name: name of operator channel, default "". \n -*@attention Constraints:\n -*-The implementation for OutfeedEnqueueOp on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for OutfeedEnqueueOp on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow OutfeedEnqueueOp operator. diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index cd42b707..2313b4a0 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -24,16 +24,16 @@ namespace ge { /** -*@brief Adds all input tensors element-wise. 
+*@brief Adds all input tensors element-wise. \n *@par Inputs: *Dynamic inputs, including: * @li x: A list of Tensor objects, each with same shape and type. The supported types are: * float16, float32, double, int32, uint8, int16, int8, complex64, int64, -* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n *@par Outputs: -*y: A Tensor. Has the same shape and type as the elements of "x". +*y: A Tensor. Has the same shape and type as the elements of "x". \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator AddN. @@ -52,20 +52,20 @@ REG_OP(AddN) * @li grads: A mutable Tensor. Must be one of the following types: * float16, float32, int32. * @li x1: A mutable Tensor of the same type as "grads". -* @li x2: A mutable Tensor of the same type as "grads". +* @li x2: A mutable Tensor of the same type as "grads". \n *@par Attributes: *@li grad_x: An optional bool. Defaults to "True". * If "True", "y1" will be output. -* If "False", "y1" will not be output. +* If "False", "y1" will not be output. \n *@li grad_y: An optional bool. Defaults to "True". * If "True", "y2" will be output. -* If "False", "y2" will not be output. +* If "False", "y2" will not be output. \n *@par Outputs: * @li y1: A mutable Tensor. Has the same type as "grads". -* @li y2: A mutable Tensor. Has the same type as "grads". +* @li y2: A mutable Tensor. Has the same type as "grads". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MaximumGrad. @@ -88,20 +88,20 @@ REG_OP(MaximumGrad) * @li grads: A mutable Tensor. Must be one of the following types: * float16, float32, int32. * @li x1: A mutable Tensor of the same type as "grads". -* @li x2: A mutable Tensor of the same type as "grads". +* @li x2: A mutable Tensor of the same type as "grads". \n *@par Attributes: *@li grad_x: An optional bool. Defaults to "True". * If "True", "y1" will be output. 
-* If "False", "y1" will not be output. +* If "False", "y1" will not be output. \n *@li grad_y: An optional bool. Defaults to "True". * If "True", "y2" will be output. -* If "False", "y2" will not be output. +* If "False", "y2" will not be output. \n *@par Outputs: * @li y1: A mutable Tensor. Has the same type as "grads". -* @li y2: A mutable Tensor. Has the same type as "grads". +* @li y2: A mutable Tensor. Has the same type as "grads". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MinimumGrad. @@ -117,15 +117,15 @@ REG_OP(MinimumGrad) .OP_END_FACTORY_REG(MinimumGrad) /** -*@brief Cast a tensor form src data type to dst data type. +*@brief Cast a tensor from src data type to dst data type. \n *@par Inputs: *One input: *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, - int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. + int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n *@par Attributes: -*dst_type: An required attribute of type int32, specifying the dst data type. +*dst_type: A required attribute of type int32, specifying the dst data type. \n *@par Outputs: *y:A Tensor. Has the same type as x. @@ -141,16 +141,16 @@ REG_OP(Cast) .OP_END_FACTORY_REG(Cast) /** -*@brief Returns the truth value of (x1 >= x2) element-wise. +*@brief Returns the truth value of (x1 >= x2) element-wise. \n *@par Inputs: *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: float16, float32, * double, int32, int8, uint8, int64, uint16, uint32, uint64. -* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility: * Compatible with the TensorFlow operator GreaterEqual. 
@@ -162,16 +162,16 @@ REG_OP(GreaterEqual) .OP_END_FACTORY_REG(GreaterEqual) /** -*@brief Returns the truth value of (x1 < x2) element-wise. +*@brief Returns the truth value of (x1 < x2) element-wise. \n *@par Inputs: *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: float16, float32, double, int32, * uint8, int16, int8, int64, uint16, uint32, uint64. -* @li x2: A Tensor with the same type as "x1". +* @li x2: A Tensor with the same type as "x1". \n *@par Outputs: -*y: A Tensor of type bool. +*y: A Tensor of type bool. \n *@par Third-party framework compatibility: * Compatible with TensorFlow operator Less. @@ -183,17 +183,17 @@ REG_OP(Less) .OP_END_FACTORY_REG(Less) /** -*@brief Returns x1/x2 element-wise for real types. +*@brief Returns x1/x2 element-wise for real types. \n *@par Inputs: * Two inputs, including: *@li x1: A Tensor. Must be one of the following types: float16, float32, double, uint16, int8, uint8, int16, int32, int64, complex64, DT_COMPLEX128. *@li x2: A Tensor. Must be one of the following types: float16, float32, double, uint16, - int8, uint8, int16, int32, int64, complex64, DT_COMPLEX128. + int8, uint8, int16, int32, int64, complex64, DT_COMPLEX128. \n *@par Outputs: -* y: A Tensor. Has the same type and format as input "x1". +* y: A Tensor. Has the same type and format as input "x1". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator RealDiv. @@ -211,10 +211,10 @@ REG_OP(RealDiv) .OP_END_FACTORY_REG(RealDiv) /** -*@brief Computes square root of x element-wise. +*@brief Computes square root of x element-wise. \n *@par Inputs: -* x: A Tensor. Must be one of the following types: float16, float32, complex128, complex64, float64. +* x: A Tensor. Must be one of the following types: float16, float32, complex128, complex64, float64. \n *@par Outputs: *y: A Tensor. Has the same type as "x". 
@@ -227,15 +227,15 @@ REG_OP(Sqrt) .OP_END_FACTORY_REG(Sqrt) /** -*@brief Returns the max of "x" and "y" (i.e. x > y ? x: y) element-wise. +*@brief Returns the max of "x" and "y" (i.e. x > y ? x: y) element-wise. \n *@par Inputs: *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. -* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Maximum. @@ -250,15 +250,15 @@ REG_OP(Maximum) .OP_END_FACTORY_REG(Maximum) /** -*@brief Returns the min of x and y (i.e. x1 < x2 ? x1 : x2) element-wise. +*@brief Returns the min of x and y (i.e. x1 < x2 ? x1 : x2) element-wise. \n *@par Inputs: *Two inputs, include: * @li x1: A Tensor. Must be one of the following types: float32, float16, double, int32, int64. -* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor of the same type as "x1". +*y: A Tensor of the same type as "x1". \n *@par Third-party framework compatibility: * Compatible with the TensorFlow operator Minimum. @@ -273,7 +273,7 @@ REG_OP(Minimum) .OP_END_FACTORY_REG(Minimum) /** -*@brief: Computes the reciprocal of "x". +*@brief: Computes the reciprocal of "x". \n *@par Inputs: *One inputs, include: @@ -281,7 +281,7 @@ REG_OP(Minimum) * complex64, complex128.the format can be [NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND] *@par Outputs: -*y:A Tensor with same type as "x". +*y:A Tensor with same type as "x". \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Reciprocal. @@ -299,7 +299,7 @@ REG_OP(Reciprocal) *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, float64, * float16, float32, complex128, complex64, uint16. 
-* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -319,14 +319,14 @@ REG_OP(Sub) .OP_END_FACTORY_REG(Sub) /** -*@brief computes the absolute value of a tensor. +*@brief computes the absolute value of a tensor. \n *@par Inputs: *One inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Abs. @@ -337,7 +337,7 @@ REG_OP(Abs) .OP_END_FACTORY_REG(Abs) /** -*@brief Computes gradients for absolute operation. +*@brief Computes gradients for absolute operation. \n * *@par Inputs: @@ -361,14 +361,14 @@ REG_OP(AbsGrad) .OP_END_FACTORY_REG(AbsGrad) /** -*@brief: Computes the sign of "x". +*@brief: Computes the sign of "x". \n *@par Inputs: *x:An ND Tensor of type float16, float32, int32, int64, double, -* complex64, complex128. +* complex64, complex128. \n *@par Outputs: -*y:An ND Tensor with same type as "x". +*y:An ND Tensor with same type as "x". \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Sign. @@ -381,15 +381,15 @@ REG_OP(Sign) .OP_END_FACTORY_REG(Sign) /** -*@brief Returns (x1 - x2)(x1 - x2) element-wise. +*@brief Returns (x1 - x2)(x1 - x2) element-wise. \n *@par Inputs: *Two inputs, including: \n *@li x1: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64,complex128 -*@li x2: A Tensor. Has the same type as "x1". +*@li x2: A Tensor. Has the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". 
\n *@par Third-party framework compatibility * Compatible with TensorFlow operator SquaredDifference. @@ -404,17 +404,17 @@ REG_OP(SquaredDifference) .OP_END_FACTORY_REG(SquaredDifference) /** -*@brief Computes cosine of "x" element-wise. +*@brief Computes cosine of "x" element-wise. \n *@par Inputs: *x: A Tensor of type float16, float32, double, complex64, complex128. * the format can be [NCHW,NC1HWC0,NHWC,ND] *@par Outputs: -*y: A Tensor of the same type as "x". +*y: A Tensor of the same type as "x". \n *@par Third-party framework compatibility -* Compatible with the TensorFlow operator Cos. +* Compatible with the TensorFlow operator Cos. \n */ REG_OP(Cos) @@ -423,17 +423,17 @@ REG_OP(Cos) .OP_END_FACTORY_REG(Cos) /** -*@brief Returns x1/x2 element-wise. +*@brief Returns x1/x2 element-wise. \n *@par Inputs: * Two inputs, including: *@li x1: A Tensor. Must be one of the following types: * float16, float32, int32, int8, uint8, float64, int64, uint16, int16, * complex64, complex128, the format can be [NCHW,NC1HWC0,NHWC,ND]. -*@li x2: A Tensor. Has the same type and format as input "x1". +*@li x2: A Tensor. Has the same type and format as input "x1". \n *@par Outputs: -* y: A Tensor. Has the same type and format as input "x1". +* y: A Tensor. Has the same type and format as input "x1". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Div. @@ -451,7 +451,7 @@ REG_OP(Div) .OP_END_FACTORY_REG(Div) /** -*@brief: Returns the truth value of (x = y) element-wise. +*@brief: Returns the truth value of (x = y) element-wise. \n *@par Inputs: * Two inputs, including: @@ -459,10 +459,10 @@ REG_OP(Div) * float16, float32, int32, int8, uint8, double, int16, int64, complex64, * complex128, quint8, qint8, qint32, string, bool. the format can be * [NCHW, NC1HWC0, NHWC, ND] -*@li x2: A Tensor of the same type and format as "x1". +*@li x2: A Tensor of the same type and format as "x1". \n *@par Outputs: -*y: A Tensor of type bool. 
+*y: A Tensor of type bool. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Equal. @@ -480,19 +480,19 @@ REG_OP(Equal) .OP_END_FACTORY_REG(Equal) /** -*@brief Computes the exponential of "x" element-wise. +*@brief Computes the exponential of "x" element-wise. \n *@par Inputs: *One input:\n -*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. +*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n *@par Attributes: *@li base: An optional attribute of type float32, specifying the base gamma. Defaults to "-1.0". *@li scale: An optional attribute of type float32, specifying the scale alpha. Defaults to "1.0". -*@li shift: An optional attribute of type float32, specifying the shift beta. Defaults to "0.0". +*@li shift: An optional attribute of type float32, specifying the shift beta. Defaults to "0.0". \n *@par Outputs: -*y: A Tensor of the same type as "x". +*y: A Tensor of the same type as "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Exp. @@ -506,14 +506,14 @@ REG_OP(Exp) .OP_END_FACTORY_REG(Exp) /** -*@brief Computes the exp(x) - 1 element-wise, y = e^x - 1. +*@brief Computes the exp(x) - 1 element-wise, y = e^x - 1. \n *@par Inputs: *One input: -*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. +*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n *@par Outputs: -*y: A Tensor of the same type as "x". +*y: A Tensor of the same type as "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Expm1. @@ -524,13 +524,13 @@ REG_OP(Expm1) .OP_END_FACTORY_REG(Expm1) /** -*@brief: Computes the reciprocal of "x". +*@brief: Computes the reciprocal of "x". \n *@par Inputs:\n -*x: A Tensor. 
Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. +*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Inv. @@ -542,15 +542,15 @@ REG_OP(Inv) /** *@brief: Computes "x" reciprocal grad, dx = -1*dy*y*y, where, "y = 1/x", and "dy" - is the corresponding input gradient. + is the corresponding input gradient. \n *@par Inputs: * Two inputs, including: * @li x: A Tensor. Must be one of the following types: float16, float32, int32, int8. -* @li grad: A Tensor. Has the same type as "x". +* @li grad: A Tensor. Has the same type as "x". \n *@par Outputs: -*y: A Tensor, Has the same type as "x". +*y: A Tensor, Has the same type as "x". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator InvGrad. @@ -562,17 +562,17 @@ REG_OP(InvGrad) .OP_END_FACTORY_REG(InvGrad) /** -*@brief: Returns the truth value of (x <= y) element-wise. +*@brief: Returns the truth value of (x <= y) element-wise. \n *@par Inputs: * Two inputs, including: *@li x1: A Tensor. Must be one of the following types: float32, float64, * int32, uint8, int16, int8, int64, qint8, quint8, qint32, uint16, * float16, uint32, uint64. -*@li x2: A Tensor of the same type as "x1". +*@li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor of type bool. +*y: A Tensor of type bool. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator LessEqual. @@ -584,14 +584,14 @@ REG_OP(LessEqual) .OP_END_FACTORY_REG(LessEqual) /** -*@brief Computes the logarithm of (x + 1) element-wise, y = ln(x + 1). +*@brief Computes the logarithm of (x + 1) element-wise, y = ln(x + 1). \n *@par Inputs: *One input:\n -*x: A Tensor. 
Must be one of the following types: float16, float32, double, complex64, complex128. +*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n *@par Outputs: -*y: A Tensor of the same type as "x". +*y: A Tensor of the same type as "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Log1p. @@ -607,7 +607,7 @@ REG_OP(Log1p) *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: float16, float32, * int32, int64, int8, uint8, double. -* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: *y: A Tensor. Has the same type as "x1". @@ -624,16 +624,16 @@ REG_OP(Mod) .OP_END_FACTORY_REG(Mod) /** -*@brief: Returns the truth value of (x != y) element-wise. +*@brief: Returns the truth value of (x != y) element-wise. \n *@par Inputs: * Two inputs, including: *@li x1: A Tensor. Must be one of the following types: float16, float32, int32, * int8, uint8, double, int16, int64, uint16, half, uint32, uint64 -*@li x2: A Tensor of the same type as "x1". +*@li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor of type bool. +*y: A Tensor of type bool. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator NotEqual. @@ -650,10 +650,10 @@ REG_OP(NotEqual) *@par Inputs: * One input: *x: A Tensor. Must be one of the following types: float16, float32, int32, - * int64, complex64, complex128. + * int64, complex64, complex128. \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Neg. @@ -664,23 +664,23 @@ REG_OP(Neg) .OP_END_FACTORY_REG(Neg) /** -*@brief Returns x1/x2 element-wise for integer types. +*@brief Returns x1/x2 element-wise for integer types. \n *@par Inputs: *@li x1: A Tensor. 
Must be one of the following types: * float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, uint16, * complex128, float16, uint32, uint64, complex64, complex128. -*@li x2: A Tensor of the same data type as "x1". +*@li x2: A Tensor of the same data type as "x1". \n *@par Outputs: *y: A Tensor. Has the same type as "x1". - + *@attention Constraints: -* Broadcasting is supported. +* Broadcasting is supported. \n *@par Third-party framework compatibility -* Compatible with the TensorFlow operator TruncateDiv. +* Compatible with the TensorFlow operator TruncateDiv. \n */ REG_OP(TruncateDiv) @@ -696,16 +696,16 @@ REG_OP(TruncateDiv) .OP_END_FACTORY_REG(TruncateDiv) /** -*@brief Computes x1/x2 element-wise, if x1 == 0, return 0. +*@brief Computes x1/x2 element-wise, if x1 == 0, return 0. *@par Inputs: * Two inputs, including: * @li x1: A Tensor. Must be one of the following types: float16, float32, * double, complex64, complex128. -* @li x2: A Tensor. Has the same type as "x1". +* @li x2: A Tensor. Has the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Xdivy. @@ -721,16 +721,16 @@ REG_OP(Xdivy) /** *@brief Computes "x" multiplied by the logarithm of y element-wise, -* if "x" == 0, return "0". +* if "x" == 0, return "0". \n *@par Inputs: * Two inputs, including: * @li x1: A Tensor. Must be one of the following types: float16, float32, -* double, complex64, complex128. -* @li x2: A Tensor. Has the same type as "x1". +* double, complex64, complex128. +* @li x2: A Tensor. Has the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Xlogy. 
@@ -745,14 +745,14 @@ REG_OP(Xlogy) .OP_END_FACTORY_REG(Xlogy) /** -*@brief Computes square of "x" element-wise. +*@brief Computes square of "x" element-wise. \n *@par Inputs: *One input: \n *x: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128 *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Square. @@ -766,7 +766,7 @@ REG_OP(Square) /** -*@brief Computes reciprocal of square root of "x" element-wise: y = 1/sqrt{x}. +*@brief Computes reciprocal of square root of "x" element-wise: y = 1/sqrt{x}. \n * *@par Inputs: @@ -786,7 +786,7 @@ REG_OP(Rsqrt) .OP_END_FACTORY_REG(Rsqrt) /** -*@brief Computes the trignometric inverse sine of "x" element-wise. +*@brief Computes the trignometric inverse sine of "x" element-wise. \n * *@par Inputs: @@ -807,7 +807,7 @@ REG_OP(Asin) .OP_END_FACTORY_REG(Asin) /** -*@brief Computes gradients for Asin operation. +*@brief Computes gradients for Asin operation. \n * *@par Inputs: @@ -834,7 +834,7 @@ REG_OP(AsinGrad) .OP_END_FACTORY_REG(AsinGrad) /** -*@brief Computes acos of x element-wise. +*@brief Computes acos of x element-wise. \n * *@par Inputs: @@ -855,7 +855,7 @@ REG_OP(Acos) .OP_END_FACTORY_REG(Acos) /** -*@brief Computes gradients for Acos operation. +*@brief Computes gradients for Acos operation. \n * *@par Inputs: @@ -879,7 +879,7 @@ REG_OP(AcosGrad) .OP_END_FACTORY_REG(AcosGrad) /** -*@brief Computes inverse hyperbolic cosine of x element-wise. +*@brief Computes inverse hyperbolic cosine of x element-wise. \n * *@par Inputs: @@ -902,7 +902,7 @@ REG_OP(Acosh) .OP_END_FACTORY_REG(Acosh) /** -*@brief Computes gradients for Acosh operation. +*@brief Computes gradients for Acosh operation. \n * *@par Inputs: @@ -926,7 +926,7 @@ REG_OP(AcoshGrad) .OP_END_FACTORY_REG(AcoshGrad) /** -*@brief Returns the truth value of x1 OR x2 element-wise. 
+*@brief Returns the truth value of x1 OR x2 element-wise. \n * *@par Inputs: @@ -950,7 +950,7 @@ REG_OP(LogicalOr) .OP_END_FACTORY_REG(LogicalOr) /** -*@brief Returns the truth value of x1 AND x2 element-wise. +*@brief Returns the truth value of x1 AND x2 element-wise. \n * *@par Inputs: @@ -975,7 +975,7 @@ REG_OP(LogicalAnd) /** *@brief Computes the Bessel i0e function of "x" element-wise. -* Exponentially scaled modified Bessel function of order 0 +* Exponentially scaled modified Bessel function of order 0 * defined as: bessel_i0e(x) = exp(-abs(x)) bessel_i0(x). * This function is faster and numerically stabler than "bessel_i0(x)". * @@ -996,7 +996,7 @@ REG_OP(BesselI0e) /** *@brief Computes the Bessel i1e function of "x" element-wise. -* Exponentially scaled modified Bessel function of order 0 +* Exponentially scaled modified Bessel function of order 0 * defined as: bessel_i1e(x) = exp(-abs(x)) bessel_i1(x). * This function is faster and numerically stabler than "bessel_i1(x)". * @@ -1017,10 +1017,10 @@ REG_OP(BesselI1e) /** * @brief Computes logarithm of x element-wise. -* y = log_base(shift + scale * x), with "base" > 0. +* y = log_base(shift + scale * x), with "base" > 0. \n * @par Inputs: -* @li x: A Tensor of type complex64, complex128, float16, float32 or double. +* @li x: A Tensor of type complex64, complex128, float16, float32 or double. \n * @par Attributes: * @li base: An optional float32, specifying the base "e". Defaults to "-1.0" @@ -1030,13 +1030,13 @@ REG_OP(BesselI1e) * @li shift: An optional float32, specifying the shift. Defaults to "0.0" * @par Outputs: -* y: A Tensor has same type as "x". +* y: A Tensor has same type as "x". \n * @attention Constraints: * @li "base" is supposed to be greater than 0. Retaining the default * value "-1" sets "base" to "e". * @li If the input value of operator Log is within the range (0, 0.01] or -* [0.95, 1.05], the output accuracy is subject to change. +* [0.95, 1.05], the output accuracy is subject to change. 
\n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator Log. @@ -1058,14 +1058,14 @@ REG_OP(Log) * @li x1: A Tensor. Must be one of the following types: float16, float32, * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. * @li x2: A Tensor. Must be one of the following types: float16, float32, -* float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. +* float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. \n * @par Outputs: * y: A Tensor. Must be one of the following types: float16, float32, float64, -* uint8, int8, uint16, int16, int32, int64, complex64, complex128. +* uint8, int8, uint16, int16, int32, int64, complex64, complex128. \n * @attention Constraints: -* @li "x1" and "x2" have incompatible shapes or types. +* @li "x1" and "x2" have incompatible shapes or types. \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator Multiply. @@ -1085,15 +1085,15 @@ REG_OP(Mul) /** * @brief Computes the gradient of the square root of "x" with regard to its * input. grad = dy * 0.5/y, where y = sqrt(x), and "dy" is the corresponding -* input gradient. +* input gradient. \n * @par Inputs: * Two inputs, including: * @li y: A Tensor of type float32 or float16. -* @li dy: A Tensor. Has the same type as "y". +* @li dy: A Tensor. Has the same type as "y". \n * @par Outputs: -* z: A Tensor. Has the same type as "y". +* z: A Tensor. Has the same type as "y". \n * @attention Constraints: * "dy" has the same shape and type as "y". @@ -1110,7 +1110,7 @@ REG_OP(SqrtGrad) *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, float64, * float16, float32, complex128, complex64, string. -* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: *y: A Tensor. Has the same type as "x". 
@@ -1130,16 +1130,16 @@ REG_OP(Add) .OP_END_FACTORY_REG(Add) /** -*@brief Confuse broadcast, add and mul. +*@brief Confuse broadcast, add and mul. \n *@par Inputs: *Five inputs, including: * @li x1: A Tensor. Must be one of the following types:int32 float16, float32. * @li x2: A Tensor of the same type as "x1". -* @li x3: A Tensor of the same type as "x1". +* @li x3: A Tensor of the same type as "x1". \n *@par Outputs: -*@li y: A Tensor. Has the same type as "x1". +*@li y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility: * Compatible with the TensorFlow operator LRN. @@ -1153,7 +1153,7 @@ REG_OP(FusedMulAdd) .OP_END_FACTORY_REG(FusedMulAdd) /** -*@brief Returns x1 + x2 element-wise. +*@brief Returns x1 + x2 element-wise. \n * *@par Inputs: @@ -1183,23 +1183,23 @@ REG_OP(AddV2) .OP_END_FACTORY_REG(AddV2) /** -*@brief Updates "ref" by adding "value" to it. +*@brief Updates "ref" by adding "value" to it. \n *@par Inputs: *@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. -*@li value: A Tensor of the same type as "ref". +*@li value: A Tensor of the same type as "ref". \n *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", the addition will be protected by a lock; otherwise the behavior is undefined, but may exhibit less contention. -* This attribute is reserved. +* This attribute is reserved. \n *@par Outputs: -*ref: A Tensor that holds the new value of ref after the value has been added. +*ref: A Tensor that holds the new value of ref after the value has been added. \n *@attention Constraints: -*An input tensor of type int64 must have a shape with size 1. +*An input tensor of type int64 must have a shape with size 1. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator AssignAdd. 
@@ -1212,11 +1212,11 @@ REG_OP(AssignAdd) .OP_END_FACTORY_REG(AssignAdd) /** -*@brief Updates "ref" by assigning "value" to it. +*@brief Updates "ref" by assigning "value" to it. \n *@par Inputs: *@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. -*@li value: A Tensor of the same type as "ref". +*@li value: A Tensor of the same type as "ref". \n *@par Attributes: *@li validate_shape: An optional bool. Defaults to "true". @@ -1226,10 +1226,10 @@ REG_OP(AssignAdd) *@li use_locking: An optional bool. Defaults to True. If True, the assignment will be protected by a lock; otherwise the behavior is undefined, but may exhibit less contention. -* This attribute is reserved. +* This attribute is reserved. \n *@par Outputs: -*ref: A Tensor that holds the new value of ref after the value has been assigned. +*ref: A Tensor that holds the new value of ref after the value has been assigned. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Assign. @@ -1245,7 +1245,7 @@ REG_OP(Assign) /** *@brief Updates "var" by subtracting "value" from it.\n * This operation outputs "var" after the update is done. \n -* This makes it easier to chain operations that need to use the reset value. +* This makes it easier to chain operations that need to use the reset value. \n * *@par Inputs: @@ -1271,16 +1271,16 @@ REG_OP(AssignSub) .OP_END_FACTORY_REG(AssignSub) /** -*@brief: Computes the backpropagation of the square root operation. +*@brief: Computes the backpropagation of the square root operation. \n *@par Inputs: * Two inputs, including: *@li y: An NCHW, NC1HWC0, NHWC, ND Tensor. Must be one of the following types: \ * float, int32, int8, double, complex64, complex128, half. -*@li dy: A Tensor of the same type and format as "y". +*@li dy: A Tensor of the same type and format as "y". \n *@par Outputs: -*z: A Tensor of the same type and format as "y". 
+*z: A Tensor of the same type and format as "y". \n *@see Matmul() | Rsqrt () @@ -1294,18 +1294,18 @@ REG_OP(RsqrtGrad) .OP_END_FACTORY_REG(RsqrtGrad) /** -*@brief Computes hyperbolic sine of "x" element-wise. +*@brief Computes hyperbolic sine of "x" element-wise. \n *@par Inputs: *x: An NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64, - * complex128, half. + * complex128, half. \n *@par Outputs: *y: A NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64, - * complex128, half. + * complex128, half. \n *@par Third-party framework compatibility -* Compatible with the TensorFlow operator Sinh. +* Compatible with the TensorFlow operator Sinh. \n */ REG_OP(Sinh) @@ -1314,17 +1314,17 @@ REG_OP(Sinh) .OP_END_FACTORY_REG(Sinh) /** -*@brief: Clips tensor values to a specified min and max. +*@brief: Clips tensor values to a specified min and max. \n *@par Inputs: * Three inputs, including: -*@li x: A Tensor of type float32, float64, int32, uint8, int16, int8, complex64, int64, +*@li x: A Tensor of type float32, float64, int32, uint8, int16, int8, complex64, int64, *qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. *@li clip_value_min: A Tensor of the same type as "x". -*@li clip_value_max: A Tensor of the same type as "x". +*@li clip_value_max: A Tensor of the same type as "x". \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ClipByValue. @@ -1337,17 +1337,17 @@ REG_OP(ClipByValue) .OP_END_FACTORY_REG(ClipByValue) /** -*@brief Computes cosine of "x" element-wise. +*@brief Computes cosine of "x" element-wise. \n *@par Inputs: *x: A Tensor of type float16, float32, double, complex64, complex128. -* the format can be [NCHW,NC1HWC0,NHWC,ND]. +* the format can be [NCHW,NC1HWC0,NHWC,ND]. \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". 
\n *@par Third-party framework compatibility -* Compatible with the TensorFlow operator Cosh. +* Compatible with the TensorFlow operator Cosh. \n */ REG_OP(Cosh) @@ -1356,16 +1356,16 @@ REG_OP(Cosh) .OP_END_FACTORY_REG(Cosh) /** -*@brief: Returns 0 if the denominator is zero, else, like Div. +*@brief: Returns 0 if the denominator is zero, else, like Div. \n *@par Inputs: * Two inputs, including: *@li x1: A Tensor. Must be one of the following types:float16, float32, int32, * int8, uint8, double, the format can be [NCHW,NC1HWC0,NHWC,ND]. -*@li x2: A Tensor of the same type as "x1". +*@li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator DivNoNan. @@ -1380,7 +1380,7 @@ REG_OP(DivNoNan) .OP_END_FACTORY_REG(DivNoNan) /** -*@brief Reverses specific dimensions of a tensor. +*@brief Reverses specific dimensions of a tensor. \n *@par Inputs: * One input: \n @@ -1404,10 +1404,10 @@ REG_OP(Invert) *@par Inputs: *One input: \n *x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, - * int16, uint16, int32, int64, complex128, bool. + * int16, uint16, int32, int64, complex128, bool. \n *@par Outputs: -*y: A Tensor of the same type as "x". +*y: A Tensor of the same type as "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator OnesLike. @@ -1422,20 +1422,20 @@ REG_OP(OnesLike) .OP_END_FACTORY_REG(OnesLike) /** -*@brief Computes the gradient for the inverse of "x" with regard its input. +*@brief Computes the gradient for the inverse of "x" with regard its input. \n *@par Inputs: *@li input_y: A Tensor. Must be one of the following types: float, double, * complex64, complex128, half. *@li input_dy: A Tensor. Must be one of the following types: float, double, - * complex64, complex128, half. + * complex64, complex128, half. 
\n *@par Outputs: *output_data: A Tensor. Must be one of the following types: float, double, - * complex64, complex128, half. + * complex64, complex128, half. \n *@attention Constraints: -* "input_dy" has the same shape and type as "input_y". +* "input_dy" has the same shape and type as "input_y". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator reciprocal_grad. @@ -1447,21 +1447,21 @@ REG_OP(ReciprocalGrad) .OP_END_FACTORY_REG(ReciprocalGrad) /** -*@brief Returns the truth value of (x1 > x2) element-wise. +*@brief Returns the truth value of (x1 > x2) element-wise. \n *@par Inputs: *@li x1: A Tensor of type float16, float32, double, int64, int32, int16, int8, * uint8, uint16, uint32, uint64. -*@li x2: A Tensor of the same data type as "x1". +*@li x2: A Tensor of the same data type as "x1". \n *@par Outputs: *y: A Tensor of type bool. - + *@attention Constraints: -* Broadcasting is supported. +* Broadcasting is supported. \n *@par Third-party framework compatibility -* Compatible with the TensorFlow operator Greater. +* Compatible with the TensorFlow operator Greater. \n */ REG_OP(Greater) @@ -1471,19 +1471,19 @@ REG_OP(Greater) .OP_END_FACTORY_REG(Greater) /** -*@brief Returns a tensor of the same type and shape as the input tensor with all elements set to zero. +*@brief Returns a tensor of the same type and shape as the input tensor with all elements set to zero. \n *@par Inputs: *x: A Tensor. Must be one of the following types: * float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, -* complex128, float16, uint32, uint64, complex64, complex128. +* complex128, float16, uint32, uint64, complex64, complex128. \n *@par Outputs: -*y: A Tensor of the same data type as "x". +*y: A Tensor of the same data type as "x". \n *@attention Constraints: -* The output has the same shape and type as the input. +* The output has the same shape and type as the input. 
\n *@par Third-party framework compatibility * Compatible with the TensorFlow operator zeros_like. @@ -1494,16 +1494,16 @@ REG_OP(ZerosLike) .OP_END_FACTORY_REG(ZerosLike) /** -*@brief Returns the truth value of NOT "x" element-wise. +*@brief Returns the truth value of NOT "x" element-wise. \n *@par Inputs: -*x: A Tensor of type bool. +*x: A Tensor of type bool. \n *@par Outputs: -*y: A Tensor of type bool. +*y: A Tensor of type bool. \n *@attention Constraints: -* The input and output values are "1" or "0", corresponding to bool values "true" and "false". +* The input and output values are "1" or "0", corresponding to bool values "true" and "false". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator logical_not. @@ -1515,7 +1515,7 @@ REG_OP(LogicalNot) /** *@brief Computes inverse hyperbolic sine of x element-wise. -* Given an input tensor, this function computes inverse hyperbolic sine for every element in the tensor. +* Given an input tensor, this function computes inverse hyperbolic sine for every element in the tensor. \n * *@par Inputs: @@ -1534,7 +1534,7 @@ REG_OP(Asinh) .OP_END_FACTORY_REG(Asinh) /** -*@brief Computes gradients for Asinh operation. +*@brief Computes gradients for Asinh operation. \n * *@par Inputs: @@ -1556,7 +1556,7 @@ REG_OP(AsinhGrad) /** *@brief Computes inverse hyperbolic tangent of x element-wise.\n -* Given an input tensor, this function computes inverse hyperbolic tangent for every element in the tensor. \n Input range is [-1,1] and output range is [-inf, inf]. If input is -1, \n output will be -inf and if the input is 1, output will be inf.\n Values outside the range will have nan as output. +* Given an input tensor, this function computes inverse hyperbolic tangent for every element in the tensor. \n Input range is [-1,1] and output range is [-inf, inf]. If input is -1, \n output will be -inf and if the input is 1, output will be inf.\n Values outside the range will have nan as output. 
\n * *@par Inputs: @@ -1576,7 +1576,7 @@ REG_OP(Atanh) /** *@brief Computes the trignometric inverse tangent of x element-wise. -* The atan operation returns the inverse of tan, such that if y = tan(x) then, x = atan(y). +* The atan operation returns the inverse of tan, such that if y = tan(x) then, x = atan(y). \n * *@par Inputs: @@ -1595,7 +1595,7 @@ REG_OP(Atan) .OP_END_FACTORY_REG(Atan) /** -*@brief Computes gradients for Atan operation. +*@brief Computes gradients for Atan operation. \n * *@par Inputs: @@ -1616,7 +1616,7 @@ REG_OP(AtanGrad) .OP_END_FACTORY_REG(AtanGrad) /** -*@brief Computes arctangent of x1/x2 element-wise, respecting signs of the arguments. +*@brief Computes arctangent of x1/x2 element-wise, respecting signs of the arguments. \n * *@par Inputs: @@ -1637,7 +1637,7 @@ REG_OP(Atan2) .OP_END_FACTORY_REG(Atan2) /** -*@brief Returns the truth value of abs(x1-x2) < tolerance element-wise. +*@brief Returns the truth value of abs(x1-x2) < tolerance element-wise. \n * *@par Inputs: @@ -1663,12 +1663,16 @@ REG_OP(ApproximateEqual) /** *@brief Returns the element-wise sum of a list of tensors.\n -* AccumulateNV2 performs the same operation as AddN, but does not wait for all of its inputs to be ready before beginning to sum.\n This can save memory if inputs are ready at different times, \n since minimum temporary storage is proportional to the output size rather than the inputs size.\n Returns a Tensor of same shape and type as the elements of inputs. +* AccumulateNV2 performs the same operation as AddN, but does not wait for all of its inputs +to be ready before beginning to sum.\n This can save memory if inputs are ready at different times, +since minimum temporary storage is proportional to the output size rather than the inputs size. + Returns a Tensor of same shape and type as the elements of inputs. \n * *@par Inputs: *Dynamic inputs, including: -* x: A tensor. 
Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, \n qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. +* x: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, +qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. It's a dynamic input. \n * *@par Outputs: * y: A tensor. Has the same type as "x". @@ -1694,18 +1698,18 @@ REG_OP(AccumulateNV2) * num_bits is the bit width of the quantization, between 2 and 16, inclusive. \n * Quantization is called fake since the output is still in floating point. \n -*@par Inputs: +*@par Inputs: *One input: -*x: A Tensor of type float32. +*x: A Tensor of type float32. \n *@par Attributes: *@li min: An optional attribute. Defaults to "-6.0". *@li max: An optional attribute. Defaults to "6.0". *@li num_bits: An optional attribute. Defaults to "8". -*@li narrow_range: An optional bool. Defaults to "false". +*@li narrow_range: An optional bool. Defaults to "false". \n *@par Outputs: -*y: A Tensor. Has the same shape and type of "x". +*y: A Tensor. Has the same shape and type of "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator FakeQuantWithMinMaxArgs. @@ -1720,7 +1724,7 @@ REG_OP(FakeQuantWithMinMaxArgs) .OP_END_FACTORY_REG(FakeQuantWithMinMaxArgs) /** -*@brief Computes gradients for a FakeQuantWithMinMaxArgs operation. +*@brief Computes gradients for a FakeQuantWithMinMaxArgs operation. \n *@par Inputs: *Two inputs, including: \n @@ -1732,10 +1736,10 @@ REG_OP(FakeQuantWithMinMaxArgs) *@li min: An optional attribute. Defaults to "-6.0". *@li max: An optional attribute. Defaults to "6.0". *@li num_bits: An optional attribute. Defaults to "8". -*@li narrow_range: An optional bool. Defaults to "False". +*@li narrow_range: An optional bool. Defaults to "False". \n *@par Outputs: -*y: A Tensor of type float32. +*y: A Tensor of type float32. 
\n *@par Third-party framework compatibility * Compatible with TensorFlow operator FakeQuantWithMinMaxArgsGradient. @@ -1751,7 +1755,7 @@ REG_OP(FakeQuantWithMinMaxArgsGradient) .OP_END_FACTORY_REG(FakeQuantWithMinMaxArgsGradient) /** -*@brief Fake-quantize the 'inputs' tensor of type float via global float scalars. +*@brief Fake-quantize the 'inputs' tensor of type float via global float scalars. \n *@par Inputs: *Three inputs, including: @@ -1762,10 +1766,10 @@ REG_OP(FakeQuantWithMinMaxArgsGradient) *@par Attributes: *@li num_bits: An optional attribute. Defaults to "8". -*@li narrow_range: An optional bool. Defaults to "False". +*@li narrow_range: An optional bool. Defaults to "False". \n *@par Outputs: -*y: A Tensor of type float32. +*y: A Tensor of type float32. \n *@par Third-party framework compatibility * Compatible with TensorFlow operator FakeQuantWithMinMaxVars. @@ -1780,23 +1784,23 @@ REG_OP(FakeQuantWithMinMaxVars) .OP_END_FACTORY_REG(FakeQuantWithMinMaxVars) /** -*@brief Computes gradients for a FakeQuantWithMinMaxVars operation. +*@brief Computes gradients for a FakeQuantWithMinMaxVars operation. \n *@par Inputs: *Four inputs, including: *@li gradients: A Tensor of type float32. *@li x: A Tensor of type float32. *@li min: A Tensor of type float32. -*@li max: A Tensor of type float32. +*@li max: A Tensor of type float32. \n *@par Attributes: *@li num_bits: An integer specifying the quantization bit width. Defaults to "8". -*@li narrow_range: A Boolean specifying whether to use a narrow range for quantization. Defaults to "False". +*@li narrow_range: A Boolean specifying whether to use a narrow range for quantization. Defaults to "False". \n *@par Outputs: *@li backprops_wrt_x: A Tensor. Has the same type as input "x". *@li backprops_wrt_min: A Tensor. Has the same type as input "min". -*@li backprops_wrt_max: A Tensor. Has the same type as input "max". +*@li backprops_wrt_max: A Tensor. Has the same type as input "max". 
\n *@attention Constraints: *@li "gradients" has the same shape as "x". @@ -1829,11 +1833,11 @@ tensor of same shape as inputs *Three inputs, including: *@li x: A Tensor of type float32. *@li min: A Tensor of type float32. -*@li max: A Tensor of type float32. +*@li max: A Tensor of type float32. \n *@par Attributes: *@li num_bits: An integer specifying the quantization bit width. Defaults to "8". -*@li narrow_range: A Boolean specifying whether to use a narrow range for quantization. Defaults to "False". +*@li narrow_range: A Boolean specifying whether to use a narrow range for quantization. Defaults to "False". \n *@par Outputs: *y: A Tensor. Has the same type as input "x". @@ -1859,23 +1863,23 @@ REG_OP(FakeQuantWithMinMaxVarsPerChannel) .OP_END_FACTORY_REG(FakeQuantWithMinMaxVarsPerChannel) /** -*@brief Computes gradients for a FakeQuantWithMinMaxVarsPerChannel operation. +*@brief Computes gradients for a FakeQuantWithMinMaxVarsPerChannel operation. \n *@par Inputs: *Four inputs, including: *@li gradients: A Tensor of type float32. *@li x: A Tensor of type float32. *@li min: A Tensor of type float32. -*@li max: A Tensor of type float32. +*@li max: A Tensor of type float32. \n *@par Attributes: *@li num_bits: An integer specifying the quantization bit width. Defaults to "8". -*@li narrow_range: A Boolean specifying whether to use a narrow range for quantization. Defaults to "False". +*@li narrow_range: A Boolean specifying whether to use a narrow range for quantization. Defaults to "False". \n *@par Outputs: *@li backprops_wrt_x: A Tensor. Has the same type as input "x". *@li backprops_wrt_min: A Tensor. Has the same type as input "min". -*@li backprops_wrt_max: A Tensor. Has the same type as input "max". +*@li backprops_wrt_max: A Tensor. Has the same type as input "max". \n *@attention Constraints: *@li "gradients" has the same shape as "x". 
@@ -1901,16 +1905,16 @@ REG_OP(FakeQuantWithMinMaxVarsPerChannelGradient) .OP_END_FACTORY_REG(FakeQuantWithMinMaxVarsPerChannelGradient) /** -*@brief Element-wise computes the bitwise AND of "x1" and "x2". +*@brief Element-wise computes the bitwise AND of "x1" and "x2". \n *@par Inputs: *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: int8, int16, * int32, int64, uint8, uint16, uint32, uint64. Broadcasting is supported. -* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator BitwiseAnd. @@ -1922,16 +1926,16 @@ REG_OP(BitwiseAnd) .OP_END_FACTORY_REG(BitwiseAnd) /** -*@brief Element-wise computes the bitwise OR of "x1" and "x2". +*@brief Element-wise computes the bitwise OR of "x1" and "x2". \n *@par Inputs: *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: int8, int16, * int32, int64, uint8, uint16, uint32, uint64. Broadcasting is supported. -* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator BitwiseOr. @@ -1943,16 +1947,16 @@ REG_OP(BitwiseOr) .OP_END_FACTORY_REG(BitwiseOr) /** -*@brief Elementwise computes the bitwise XOR of "x1" and "x2". +*@brief Elementwise computes the bitwise XOR of "x1" and "x2". \n *@par Inputs: -*Two inputs, including: +*Two inputs, including: *@li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, uint32, uint64. * The format is NC1HWC0 or ND. Broadcasting is supported. -*@li x2: A Tensor. Has the same type and format as "x1". +*@li x2: A Tensor. Has the same type and format as "x1". 
\n *@par Outputs: -*y: Output result. Has the same type as "x1". +*y: Output result. Has the same type as "x1". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator BitwiseXor. @@ -1964,10 +1968,10 @@ REG_OP(BitwiseXor) .OP_END_FACTORY_REG(BitwiseXor) /** -*@brief Returns element-wise smallest integer not less than "x". +*@brief Returns element-wise smallest integer not less than "x". \n *@par Inputs: -* x: A Tensor of type float16 or float32 or float64. +* x: A Tensor of type float16 or float32 or float64. \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -1980,13 +1984,13 @@ REG_OP(Ceil) .OP_END_FACTORY_REG(Ceil) /** -*@brief Returns element-wise largest integer not greater than "x". +*@brief Returns element-wise largest integer not greater than "x". \n *@par Inputs: -*x: A Tensor of type float16, float32 or double. +*x: A Tensor of type float16, float32 or double. \n *@par Outputs: -*y: A Tensor of the same type as "x". +*y: A Tensor of the same type as "x". \n *@par Third-party framework compatibility: * Compatible with TensorFlow operator Floor. @@ -1998,16 +2002,16 @@ REG_OP(Floor) /** *@brief Divides "x1/x2" element-wise, rounding toward the -* most negative integer. +* most negative integer. \n *@par Inputs: *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: float16, float32, int32, int64, int8, * uint8, int16, uint16, double, complex64, complex128. -* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator FloorDiv. @@ -2025,7 +2029,7 @@ REG_OP(FloorDiv) .OP_END_FACTORY_REG(FloorDiv) /** -*@brief Returns element-wise remainder of division. Consistent with: floor(x1/x2) * x2 + mod(x1, x2) = x1. +*@brief Returns element-wise remainder of division. 
Consistent with: floor(x1/x2) * x2 + mod(x1, x2) = x1. \n *@par Inputs: * Two inputs, including: @@ -2048,16 +2052,16 @@ REG_OP(FloorMod) .OP_END_FACTORY_REG(FloorMod) /** -*@brief Computes the power of "x1" to "x2". +*@brief Computes the power of "x1" to "x2". \n *@par Inputs: *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: * float16, float32, int32, int64, int8, uint8, double, complex64, complex128. -* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Pow. @@ -2072,15 +2076,15 @@ REG_OP(Pow) .OP_END_FACTORY_REG(Pow) /** -*@brief Return element-wise integer closest to x. +*@brief Return element-wise integer closest to x. \n *@par Inputs: *One input, include: *x: A mutable Tensor. Must be one of the following types: -* float16, float32, double. +* float16, float32, double. \n *@par Outputs: -*y: A mutable Tensor. Has the same type as "x". +*y: A mutable Tensor. Has the same type as "x". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Rint. @@ -2092,7 +2096,7 @@ REG_OP(Rint) /** *@brief Rounds the values of a tensor to the nearest integer, element-wise. - * Rounds half to even. + * Rounds half to even. \n *@par Inputs: *Inputs including: @@ -2111,7 +2115,7 @@ REG_OP(Round) .OP_END_FACTORY_REG(Round) /** -*@brief: Computes sine of "x" element-wise. +*@brief: Computes sine of "x" element-wise. \n *@par Inputs: *One input: @@ -2119,7 +2123,7 @@ REG_OP(Round) * complex64, complex128, int32, int64 *@par Outputs: -*y: An ND Tensor. Has the same type as "x". +*y: An ND Tensor. Has the same type as "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Sin. 
@@ -2130,14 +2134,14 @@ REG_OP(Sin) .OP_END_FACTORY_REG(Sin) /** -*@brief: Computes tan of "x" element-wise. +*@brief: Computes tan of "x" element-wise. \n *@par Inputs: *One input: *x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128, int32, int64 *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Tan. @@ -2150,16 +2154,16 @@ REG_OP(Tan) .OP_END_FACTORY_REG(Tan) /** -*@brief Returns element-wise remainder of division. +*@brief Returns element-wise remainder of division. \n *@par Inputs: *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: float16, float32, * double, int32, int64. -* @li x2: A Tensor of the same type as "x1". +* @li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Third-party framework compatibility *@li Compatible with the TensorFlow operator TruncateMod. @@ -2174,19 +2178,19 @@ REG_OP(TruncateMod) .OP_END_FACTORY_REG(TruncateMod) /** -*@brief Adds 'bias' to 'x'. +*@brief Adds 'bias' to 'x'. \n *@par Inputs: *Two inputs, including: -* @li x: A Tensor of type NumberType. Must be one of the following types: float32, float64, int32, uint8, int16, +* @li x: A Tensor of type NumberType. Must be one of the following types: float32, float64, int32, uint8, int16, *int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64. -* @li bias: A 1D Tensor with size the C dimension of value. +* @li bias: A 1D Tensor with size the C dimension of value. \n *@par Attributes: -*data_format: An optional string. Defaults to "NHWC". +*data_format: An optional string. Defaults to "NHWC". \n *@par Outputs: -*y: A Tensor with same type as "x". +*y: A Tensor with same type as "x". 
\n *@par Third-party framework compatibility *Compatible with the TensorFlow operator BiasAdd. @@ -2199,7 +2203,7 @@ REG_OP(BiasAdd) .OP_END_FACTORY_REG(BiasAdd) /** -*@brief Returns the index with the smallest value across dimensions of a tensor. +*@brief Returns the index with the smallest value across dimensions of a tensor. \n *@par Inputs: *Two inputs, including: @@ -2208,10 +2212,10 @@ REG_OP(BiasAdd) *@li dimension: A Tensor. Must be one of the following types: int32, int64. Must be in the range [-rank(input x), rank(input x)]. Describes which dimension of the input Tensor to reduce across. * The format is ND. *@par Attributes: -*dtype: The output type, either "int32" or "int64". Defaults to "int64". +*dtype: The output type, either "int32" or "int64". Defaults to "int64". \n *@par Outputs: -*y: A Tensor of type "dtype". +*y: A Tensor of type "dtype". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator ArgMin. @@ -2224,22 +2228,25 @@ REG_OP(ArgMin) .OP_END_FACTORY_REG(ArgMin) /** -*@brief Returns the index with the smallest value across dimensions of a tensor. +*@brief Returns the index with the smallest value across dimensions of a tensor. \n *@par Inputs: *One input: -*x: A Tensor of type float16 or float32 in ND format. +*x: A Tensor of type float16 or float32 in ND format. \n *@par Attributes: *@li dimension: The dimension of the input Tensor to reduce across. -*@li dtype: An optional attribute, specifying the output data type. Must be "int32". Defaults to "int64". +*@li dtype: An optional attribute, specifying the output data type. Must be "int32". Defaults to "int64". \n *@par Outputs: -*y: A Tensor of type dtype. +*y: A Tensor of type dtype. \n *@par Third-party framework compatibility * Compatible with TensorFlow operator ArgMin. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ArgMin instead. 
*/ REG_OP(ArgMinD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -2249,22 +2256,22 @@ REG_OP(ArgMinD) .OP_END_FACTORY_REG(ArgMinD) /** -*@brief Returns the index with the largest value across axes of a tensor. +*@brief Returns the index with the largest value across axes of a tensor. \n *@par Inputs: -* Two inputs, including: +* Two inputs, including: *@li x: A multi-dimensional Tensor of type float16, float32, or int16. -*@li dimension: A Scalar of type int32, specifying the index with the largest value. +*@li dimension: A Scalar of type int32, specifying the index with the largest value. \n *@par Attributes: -*dtype: The output type, either "int32" or "int64". Defaults to "int64". +*dtype: The output type, either "int32" or "int64". Defaults to "int64". \n -*@par Outputs: -*y: A multi-dimensional Tensor of type int32 or int64, specifying the index with the largest value. The dimension is one less than that of "x". +*@par Outputs: +*y: A multi-dimensional Tensor of type int32 or int64, specifying the index with the largest value. The dimension is one less than that of "x". \n *@attention Constraints: *@li x: If there are multiple maximum values, the index of the first maximum value is used. -*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator ArgMax. @@ -2277,25 +2284,28 @@ REG_OP(ArgMaxV2) .OP_END_FACTORY_REG(ArgMaxV2) /** -*@brief Returns the index with the largest value across axes of a tensor. +*@brief Returns the index with the largest value across axes of a tensor. \n *@par Inputs: -* One input, including: -*x: A multi-dimensional Tensor of type float16, float32. +* One input, including: +*x: A multi-dimensional Tensor of type float16, float32. 
\n *@par Attributes: *@li dimension: An integer of type int32, specifying the axis information of the index with the maximum value. -*@li dtype: The output type, either "int32" or "int64". Defaults to "int64". +*@li dtype: The output type, either "int32" or "int64". Defaults to "int64". \n -*@par Outputs: -*y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x". +*@par Outputs: +*y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x". \n *@attention Constraints: *@li x: If there are multiple maximum values, the index of the first maximum value is used. -*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator ArgMax. +* +* @par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ArgMaxD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -2306,17 +2316,17 @@ REG_OP(ArgMaxD) /** *@brief Returns the maximum value of all elements in the input in the given -* dimension. +* dimension. \n *@par Inputs: *One input: \n -*x: A multi-dimensional Tensor of type float16 or float32. +*x: A multi-dimensional Tensor of type float16 or float32. \n *@par Attributes: *@li dimension: An integer of type int32, specifying the axis information of * the index with the maximum value. *@li keep_dims: A bool, specifying whether to keep dimensions for the output -* Tensor. Defaults to "false". +* Tensor. Defaults to "false". \n *@par Outputs: *@li indice: A multi-dimensional Tensor of type int32, specifying the index. @@ -2324,13 +2334,13 @@ REG_OP(ArgMaxD) * "dimension" compared with that of "x". Otherwise, the output has one fewer * dimension than "x".) 
*@li values: A Tensor, specifying the maximum value. Has the same dimensions -* as "indice" and the same type as "x". +* as "indice" and the same type as "x". \n *@attention Constraints: *@li If there are multiple maximum values, the index of the first maximum * value is used. *@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the -* dimension length of "x". +* dimension length of "x". \n *@par Third-party framework compatibility * Compatible with the two output scenarios of PyTorch operator Max (the output @@ -2347,13 +2357,13 @@ REG_OP(ArgMaxWithValue) /** *@par Inputs: *One input: \n -*x: A multi-dimensional Tensor of type float16 or float32. +*x: A multi-dimensional Tensor of type float16 or float32. \n *@par Attributes: *@li dimension: An integer of type int32, specifying the axis information of * the index with the maximum value. *@li keep_dims: A bool, specifying whether to keep dimensions for the output -* Tensor. Defaults to "false". +* Tensor. Defaults to "false". \n *@par Outputs: *@li indice: A multi-dimensional Tensor of type int32, specifying the index. @@ -2361,7 +2371,7 @@ REG_OP(ArgMaxWithValue) * "dimension" compared with that of "x". Otherwise, the output has one fewer * dimension than "x".) *@li values: A Tensor, specifying the minimum value. Has the same dimensions -* as "indice" and the same type as "x". +* as "indice" and the same type as "x". \n *@attention Constraints: *@li If there are multiple minimum values, the index of the first minimum @@ -2369,7 +2379,7 @@ REG_OP(ArgMaxWithValue) *@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the * dimension length of "x". *@li Performing the ArgMinWithValue operation on the last axis of float32 data -* is not supported on a mini platform. +* is not supported on a mini platform. 
\n *@par Third-party framework compatibility * Compatible with the two output scenarios of PyTorch operator Min (the output @@ -2390,10 +2400,10 @@ REG_OP(ArgMinWithValue) *One input: \n *x: the list of input data, the type of element in Tensor should be same. * the max size of x is 32. -* should met one of the following types: float16, float32 +* should be one of the following types: float16, float32. It's a dynamic input. \n *@par Outputs: -*y: A Tensor. Has the same type and format as "x". +*y: A Tensor. Has the same type and format as "x". \n *@par Attributes: *@li N: A required attribute. the number of input x, max size is 32. Type is int. @@ -2412,13 +2422,13 @@ REG_OP(Eltwise) .OP_END_FACTORY_REG(Eltwise) /** -*@brief Computes element-wise population count. +*@brief Computes element-wise population count. \n *@par Inputs: -*x: A Tensor of type TensorType::IntegerDataType(). +*x: A Tensor of type TensorType::IntegerDataType(). \n *@par Outputs: -*y: A Tensor of type uint8. +*y: A Tensor of type uint8. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator PopulationCount. @@ -2429,7 +2439,7 @@ REG_OP(PopulationCount) .OP_END_FACTORY_REG(PopulationCount) /** -*@brief A fusion operator for bert lamb. +*@brief A fusion operator for bert lamb. \n *@par Inputs: *Thirteen inputs, including: @@ -2445,14 +2455,14 @@ REG_OP(PopulationCount) * @li mul2_x: A Tensor. Must be one of the following types: float16, float32. * @li mul3_sub1: A Tensor. Must be one of the following types: float16, float32. * @li mul4_x: A Tensor. Must be one of the following types: float16, float32. -* @li add2_y: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n *@par Outputs: *Four outputs, including: * @li y1: A Tensor. Must be one of the following types: float16, float32. * @li y2: A Tensor. Must be one of the following types: float16, float32. * @li y3: A Tensor. 
Must be one of the following types: float16, float32. -* @li y4: A Tensor. Must be one of the following types: float16, float32. +* @li y4: A Tensor. Must be one of the following types: float16, float32. \n */ REG_OP(LambNextMVWithDecay) @@ -2476,7 +2486,7 @@ REG_OP(LambNextMVWithDecay) .OP_END_FACTORY_REG(LambNextMVWithDecay) /** -*@brief Confuse real_div, rsqrt, sqrt, maximum, minimum, sub and add. +*@brief Confuse real_div, rsqrt, sqrt, maximum, minimum, sub and add. \n *@par Inputs: *Thirteen inputs, including: @@ -2492,7 +2502,7 @@ REG_OP(LambNextMVWithDecay) * @li mul2_x: A Tensor of the same type as "input1". * @li mul3_sub1: A Tensor. Must be one of the following types: float16, float32. * @li mul4_x: A Tensor of the same type as "input1". -* @li add2_y: A Tensor of the same type as "input1". +* @li add2_y: A Tensor of the same type as "input1". \n *@par Outputs: *Four outputs, including: @@ -2522,7 +2532,7 @@ REG_OP(LambNextMV) .OP_END_FACTORY_REG(LambNextMV) /** -*@brief A fusion operator for bert lamb. +*@brief A fusion operator for bert lamb. \n *@par Inputs: *Six inputs, including: @@ -2531,12 +2541,12 @@ REG_OP(LambNextMV) * @li mul2_x: A Tensor. Must be one of the following types: float16, float32. * @li mul3_x: A Tensor. Must be one of the following types: float16, float32. * @li truediv1_recip: A Tensor. Must be one of the following types: float16, float32. -* @li add2_y: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n *@par Outputs: *Two outputs, including: * @li y1: A Tensor of the same type as "input_square". -* @li y2: A Tensor of the same type as "input_square". +* @li y2: A Tensor of the same type as "input_square". \n */ REG_OP(LambNextRight) @@ -2551,7 +2561,7 @@ REG_OP(LambNextRight) .OP_END_FACTORY_REG(LambNextRight) /** -*@brief A fusion operator for bert lamb. +*@brief A fusion operator for bert lamb. 
\n *@par Inputs: *Six inputs, including: @@ -2563,10 +2573,10 @@ REG_OP(LambNextRight) * @li input_sub: A Tensor. Must be one of the following types: float16, float32. * @li greater_y: A Tensor. Must be one of the following types: float16, float32. * @li select_e: A Tensor. Must be one of the following types: float16, float32. -* @li minimum_y: A Tensor. Must be one of the following types: float16, float32. +* @li minimum_y: A Tensor. Must be one of the following types: float16, float32. \n *@par Outputs: -*y: A Tensor of the same type as "input_greater1". +*y: A Tensor of the same type as "input_greater1". \n */ REG_OP(LambUpdateWithLr) @@ -2583,7 +2593,7 @@ REG_OP(LambUpdateWithLr) .OP_END_FACTORY_REG(LambUpdateWithLr) /** -*@brief A fusion operator for bert lamb. +*@brief A fusion operator for bert lamb. \n *@par Inputs: *Seven inputs, including: @@ -2593,10 +2603,10 @@ REG_OP(LambUpdateWithLr) * @li x4: A Tensor. Must be one of the following types: float16, float32. * @li x5: A Tensor. Must be one of the following types: float16, float32. * @li greater_y: A Tensor. Must be one of the following types: float16, float32. -* @li select_e: A Tensor. Must be one of the following types: float16, float32. +* @li select_e: A Tensor. Must be one of the following types: float16, float32. \n *@par Outputs: -*y: A Tensor of the same type as input. +*y: A Tensor of the same type as input. \n */ REG_OP(LambUpdateWithLrV2) @@ -2611,7 +2621,7 @@ REG_OP(LambUpdateWithLrV2) .OP_END_FACTORY_REG(LambUpdateWithLrV2) /** -*@brief A fusion operator for bert lamb. +*@brief A fusion operator for bert lamb. \n *@par Inputs: *Eleven inputs, including: @@ -2625,13 +2635,13 @@ REG_OP(LambUpdateWithLrV2) * @li mul2_x: A Tensor. Must be one of the following types: float16, float32. * @li mul3_x: A Tensor. Must be one of the following types: float16, float32. * @li mul4_x: A Tensor. Must be one of the following types: float16, float32. -* @li add2_y: A Tensor. 
Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n *@par Outputs: *Three outputs, including: * @li output0: A Tensor. Must be one of the following types: float16, float32. * @li output1: A Tensor. Must be one of the following types: float16, float32. -* @li output2: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. \n */ REG_OP(AdamApplyOneWithDecay) @@ -2652,7 +2662,7 @@ REG_OP(AdamApplyOneWithDecay) .OP_END_FACTORY_REG(AdamApplyOneWithDecay) /** -*@brief A fusion operator for bert lamb. +*@brief A fusion operator for bert lamb. \n *@par Inputs: *Ten inputs, including: @@ -2665,13 +2675,13 @@ REG_OP(AdamApplyOneWithDecay) * @li mul1_x: A Tensor. Must be one of the following types: float16, float32. * @li mul2_x: A Tensor. Must be one of the following types: float16, float32. * @li mul3_x: A Tensor. Must be one of the following types: float16, float32. -* @li add2_y: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n *@par Outputs: *Three outputs, including: * @li output0: A Tensor. Must be one of the following types: float16, float32. * @li output1: A Tensor. Must be one of the following types: float16, float32. -* @li output2: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. \n */ REG_OP(AdamApplyOne) @@ -2691,7 +2701,7 @@ REG_OP(AdamApplyOne) .OP_END_FACTORY_REG(AdamApplyOne) /** -*@brief A fusion operator for bert lamb. +*@brief A fusion operator for bert lamb. \n *@par Inputs: *Eleven inputs, including: @@ -2705,13 +2715,13 @@ REG_OP(AdamApplyOne) * @li mul2_x: A Tensor. Must be one of the following types: float16, float32. * @li mul3_x: A Tensor. 
Must be one of the following types: float16, float32. * @li mul4_x: A Tensor. Must be one of the following types: float16, float32. -* @li add2_y: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n *@par Outputs: *Three outputs, including: * @li output0: A Tensor. Must be one of the following types: float16, float32. * @li output1: A Tensor. Must be one of the following types: float16, float32. -* @li output2: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. \n */ REG_OP(AdamApplyOneWithDecayAssign) @@ -2732,7 +2742,7 @@ REG_OP(AdamApplyOneWithDecayAssign) .OP_END_FACTORY_REG(AdamApplyOneWithDecayAssign) /** -*@brief A fusion operator for bert lamb. +*@brief A fusion operator for bert lamb. \n *@par Inputs: *Ten inputs, including: @@ -2745,13 +2755,13 @@ REG_OP(AdamApplyOneWithDecayAssign) * @li mul1_x: A Tensor. Must be one of the following types: float16, float32. * @li mul2_x: A Tensor. Must be one of the following types: float16, float32. * @li mul3_x: A Tensor. Must be one of the following types: float16, float32. -* @li add2_y: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n *@par Outputs: *Three outputs, including: * @li output0: A Tensor. Must be one of the following types: float16, float32. * @li output1: A Tensor. Must be one of the following types: float16, float32. -* @li output2: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. \n */ REG_OP(AdamApplyOneAssign) @@ -2771,17 +2781,17 @@ REG_OP(AdamApplyOneAssign) .OP_END_FACTORY_REG(AdamApplyOneAssign) /** -*@brief Confuse select, maximum, greater and sqrt. +*@brief Confuse select, maximum, greater and sqrt. 
\n *@par Inputs: *Four inputs, including: * @li x: A Tensor. Must be one of the following types: float16, float32. * @li greater_zeros: A Tensor. Must be one of the following types: float16, float32. * @li select_ones: A Tensor. Must be one of the following types: float16, float32. -* @li maximum_ones: A Tensor. Must be one of the following types: float16, float32. +* @li maximum_ones: A Tensor. Must be one of the following types: float16, float32. \n *@par Outputs: -*y: A Tensor of the same type as "x". +*y: A Tensor of the same type as "x". \n */ REG_OP(ClipByNormNoDivSum) @@ -2793,15 +2803,15 @@ REG_OP(ClipByNormNoDivSum) .OP_END_FACTORY_REG(ClipByNormNoDivSum) /** -*@brief Confuse reducesumd and square. +*@brief Confuse reducesumd and square. \n *@par Inputs: -*x: A Tensor of type float16, float32. +*x: A Tensor of type float16, float32. \n *@par Attributes: * Two attributes, including: \n *@li axis: A optional listint, specifies the dimensions to reduce. -*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". +*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". \n *@par Outputs: *Two outputs, including: \n @@ -2817,15 +2827,15 @@ REG_OP(SquareSumV2) .OP_END_FACTORY_REG(SquareSumV2) /** -*@brief Confuse reducesumd and square. +*@brief Confuse reducesumd and square. \n *@par Inputs: -*x: A Tensor of type float16, float32. +*x: A Tensor of type float16, float32. \n *@par Attributes: * Two attributes, including: \n *@li axis: A optional listint, specifies the dimensions to reduce. -*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". +*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". \n *@par Outputs: y: A Tensor. Has the same type as "x". @@ -2842,7 +2852,7 @@ REG_OP(SquareSumV1) *@par Inputs: *x1: A Tensor of type float32. 
-*x2: A Tensor of type float32. +*x2: A Tensor of type float32. \n *@par Outputs: y1: A Tensor. Has the same type as "x1".The result of "x1". @@ -2856,13 +2866,13 @@ REG_OP(SquareSumAll) .OP_END_FACTORY_REG(SquareSumAll) /** -*@brief Confuse broadcast, addn and mul. +*@brief Confuse broadcast, addn and mul. \n *@par Inputs: *Three inputs, including: * @li x1: A Tensor. Must be one of the following types:int32, int16, float16, float32. * @li x2: A Tensor of the same type as "x1". -* @li x3: A Tensor of the same type as "x1". +* @li x3: A Tensor of the same type as "x1". \n *@par Outputs: * y: A Tensor. Has the same type as "x1". @@ -2875,25 +2885,25 @@ REG_OP(FusedMulAddN) .OP_END_FACTORY_REG(FusedMulAddN) /** -*@brief Add 'bias' to 'x'. +*@brief Add 'bias' to 'x'. \n *@par Inputs: * Two inputs, including: *@li x: An ND tensor of type float16 or float32. -*@li bias: An ND tensor of type float16 or float32. +*@li bias: An ND tensor of type float16 or float32. \n *@par Attributes: *@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1". *@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1". -*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". +*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". \n *@par Outputs: -*y: An ND tensor of type float16 or float32. +*y: An ND tensor of type float16 or float32. \n *@attention Constraints:\n * Assume that the shape length of "x" is "n" and that of "bias" is "m". *@li "axis" is within the range [-n, n-1]. num_axes >= -1. 
-*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n +*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n * If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis). *@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1. *@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n @@ -2916,18 +2926,18 @@ REG_OP(Bias) /** *@brief Function multiply gradients calculation. output0 is the result of which input0 dot multily input1. -output1 is the result of which input0 dot multily input1, then reducesum it. +output1 is the result of which input0 dot multiply input1, then reducesum it. \n *@par Inputs: *@li input0: A Tensor of input of mul, and dtype supports float16, float32. *@li input1: A Tensor of input of mul and mul_1, and dtype supports float16, float32. -*@li input2: A Tensor of input of mul_1, and dtype supports float16, float32. +*@li input2: A Tensor of input of mul_1, and dtype supports float16, float32. \n *@par Attributes: *@li axes: The dimensions to reduce. Default:(), reduce all dimensions. \n Only constant value is allowed. *@li keep_dims: If true, keep these reduced dimensions and the length is 1. \n -If false, don’t keep these dimensions. Default:False. +If false, don't keep these dimensions. Default:False. \n *@par Outputs: *@li output0: A Tensor result of which input0 dot multily input1. @@ -2944,12 +2954,12 @@ REG_OP(ConfusionMulGrad) .OP_END_FACTORY_REG(ConfusionMulGrad) /** -*@brief Function fused multiply l2 loss calculation. 
+*@brief Function fused multiply l2 loss calculation. \n *@par Inputs: -*@li x1: A Tensor of number type. -*@li x2: A Tensor of number type. -*@li x3: A Tensor of number type. +*@li x1: A Tensor of type float16, float32. +*@li x2: A Tensor of type float16, float32. +*@li x3: A Tensor of type float16, float32. \n *@par Outputs: *@li y1: A Tensor of shape and dtype of first output, which should have \n @@ -2965,13 +2975,13 @@ REG_OP(FusedMulAddNL2loss) .OP_END_FACTORY_REG(FusedMulAddNL2loss) /** -*@brief Tests whether the input exceeds a threshold. +*@brief Tests whether the input exceeds a threshold. \n *@par Inputs: -*@li x: A Tensor with any format. Must be one of the following types: float16, float32. +*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n *@par Attributes: -*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. +*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n *@par Outputs: *@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. @@ -2986,10 +2996,10 @@ REG_OP(FusedMulAddNL2loss) .OP_END_FACTORY_REG(Threshold); /** -*@brief Returns the index number corresponding to the maximum value entered. +*@brief Returns the index number corresponding to the maximum value entered. \n *@par Inputs: -*@li x: A tensor. Must be one of the following types: float16, float32. +*@li x: A tensor. Must be one of the following types: float16, float32. \n *@par Attributes: *@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000 @@ -3014,15 +3024,15 @@ REG_OP(ArgMaxWithK) .OP_END_FACTORY_REG(ArgMaxWithK) /** -*@brief Multiply tensor with scale. +*@brief Multiply tensor with scale. 
\n *@par Inputs: *Five inputs, including: * @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. -* @li x2: A scale. Must be float. +* @li x2: A scale. Must be float. \n *@par Outputs: -*@li y: A Tensor. Has the same type and shape as "x1". +*@li y: A Tensor. Has the same type and shape as "x1". \n *@par Third-party framework compatibility: * Compatible with the Pytorch operator muls. @@ -3034,15 +3044,15 @@ REG_OP(Muls) .OP_END_FACTORY_REG(Muls) /** -*@brief Fill tensor with scale. +*@brief Fill tensor with scale. \n *@par Inputs: *Five inputs, including: * @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. -* @li x2: A scale. Must be float. +* @li x2: A scale. Must be float. \n *@par Outputs: -*@li y: A Tensor. Has the same type and shape as "x1". +*@li y: A Tensor. Has the same type and shape as "x1". \n *@par Third-party framework compatibility: * Compatible with the Pytorch operator fills. @@ -3054,15 +3064,15 @@ REG_OP(Fills) .OP_END_FACTORY_REG(Fills) /** -*@brief Add tensor with scale. +*@brief Add tensor with scale. \n *@par Inputs: *Five inputs, including: * @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. -* @li x2: A scale. Must be float. +* @li x2: A scale. Must be float. \n *@par Outputs: -*@li y: A Tensor. Has the same type and shape as "x1". +*@li y: A Tensor. Has the same type and shape as "x1". \n *@par Third-party framework compatibility: * Compatible with the Pytorch operator adds. @@ -3074,14 +3084,14 @@ REG_OP(Fills) .OP_END_FACTORY_REG(Adds) /** -*@brief Computes the product of x and y and returns 0 if the y is zero, even if x is NaN or infinite. +*@brief Computes the product of x and y and returns 0 if the y is zero, even if x is NaN or infinite. \n *@par Inputs: * @li x1: A Tensor. Must be one of the following types:float16, float32, double, complex64, complex128. -* @li x2: A Tensor. Has the same type and shape as "x1". +* @li x2: A Tensor. 
Has the same type and shape as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type and shape as "x1". +*y: A Tensor. Has the same type and shape as "x1". \n *@par Third-party framework compatibility: * Compatible with the TensorFlow operator MulNoNan. @@ -3093,17 +3103,17 @@ REG_OP(Fills) .OP_END_FACTORY_REG(MulNoNan) /** -*@brief Add tensor with scale. +*@brief Add tensor with scale. \n *@par Inputs: * @li x1: A Tensor dtype of int32, float16, float32. -* @li x2: A Tensor dtype of int32, float16, float32. +* @li x2: A Tensor dtype of int32, float16, float32. \n *@par Attributes: *alpha: Float scalar apply to x2:x2*alpha *@par Outputs: -*y: A Tensor. should be same shape and type as "x1". +*y: A Tensor. should be same shape and type as "x1". \n *@par Third-party framework compatibility: * Compatible with the Pytorch operator Axpy. @@ -3116,16 +3126,16 @@ REG_OP(Axpy) .OP_END_FACTORY_REG(Axpy) /** -*@brief Creates a criterion that measures the loss given input tensors x1 x2 and a Tensor label y with values 1 or -1. +*@brief Creates a criterion that measures the loss given input tensors x1 x2 and a Tensor label y with values 1 or -1. \n *@par Inputs: *@li x1: A ND Tensor with one of the following types: int8, uint8, int32, float16, float32. *@li x2: A ND Tensor with one of the following types: int8, uint8, int32, float16, float32. -*@li target: A ND Tensor with one of the following types: int8, int32, float16, float32. +*@li target: A ND Tensor with one of the following types: int8, int32, float16, float32. \n *@par Attributes: *@li margin: A optional float32. Defaults to "0.0". -*@li reduction: A optional string. Defaults to "mean". +*@li reduction: A optional string. Defaults to "mean". \n *@par Outputs: *@li y: A ND Tensor with Must be float32. @@ -3142,16 +3152,16 @@ REG_OP(CosineEmbeddingLoss) .OP_END_FACTORY_REG(CosineEmbeddingLoss) /** -*@brief Kullback-Leibler divergence. +*@brief Kullback-Leibler divergence. 
\n *@par Inputs: * Two inputs, including: *@li x: Tensor of arbitrary shape. -*@li target: Tensor of the same shape and dtype as x. +*@li target: Tensor of the same shape and dtype as x. \n *@par Attributes: *reduction: An required "string", Specifies the reduction to apply to the output; -* Reduction only supports the two modes of "sum" and "batchmean". +* Reduction only supports the two modes of "sum" and "batchmean". \n *@par Outputs: *y: A ND Tensor of the same dtype as x. @@ -3166,14 +3176,14 @@ REG_OP(KLDiv) .OP_END_FACTORY_REG(KLDiv) /** -*@brief copy data from x to y.. +*@brief copy data from x to y. \n *@par Inputs: *One inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32, bool. +* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32, bool. \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x". \n *@par Third-party framework compatibility */ @@ -3183,14 +3193,14 @@ REG_OP(TensorMove) .OP_END_FACTORY_REG(TensorMove) /** -*@brief copy data from x to x. +*@brief copy data from x to x. \n *@par Inputs: *One inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32, bool. +* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32, bool. \n *@par Outputs: -*x: A Tensor. Has the same type as "x". +*x: A Tensor. Has the same type as "x". 
\n *@par Third-party framework compatibility */ diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index 33dce25d..1e67c41f 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -25,40 +25,27 @@ #include "graph/operator.h" namespace ge { -REG_OP(SymbolicGradient) - .DYNAMIC_INPUT(input, TensorType::ALL()) - .DYNAMIC_OUTPUT(output, TensorType::ALL()) - .GRAPH(f) - .OP_END_FACTORY_REG(SymbolicGradient) - -REG_OP(RemoteCall) - .INPUT(target, DT_STRING) - .DYNAMIC_INPUT(args, TensorType::ALL()) - .DYNAMIC_OUTPUT(output, TensorType::ALL()) - .GRAPH(f) - .OP_END_FACTORY_REG(RemoteCall) - /** - *@brief Select one of the subgraphs to pass the input tensors and return the output tensors. \n - * If "cond" means True, the selected subgraph is "then_branch". \n - * Otherwise, the selected subgraph is "else_branch". + *@brief Select one of the subgraphs to pass the input tensors and return the output tensors. + * If "cond" means True, the selected subgraph is "then_branch". + * Otherwise, the selected subgraph is "else_branch" . \n *@par Inputs: - *@li cond: A Tensor. If "cond" is not a scalar of boolean type, \n - * it will be converted to a boolean according to the following rule: \n - * if "cond" is a numerical scalar, non-zero means True and zero means False; \n - * if "cond" is a string scalar, non-empty means True and empty means False; \n + *@li cond: A Tensor. If "cond" is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if "cond" is a numerical scalar, non-zero means True and zero means False; + * if "cond" is a string scalar, non-empty means True and empty means False; * if "cond" is not a scalar, non-empty means True and empty means False. - *@li input: The input tensors. + *@li input: The input tensors . It's a dynamic input. 
\n *@par Graphs: - *@li then_branch: A subgraph takes 'input' and returns a list of tensors, \n + *@li then_branch: A subgraph takes 'input' and returns a list of tensors, * whose types are the same as what else_branch returns. - *@li else_branch: A subgraph takes 'input' and returns a list of tensors, \n - * whose types are the same as what then_branch returns. + *@li else_branch: A subgraph takes 'input' and returns a list of tensors, + * whose types are the same as what then_branch returns . \n *@par Outputs: - *output: The output tensors returned by either then_branch(input) or else_branch(input). + *output: The output tensors returned by either then_branch(input) or else_branch(input) . \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator _If. @@ -72,26 +59,26 @@ REG_OP(_If) .OP_END_FACTORY_REG(_If) /** - *@brief Select one of the subgraphs to pass the input tensors and return the output tensors. \n - * If "cond" means True, the selected subgraph is "then_branch". \n - * Otherwise, the selected subgraph is "else_branch". + *@brief Select one of the subgraphs to pass the input tensors and return the output tensors. + * If "cond" means True, the selected subgraph is "then_branch". + * Otherwise, the selected subgraph is "else_branch" . \n *@par Inputs: - *@li cond: A Tensor. If "cond" is not a scalar of boolean type, \n - * it will be converted to a boolean according to the following rule: \n - * if "cond" is a numerical scalar, non-zero means True and zero means False; \n - * if "cond" is a string scalar, non-empty means True and empty means False; \n + *@li cond: A Tensor. If "cond" is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if "cond" is a numerical scalar, non-zero means True and zero means False; + * if "cond" is a string scalar, non-empty means True and empty means False; * if "cond" is not a scalar, non-empty means True and empty means False. 
- *@li input: The input tensors. + *@li input: The input tensors . It's a dynamic input. \n *@par Graphs: - *@li then_branch: A subgraph takes 'input' and returns a list of tensors, \n + *@li then_branch: A subgraph takes 'input' and returns a list of tensors, * whose types are the same as what else_branch returns. - *@li else_branch: A subgraph takes 'input' and returns a list of tensors, \n - * whose types are the same as what then_branch returns. + *@li else_branch: A subgraph takes 'input' and returns a list of tensors, + * whose types are the same as what then_branch returns . \n *@par Outputs: - *output: The output tensors returned by either then_branch(input) or else_branch(input). + *output: The output tensors returned by either then_branch(input) or else_branch(input) . \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator StatelessIf. @@ -105,26 +92,26 @@ REG_OP(StatelessIf) .OP_END_FACTORY_REG(StatelessIf) /** - *@brief Select one of the subgraphs to pass the input tensors and return the output tensors. \n - * If "cond" means True, the selected subgraph is "then_branch". \n - * Otherwise, the selected subgraph is "else_branch". + *@brief Select one of the subgraphs to pass the input tensors and return the output tensors. + * If "cond" means True, the selected subgraph is "then_branch". + * Otherwise, the selected subgraph is "else_branch" . \n *@par Inputs: - *@li cond: A Tensor. If "cond" is not a scalar of boolean type, \n - * it will be converted to a boolean according to the following rule: \n - * if "cond" is a numerical scalar, non-zero means True and zero means False; \n - * if "cond" is a string scalar, non-empty means True and empty means False; \n + *@li cond: A Tensor. 
If "cond" is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if "cond" is a numerical scalar, non-zero means True and zero means False; + * if "cond" is a string scalar, non-empty means True and empty means False; * if "cond" is not a scalar, non-empty means True and empty means False. - *@li input: The input tensors. + *@li input: The input tensors . It's a dynamic input. \n *@par Graphs: - *@li then_branch: A subgraph takes 'input' and returns a list of tensors, \n + *@li then_branch: A subgraph takes 'input' and returns a list of tensors, * whose types are the same as what else_branch returns. - *@li else_branch: A subgraph takes 'input' and returns a list of tensors, \n - * whose types are the same as what then_branch returns. + *@li else_branch: A subgraph takes 'input' and returns a list of tensors, + * whose types are the same as what then_branch returns . \n *@par Outputs: - *output: The output tensors returned by either then_branch(input) or else_branch(input). + *output: The output tensors returned by either then_branch(input) or else_branch(input) . \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator If. @@ -138,18 +125,18 @@ REG_OP(If) .OP_END_FACTORY_REG(If) /** - *@brief Select one of the subgraphs to pass the input tensors and return the output tensors. + *@brief Select one of the subgraphs to pass the input tensors and return the output tensors . \n *@par Inputs: *@li branch_index: A int32 scalar which determines the selected subgraph. - *@li input: The input tensors, which will be passed to the subgraph. + *@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n *@par Graphs: - *branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors, \n - * whose types are the same as what every other subgraph returns. 
+ *branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors, + * whose types are the same as what every other subgraph returns . \n *@par Outputs: - *output: The output tensors returned by one of branches. + *output: The output tensors returned by one of branches . It's a dynamic output. \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator Case. @@ -162,25 +149,25 @@ REG_OP(Case) .OP_END_FACTORY_REG(Case) /** - *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False. + *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n *@par Inputs: - *input: The input tensors. + *input: The input tensors . It's a dynamic input. \n *@par Graphs: - *@li cond: A subgraph takes 'input' and returns a tensor. \n - * If the tensor is not a scalar of boolean type, \n - * it will be converted to a boolean according to the following rule: \n - * if it is a numerical scalar, non-zero means True and zero means False; \n - * if it is a string scalar, non-empty means True and empty means False; \n + *@li cond: A subgraph takes 'input' and returns a tensor. + * If the tensor is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if it is a numerical scalar, non-zero means True and zero means False; + * if it is a string scalar, non-empty means True and empty means False; * if it is not a scalar, non-empty means True and empty means False. - *@li body: A subgraph takes 'input' and returns a another list of tensors. + *@li body: A subgraph takes 'input' and returns a another list of tensors . \n *@par Attributes: - *parallel_iterations: An optional int, default as 10. + *parallel_iterations: An optional int, default as 10 . \n *@par Outputs: - *output: The output tensors returned by "body". Has the same type as "input". + *output: The output tensors returned by "body". 
Has the same type as "input" . \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator _While. @@ -193,25 +180,25 @@ REG_OP(_While) .OP_END_FACTORY_REG(_While) /** - *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False. + *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n *@par Inputs: - *input: The input tensors. + *input: The input tensors . It's a dynamic input. \n *@par Graphs: - *@li cond: A subgraph takes 'input' and returns a tensor. \n - * If the tensor is not a scalar of boolean type, \n - * it will be converted to a boolean according to the following rule: \n - * if it is a numerical scalar, non-zero means True and zero means False; \n - * if it is a string scalar, non-empty means True and empty means False; \n + *@li cond: A subgraph takes 'input' and returns a tensor. + * If the tensor is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if it is a numerical scalar, non-zero means True and zero means False; + * if it is a string scalar, non-empty means True and empty means False; * if it is not a scalar, non-empty means True and empty means False. - *@li body: A subgraph takes 'input' and returns a another list of tensors. + *@li body: A subgraph takes 'input' and returns a another list of tensors . \n *@par Attributes: - *parallel_iterations: An optional int, default as 10. + *parallel_iterations: An optional int, default as 10 . \n *@par Outputs: - *output: The output tensors returned by "body". Has the same type as "input". + *output: The output tensors returned by "body". Has the same type as "input" . It's a dynamic output. \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator While. 
@@ -225,25 +212,25 @@ REG_OP(While) .OP_END_FACTORY_REG(While) /** - *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False. + *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n *@par Inputs: - *input: The input tensors. + *input: The input tensors . It's a dynamic input. \n *@par Graphs: - *@li cond: A subgraph takes 'input' and returns a tensor. \n - * If the tensor is not a scalar of boolean type, \n - * it will be converted to a boolean according to the following rule: \n - * if it is a numerical scalar, non-zero means True and zero means False; \n - * if it is a string scalar, non-empty means True and empty means False; \n + *@li cond: A subgraph takes 'input' and returns a tensor. + * If the tensor is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if it is a numerical scalar, non-zero means True and zero means False; + * if it is a string scalar, non-empty means True and empty means False; * if it is not a scalar, non-empty means True and empty means False. - *@li body: A subgraph takes 'input' and returns a another list of tensors. + *@li body: A subgraph takes 'input' and returns a another list of tensors . \n *@par Attributes: - *parallel_iterations: An optional int, default as 10. + *parallel_iterations: An optional int, default as 10 . \n *@par Outputs: - *output: The output tensors returned by "body". Has the same type as "input". + *output: The output tensors returned by "body". Has the same type as "input" . It's a dynamic output. \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator StatelessWhile. @@ -257,19 +244,19 @@ REG_OP(StatelessWhile) .OP_END_FACTORY_REG(StatelessWhile) /** - *@brief Cyclic execute the "body" subgraph until the first input of For op exceed upper bound. 
+ *@brief Cyclic execute the "body" subgraph until the first input of For op exceeds upper bound . \n *@par Inputs: *@li start: A int32 scalar. The lower bound. *@li limit: A int32 scalar. The upper bound. *@li delta: A int32 scalar. The step size. - *@li input: The input tensors, which will be passed to "body". + *@li input: The input tensors, which will be passed to "body" . It's a dynamic input. \n *@par Graphs: - *body: A subgraph takes 'input' and returns a another list of tensors. + *body: A subgraph takes 'input' and returns another list of tensors . \n *@par Outputs: - *output: The output tensors returned by "body". Has the same type as "input". + *output: The output tensors returned by "body". Has the same type as "input" . It's a dynamic output. \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator For. @@ -284,21 +271,21 @@ REG_OP(For) .OP_END_FACTORY_REG(For) /** - *@brief Pass the input tensors to the subgraph "f" and return the output tensors. + *@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n *@par Inputs: - *args: The input tensors, which will be passed to "f". + *args: The input tensors, which will be passed to "f" . It's a dynamic input. \n *@par Graphs: - *f: A subgraph takes 'args' and returns a another list of tensors. + *f: A subgraph takes 'args' and returns another list of tensors . \n *@par Attributes: *@li config: An optional string, default as "". *@li config_proto: An optional int, default as "". - *@li executor_type: An optional int, default as "". + *@li executor_type: An optional int, default as "" . \n *@par Outputs: - *output: The output tensors returned by "f". + *output: The output tensors returned by "f" . It's a dynamic output. \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator PartitionedCall. 
@@ -313,21 +300,21 @@ REG_OP(PartitionedCall) .OP_END_FACTORY_REG(PartitionedCall) /** - *@brief Pass the input tensors to the subgraph "f" and return the output tensors. + *@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n *@par Inputs: - *args: The input tensors, which will be passed to "f". + *args: The input tensors, which will be passed to "f" . It's a dynamic input. \n *@par Graphs: - *f: A subgraph takes 'args' and returns a another list of tensors. + *f: A subgraph takes 'args' and returns a another list of tensors . \n *@par Attributes: *@li config: An optional string, default as "". *@li config_proto: An optional int, default as "". - *@li executor_type: An optional int, default as "". + *@li executor_type: An optional int, default as "" . \n *@par Outputs: - *output: The output tensors returned by "f". + *output: The output tensors returned by "f" . It's a dynamic output. \n *@par Third-party framework compatibility *@Compatible with the TensorFlow operator StatefulPartitionedCall. @@ -341,11 +328,6 @@ REG_OP(StatefulPartitionedCall) .ATTR(executor_type, String, "") .OP_END_FACTORY_REG(StatefulPartitionedCall) -REG_OP(FakeParam) - .OUTPUT(output, TensorType::ALL()) - .ATTR(shape, ListInt, {}) - .OP_END_FACTORY_REG(FakeParam) - } // namespace ge #endif // GE_FUNCTIONAL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index 231729ce..7e985efc 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -27,18 +27,18 @@ namespace ge { /** * @brief Outputs a tensor gathering all input tensors. * @par Inputs: - * x: A tensor. Must be one of the following types: int8, int16, int32, float16, - * float32. + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. * @par Attributes: - * @li rank_size: A required integer identifying the number of ranks - * participating in the op. 
- * @li group: A required string identifying the group name of ranks - * participating in the op. + * @li rank_size: A required integer identifying the number of ranks + participating in the op. + * @li group: A required string identifying the group name of ranks + participating in the op. * @par Outputs: * y: A Tensor. Has the same type as "x". - * @attention Constraints:\n - * "group" is limited to 128 characters. Use "hccl_world_group" - * as the name of a world group. + * @attention Constraints: + "group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. */ REG_OP(HcomAllGather) .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) @@ -50,25 +50,25 @@ REG_OP(HcomAllGather) .OP_END_FACTORY_REG(HcomAllGather) /** - * @brief Outputs a tensor containing the reduction across all input tensors - * passed to op. + * @brief Outputs a tensor containing the reduction across all input tensors + passed to op. * @par Inputs: - * x: A tensor. Must be one of the following types: int8, int16, int32, float16, - * float32. + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. * @par Attributes: - * @li reduction: A required string identifying the reduction operation to - * perform.The supported operation are: "sum", "max", "min", "prod". - * @li group: A required string identifying the group name of ranks - * participating in the op. - * @li fusion: An optional integer identifying the fusion flag of the op. \n - * 0: no fusion; 1 (default): fusion; 2: fusion the ops by fusion id. + * @li reduction: A required string identifying the reduction operation to + perform.The supported operation are: "sum", "max", "min", "prod". + * @li group: A required string identifying the group name of ranks + participating in the op. + * @li fusion: An optional integer identifying the fusion flag of the op. + 0: no fusion; 1 (default): fusion; 2: fusion the ops by fusion id. 
* @li fusion_id: An optional integer identifying the fusion id of the op. * The HcomAllReduce ops with the same fusion id will be fused. * @par Outputs: * y: A Tensor. Has the same type as "x". - * @attention Constraints: \n - * "group" is limited to 128 characters. Use "hccl_world_group" - * as the name of a world group. + * @attention Constraints: + *"group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. */ REG_OP(HcomAllReduce) .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) @@ -84,18 +84,19 @@ REG_OP(HcomAllReduce) /** * @brief Broadcasts the input tensor in root rank to all ranks. * @par Inputs: - * x: A list of dynamic input tensor. Must be one of the following types: - * int8, int16, int32, float16, float32. + * x: A list of dynamic input tensor. Must be one of the following types: + int8, int16, int32, float16, float32. It's a dynamic input. * @par Attributes: - * @li root_rank: A required integer identifying the root rank in the op - * input of this rank will be broadcast to other ranks. - * @li group: A required string identifying the group name of ranks - * participating in the op. + * @li root_rank: A required integer identifying the root rank in the op + input of this rank will be broadcast to other ranks. + * @li group: A required string identifying the group name of ranks + participating in the op. * @par Outputs: * y: A list of dynamic output tensor. Has the same type and length as "x". - * @attention Constraints:\n - * "group" is limited to 128 characters. Use "hccl_world_group" - * as the name of a world group. + * It's a dynamic output. + * @attention Constraints: + "group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. 
*/ REG_OP(HcomBroadcast) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) @@ -107,24 +108,24 @@ REG_OP(HcomBroadcast) .OP_END_FACTORY_REG(HcomBroadcast) /** - * @brief Performs reduction across all input tensors, scattering in equal - * blocks among ranks, each rank getting a chunk of data based on its rank - * index. + * @brief Performs reduction across all input tensors, scattering in equal + blocks among ranks, each rank getting a chunk of data based on its rank + index. * @par Inputs: - * x: A tensor. Must be one of the following types: int8, int16, int32, float16, - * float32. + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. * @par Attributes: - * @li reduction: A required string identifying the reduction operation to - * perform. The supported operation are: "sum", "max", "min", "prod". - * @li group: A required string identifying the group name of ranks - * participating in the op. - * @li rank_size: A required integer identifying the number of ranks - * participating in the op. + * @li reduction: A required string identifying the reduction operation to + perform. The supported operation are: "sum", "max", "min", "prod". + * @li group: A required string identifying the group name of ranks + participating in the op. + * @li rank_size: A required integer identifying the number of ranks + participating in the op. * @par Outputs: * y: A Tensor. Has the same type as "x". - * @attention Constraints:\n - * "group" is limited to 128 characters. Use "hccl_world_group" - * as the name of a world group. + * @attention Constraints: + "group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. */ REG_OP(HcomReduceScatter) .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) @@ -139,19 +140,19 @@ REG_OP(HcomReduceScatter) /** * @brief Sends the input tensor to destination rank. * @par Inputs: - * x: A tensor. 
Must be one of the following types: int8, int16, int32, float16, - * float32. + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. * @par Attributes: - * @li sr_tag: A required integer identifying the send/recv message tag. The - * message will be received by the HcomReceive op with the same "sr_tag". + * @li sr_tag: A required integer identifying the send/recv message tag. The + message will be received by the HcomReceive op with the same "sr_tag". * @li dest_rank: A required integer identifying the destination rank. - * @li group: A string identifying the group name of ranks participating in - * the op. + * @li group: A string identifying the group name of ranks participating in + the op. * @par Outputs: * None. - * @attention Constraints:\n - * @li "group" is limited to 128 characters. Use - * "hccl_world_group" as the name of a world group. + * @attention Constraints: + @li "group" is limited to 128 characters. Use + "hccl_world_group" as the name of a world group. * @li Operators HcomSend and HcomReceive have the same "sr_tag". * @see HcomReceive */ @@ -169,20 +170,20 @@ REG_OP(HcomSend) * @par Inputs: * None. * @par Attributes: - * @li sr_tag: A required integer identifying the send/recv message tag. The - * message will be send by the HcomSend op with the same "sr_tag". + * @li sr_tag: A required integer identifying the send/recv message tag. The + message will be send by the HcomSend op with the same "sr_tag". * @li src_rank: A required integer identifying the source rank. * @li group: A required string identifying the group name of ranks * participating in the op. - * @li shape: A required list identifying the shape of the tensor to be - * received. - * @li dtype: A required integer identifying the type of the tensor to be - * received. The supported types are: int8, int16, int32, float16, float32. + * @li shape: A required list identifying the shape of the tensor to be + received. 
+ * @li dtype: A required integer identifying the type of the tensor to be + received. The supported types are: int8, int16, int32, float16, float32. * @par Outputs: * y: A tensor with type identified in "dtype". - * @attention Constraints:\n - * @li "group" is limited to 128 characters. Use - * "hccl_world_group" as the name of a world group. + * @attention Constraints: + @li "group" is limited to 128 characters. Use + "hccl_world_group" as the name of a world group. * @li Operators HcomSend and HcomReceive have the same "sr_tag". * @li "shape" should be same as the input tensor of HcomSend. * @li "dtype" should be same as the input tensor of HcomSend. diff --git a/third_party/fwkacllib/inc/ops/hvd_ops.h b/third_party/fwkacllib/inc/ops/hvd_ops.h index 89282ca5..bde8486c 100644 --- a/third_party/fwkacllib/inc/ops/hvd_ops.h +++ b/third_party/fwkacllib/inc/ops/hvd_ops.h @@ -28,10 +28,10 @@ namespace ge { * @brief Outputs a tensor gathering all input tensors. * @par Inputs: * x: A tensor. Must be one of the following types: uint8, int8, uint16, int16, int32, - * int64, float16, bool. + int64, float16, bool. * @par Attributes: - * @li rank_size: A required integer identifying the number of ranks - * participating in the op. + * @li rank_size: A required integer identifying the number of ranks + participating in the op. * @par Outputs: * y: A Tensor. Has the same type as "x". */ @@ -44,13 +44,13 @@ REG_OP(HorovodAllgather) .OP_END_FACTORY_REG(HorovodAllgather) /** - * @brief Outputs a tensor containing the reduction across all input tensors - * passed to op. + * @brief Outputs a tensor containing the reduction across all input tensors + passed to op. * @par Inputs: - * x: A tensor. Must be one of the following types: int32, int64, float16, float32 - * @par Attributes: - * @li reduce_op: A required int identifying the reduction operation to - * perform.The supported operation are: "sum", "max", "min", "prod". + * x: A tensor. 
Must be one of the following types: int32, int64, float16, float32 + @par Attributes: + * @li reduce_op: A required int identifying the reduction operation to + perform.The supported operation are: "sum", "max", "min", "prod". * @par Outputs: * y: A Tensor. Has the same type as "x". */ @@ -63,11 +63,11 @@ REG_OP(HorovodAllreduce) /** * @brief Broadcasts the input tensor in root rank to all ranks. * @par Inputs: - * x: A list of dynamic input tensor. Must be one of the following types: - * int8, int32, float16, float32. + * x: A list of dynamic input tensor. Must be one of the following types: + int8, int32, float16, float32. * @par Attributes: - * @li root_rank: A required integer identifying the root rank in the op - * input of this rank will be broadcast to other ranks. + * @li root_rank: A required integer identifying the root rank in the op + input of this rank will be broadcast to other ranks. * @par Outputs: * y: A list of dynamic output tensor. Has the same type and length as "x". */ diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 9412112c..27fb79a9 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -26,20 +26,20 @@ namespace ge { /** -*@brief Adjust the hue of one or more images. +*@brief Adjust the hue of one or more images . \n *@par Inputs: -*Input images is a tensor of at least 3 dimensions. The last dimension is \n -interpretted as channels, and must be three. Inputs include: \n +*Input images is a tensor of at least 3 dimensions. The last dimension is +interpretted as channels, and must be three. Inputs include: *@li images:A Tensor of type float. Images to adjust. At least 3-D. -*@li delta:A Tensor of type float. A float delta to add to the hue. +*@li delta:A Tensor of type float. A float delta to add to the hue . \n *@par Outputs: -*y:A Tensor of type float. +*y:A Tensor of type float . 
\n -*@attention Constraints: \n -*Input images is a tensor of at least 3 dimensions. The last dimension is \n -interpretted as channels, and must be three. +*@attention Constraints: +*Input images is a tensor of at least 3 dimensions. The last dimension is +interpretted as channels, and must be three . \n *@par Third-party framework compatibility *Compatible with tensorflow AdjustHue operator. @@ -52,20 +52,20 @@ REG_OP(AdjustHue) .OP_END_FACTORY_REG(AdjustHue) /** -*@brief Adjust the saturation of one or more images. +*@brief Adjust the saturation of one or more images . \n *@par Inputs: -*Input images is a tensor of at least 3 dimensions. The last dimension is \n -interpretted as channels, and must be three. Inputs include: \n +*Input images is a tensor of at least 3 dimensions. The last dimension is +interpretted as channels, and must be three. Inputs include: *@li images:A Tensor of type float. Images to adjust. At least 3-D. -*@li scale:A Tensor of type float. A float scale to add to the saturation. +*@li scale:A Tensor of type float. A float scale to add to the saturation . \n *@par Outputs: -*y:A Tensor of type float. +*y:A Tensor of type float . \n -*@attention Constraints: \n -*Input images is a tensor of at least 3 dimensions. The last dimension is \n -interpretted as channels, and must be three. +*@attention Constraints: +*Input images is a tensor of at least 3 dimensions. The last dimension is +interpretted as channels, and must be three . \n *@par Third-party framework compatibility *Compatible with tensorflow AdjustSaturation operator. @@ -78,20 +78,20 @@ REG_OP(AdjustSaturation) .OP_END_FACTORY_REG(AdjustSaturation) /** -*@brief Adjust the contrast of one or more images. +*@brief Adjust the contrast of one or more images . \n *@par Inputs: -*Input images is a tensor of at least 3 dimensions. The last 3 dimensions are \n -interpreted as '[height, width, channels]'. Inputs include: \n +*Input images is a tensor of at least 3 dimensions. 
The last 3 dimensions are +interpreted as '[height, width, channels]'. Inputs include: *@li images:A Tensor of type float. Images to adjust. At least 3-D. -*@li scale:A Tensor of type float. A float multiplier for adjusting contrast. +*@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n *@par Outputs: -*y:A Tensor of type float. +*y:A Tensor of type float . \n -*@attention Constraints: \n -*Input images is a tensor of at least 3 dimensions. The last dimension is \n -interpretted as channels, and must be three. +*@attention Constraints: +*Input images is a tensor of at least 3 dimensions. The last dimension is +interpretted as channels, and must be three . \n *@par Third-party framework compatibility *Compatible with tensorflow AdjustContrast operator. @@ -104,33 +104,33 @@ REG_OP(AdjustContrast) .OP_END_FACTORY_REG(AdjustContrast) /** -*@brief Extracts crops from the input image tensor and resizes them. Extracts \n -crops from the input image tensor and resizes them using bilinear sampling or \n -nearest neighbor sampling to a common output size specified by crop_size. +*@brief Extracts crops from the input image tensor and resizes them. Extracts +crops from the input image tensor and resizes them using bilinear sampling or +nearest neighbor sampling to a common output size specified by crop_size . \n *@par Inputs: -*Input images must be a 4-D tensor. Inputs include: \n -*@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, \n -int16, int32, int64, float16, float, double. A 4-D tensor of shape \n +*Input images must be a 4-D tensor. Inputs include: +*@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, +int16, int32, int64, float16, float, double. A 4-D tensor of shape [batch, image_height, image_width, depth]. *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. -*@li box_index: A Tensor of type int32. 
A 1-D tensor of shape [num_boxes] with \n +*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch). -*@li crop_size: A Tensor of type int32. A 1-D tensor of 2 elements, crop_size \n -= [crop_height, crop_width]. All cropped image patches are resized to this size. +*@li crop_size: A Tensor of type int32. A 1-D tensor of 2 elements, crop_size += [crop_height, crop_width]. All cropped image patches are resized to this size . \n *@par Attributes: -*@li extrapolation_value: An optional float. Defaults to 0. Value used for \n +*@li extrapolation_value: An optional float. Defaults to 0. Value used for extrapolation, when applicable. -*@li method: An optional string from: '"bilinear", "nearest"'. Defaults to \n -"bilinear". Currently two sampling methods are supported: Bilinear and \n -NearestNeighbor. +*@li method: An optional string from: '"bilinear", "nearest"'. Defaults to +"bilinear". Currently two sampling methods are supported: Bilinear and +NearestNeighbor . \n *@par Outputs: -*y:A Tensor of type float. +*y:A Tensor of type float . \n -*@attention Constraints: \n -*Input images must be a 4-D tensor. +*@attention Constraints: +*Input images must be a 4-D tensor . \n *@par Third-party framework compatibility *Compatible with tensorflow CropAndResize operator. @@ -150,28 +150,31 @@ REG_OP(CropAndResize) /** *@brief Extracts crops from the input image tensor and resizes them. * Extracts crops from the input image tensor and resizes them using bilinear sampling or -* nearest neighbor sampling to a common output size specified by crop_size. +* nearest neighbor sampling to a common output size specified by crop_size . \n *@par Inputs: *Input images must be a 5HD tensor. Inputs include: *@li x:A Tensor. Must be one of the following types:float16, float. A 5HD tensor of shape * [batch, C1, image_height, image_width, C0]. *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. 
-*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch). +*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n *@par Attributes: *@li crop_size: list int. [crop_height, crop_width]. All cropped image patches are resized to this size. *@li extrapolation_value: An optional float. Defaults to 0. Value used for extrapolation, when applicable. -*@li method: An optional string from: '"bilinear"'. Defaults to "bilinear". +*@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n *@par Outputs: -*y:A Tensor of type float. +*y:A Tensor of type float . \n *@attention Constraints: -*Input images must be a 5HD tensor. +*Input images must be a 5HD tensor . \n *@par Third-party framework compatibility *Compatible with tensorflow CropAndResize operator. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use CropAndResize instead. */ REG_OP(CropAndResizeD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -184,30 +187,30 @@ REG_OP(CropAndResizeD) .OP_END_FACTORY_REG(CropAndResizeD) /** -*@brief Computes the gradient of the crop_and_resize op wrt the input \n -boxes tensor. +*@brief Computes the gradient of the crop_and_resize op wrt the input +boxes tensor . \n *@par Inputs: -*Input images and grads must be a 4-D tensor. Inputs include: \n +*Input images and grads must be a 4-D tensor. Inputs include: *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. -*@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. \n +*@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. Both image_height and image_width need to be positive. -*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor \n -specifies the coordinates of a box in the box_ind[i] image and is specified in \n +*@li boxes: A 2-D tensor of shape [num_boxes, 4]. 
The i-th row of the tensor +specifies the coordinates of a box in the box_ind[i] image and is specified in normalized coordinates [y1, x1, y2, x2]. -*@li box_index: A 1-D tensor of shape [num_boxes] with int32 values in \n -[0, batch). The value of box_ind[i] specifies the image that the i-th box \n -refers to. +*@li box_index: A 1-D tensor of shape [num_boxes] with int32 values in +[0, batch). The value of box_ind[i] specifies the image that the i-th box +refers to . \n *@par Attributes: -method: A string specifying the interpolation method. Only 'bilinear' is \n -supported for now. +method: A string specifying the interpolation method. Only 'bilinear' is +supported for now . \n *@par Outputs: -*y:A 2-D tensor of shape [num_boxes, 4]. +*y:A 2-D tensor of shape [num_boxes, 4] . \n -*@attention Constraints: \n -*Input images and grads must be a 4-D tensor. +*@attention Constraints: +*Input images and grads must be a 4-D tensor . \n *@par Third-party framework compatibility *Compatible with tensorflow CropAndResizeGradBoxes operator. @@ -224,31 +227,31 @@ REG_OP(CropAndResizeGradBoxes) .OP_END_FACTORY_REG(CropAndResizeGradBoxes) /** -*@brief Computes the gradient of the crop_and_resize op wrt the input \n -images tensor. +*@brief Computes the gradient of the crop_and_resize op wrt the input +images tensor . \n *@par Inputs: -*Input grads must be a 4-D tensor. Inputs include: \n +*Input grads must be a 4-D tensor. Inputs include: *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. -*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor \n -specifies the coordinates of a box in the box_ind[i] image and is specified \n +*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor +specifies the coordinates of a box in the box_ind[i] image and is specified in normalized coordinates [y1, x1, y2, x2]. -*@li box_index: A 1-D tensor of shape [num_boxes] with int32 values in \n -[0, batch). 
The value of box_ind[i] specifies the image that the i-th box \n +*@li box_index: A 1-D tensor of shape [num_boxes] with int32 values in +[0, batch). The value of box_ind[i] specifies the image that the i-th box refers to. -*@li image_size: A 1-D tensor with value [batch, image_height, image_width, \n -depth] containing the original image size. Both image_height and image_width \n -need to be positive. +*@li image_size: A 1-D tensor with value [batch, image_height, image_width, +depth] containing the original image size. Both image_height and image_width +need to be positive . \n *@par Attributes: -method: A string specifying the interpolation method. Only 'bilinear' is \n -supported for now. +method: A string specifying the interpolation method. Only 'bilinear' is +supported for now . \n *@par Outputs: -*y:A 4-D tensor of shape [batch, image_height, image_width, depth]. +*y:A 4-D tensor of shape [batch, image_height, image_width, depth] . \n -*@attention Constraints: \n -*Input grads must be a 4-D tensor. +*@attention Constraints: +*Input grads must be a 4-D tensor . \n *@par Third-party framework compatibility *Compatible with tensorflow CropAndResizeGradImage operator. @@ -265,35 +268,35 @@ REG_OP(CropAndResizeGradImage) .OP_END_FACTORY_REG(CropAndResizeGradImage) /** -*@brief Extracts a glimpse from the input tensor. +*@brief Extracts a glimpse from the input tensor . \n *@par Inputs: -*Input x must be a 4-D tensor. Inputs include: \n +*Input x must be a 4-D tensor. Inputs include: *@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. -*@li size: A 1-D tensor of 2 elements containing the size of the glimpses to \n -extract. The glimpse height must be specified first, following by the glimpse \n +*@li size: A 1-D tensor of 2 elements containing the size of the glimpses to +extract. The glimpse height must be specified first, following by the glimpse width. 
-*@li offsets: A 2-D integer tensor of shape [batch_size, 2] containing the y, \n -x locations of the center of each window. +*@li offsets: A 2-D integer tensor of shape [batch_size, 2] containing the y, +x locations of the center of each window . \n *@par Attributes: -*@li centered: indicates if the offset coordinates are centered relative to \n -the image, in which case the (0, 0) offset is relative to the center of the \n -input images. If false, the (0,0) offset corresponds to the upper left corner \n +*@li centered: indicates if the offset coordinates are centered relative to +the image, in which case the (0, 0) offset is relative to the center of the +input images. If false, the (0,0) offset corresponds to the upper left corner of the input images. *@li normalized: indicates if the offset coordinates are normalized. -*@li uniform_noise: indicates if the noise should be generated using a \n +*@li uniform_noise: indicates if the noise should be generated using a uniform distribution or a Gaussian distribution. -*@li noise: indicates if the noise should uniform, gaussian, or zero. \n -The default is uniform which means the the noise type will be decided by \n -uniform_noise. +*@li noise: indicates if the noise should be uniform, gaussian, or zero. +The default is uniform which means the noise type will be decided by +uniform_noise . \n *@par Outputs: -*y:A tensor representing the glimpses [batch_size, glimpse_height, \n -glimpse_width, channels]. +*y:A tensor representing the glimpses [batch_size, glimpse_height, +glimpse_width, channels] . \n -*@attention Constraints: \n -*Input x must be a 4-D tensor. +*@attention Constraints: +*Input x must be a 4-D tensor . \n *@par Third-party framework compatibility *Compatible with tensorflow CropAndResizeGradImage operator. @@ -311,17 +314,17 @@ REG_OP(ExtractGlimpse) .OP_END_FACTORY_REG(ExtractGlimpse) /** -*@brief Convert one or more images from HSV to RGB. +*@brief Convert one or more images from HSV to RGB . 
\n *@par Inputs: -*Last dimension of input x must be size 3. Inputs include: \n -*images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3. +*Last dimension of input x must be size 3. Inputs include: +*images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3 . \n *@par Outputs: -*y:images converted to RGB. +*y:images converted to RGB . \n -*@attention Constraints: \n -*Last dimension of input x must be size 3. +*@attention Constraints: +*Last dimension of input x must be size 3 . \n *@par Third-party framework compatibility *Compatible with tensorflow HSVToRGB operator. @@ -333,29 +336,29 @@ REG_OP(HSVToRGB) .OP_END_FACTORY_REG(HSVToRGB) /** -*@brief Resize quantized images to size using quantized bilinear interpolation. +*@brief Resize quantized images to size using quantized bilinear interpolation . \n *@par Inputs: -*Input images must be a 4-D tensor. Inputs include: \n +*Input images must be a 4-D tensor. Inputs include: *@li images: 4-D with shape [batch, height, width, channels]. -*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new \n +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images. *@li min: A Tensor of type float. -*@li max: A Tensor of type float. +*@li max: A Tensor of type float . \n *@par Attributes: -*@li align_corners: An optional bool. Defaults to False. If true, the centers \n -of the 4 corner pixels of the input and output tensors are aligned, preserving \n +*@li align_corners: An optional bool. Defaults to False. If true, the centers +of the 4 corner pixels of the input and output tensors are aligned, preserving the values at the corner pixels. Defaults to false. -*@li half_pixel_centers: indicates if the offset coordinates are normalized. +*@li half_pixel_centers: indicates if the offset coordinates are normalized . \n *@par Outputs: *@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. 
*@li y_min: A Tensor of type float. -*@li y_max: A Tensor of type float. +*@li y_max: A Tensor of type float . \n -*@attention Constraints: \n -*Input images and output images must be quantized types. +*@attention Constraints: +*Input images and output images must be quantized types . \n *@par Third-party framework compatibility *Compatible with tensorflow QuantizedResizeBilinear operator. @@ -374,24 +377,24 @@ REG_OP(QuantizedResizeBilinear) .OP_END_FACTORY_REG(QuantizedResizeBilinear) /** -*@brief Resize images to size using area interpolation. +*@brief Resize images to size using area interpolation . \n *@par Inputs: -*Input images must be a 4-D tensor. Inputs include: \n +*Input images must be a 4-D tensor. Inputs include: *@li images: 4-D with shape [batch, height, width, channels]. -*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. \n -The new size for the images. +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. +The new size for the images . \n *@par Attributes: -*align_corners: If true, the centers of the 4 corner pixels of the input and \n -output tensors are aligned, preserving the values at the corner pixels. \n -Defaults to false. +*align_corners: If true, the centers of the 4 corner pixels of the input and +output tensors are aligned, preserving the values at the corner pixels. +Defaults to false . \n *@par Outputs: -*y: 4-D with shape [batch, new_height, new_width, channels]. +*y: 4-D with shape [batch, new_height, new_width, channels] . \n -*@attention Constraints: \n -*Input images can be of different types but output images are always float. +*@attention Constraints: +*Input images can be of different types but output images are always float . \n *@par Third-party framework compatibility *Compatible with tensorflow ResizeArea operator. @@ -406,27 +409,27 @@ REG_OP(ResizeArea) .OP_END_FACTORY_REG(ResizeArea) /** -*@brief Computes the gradient of bicubic interpolation. 
+*@brief Computes the gradient of bicubic interpolation . \n *@par Inputs: -*Input grads must be a 4-D tensor. Inputs include: \n -*@li grads: A Tensor of type float. 4-D with shape [batch, height, width, \n +*Input grads must be a 4-D tensor. Inputs include: +*@li grads: A Tensor of type float. 4-D with shape [batch, height, width, channels]. -*@li original_image: A Tensor. Must be one of the following types: float, \n -double. 4-D with shape [batch, orig_height, orig_width, channels], The image \n -tensor that was resized. +*@li original_image: A Tensor. Must be one of the following types: float, +double. 4-D with shape [batch, orig_height, orig_width, channels], The image +tensor that was resized . \n *@par Attributes: -*@li align_corners: An optional bool. Defaults to False. If true, the centers \n -of the 4 corner pixels of the input and grad tensors are aligned. Defaults to \n +*@li align_corners: An optional bool. Defaults to False. If true, the centers +of the 4 corner pixels of the input and grad tensors are aligned. Defaults to false. -*@li half_pixel_centers: An optional bool. Defaults to False. +*@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: -*y: A Tensor. Has the same type as original_image. +*y: A Tensor. Has the same type as original_image . \n -*@attention Constraints: \n -*Input images can be of different types but output images are always float. +*@attention Constraints: +*Input images can be of different types but output images are always float . \n *@par Third-party framework compatibility *Compatible with tensorflow ResizeBicubicGrad operator. @@ -441,25 +444,25 @@ REG_OP(ResizeBicubicGrad) .OP_END_FACTORY_REG(ResizeBicubicGrad) /** -*@brief Resize images to size using bicubic interpolation. +*@brief Resize images to size using bicubic interpolation . \n *@par Inputs: -*Input images must be a 4-D tensor. Inputs include: \n +*Input images must be a 4-D tensor. 
Inputs include: *@li images: 4-D with shape [batch, height, width, channels]. -*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new \n -size for the images. +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new +size for the images . \n *@par Attributes: -*@li align_corners: If true, the centers of the 4 corner pixels of the input \n -and output tensors are aligned, preserving the values at the corner pixels. \n +*@li align_corners: If true, the centers of the 4 corner pixels of the input +and output tensors are aligned, preserving the values at the corner pixels. Defaults to false. -*@li half_pixel_centers: An optional bool. Defaults to False. +*@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: -*y: 4-D with shape [batch, new_height, new_width, channels]. +*y: 4-D with shape [batch, new_height, new_width, channels] . \n -*@attention Constraints: \n -*Input images can be of different types but output images are always float. +*@attention Constraints: +*Input images can be of different types but output images are always float . \n *@par Third-party framework compatibility *Compatible with tensorflow ResizeBicubic operator. @@ -475,26 +478,26 @@ REG_OP(ResizeBicubic) .OP_END_FACTORY_REG(ResizeBicubic) /** -*@brief Computes the gradient of nearest neighbor interpolation. +*@brief Computes the gradient of nearest neighbor interpolation . \n *@par Inputs: -*Input grads must be a 4-D tensor. Inputs include: \n -*@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, \n +*Input grads must be a 4-D tensor. Inputs include: +*@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, float16, float, double. 4-D with shape [batch, height, width, channels]. -*@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. \n -The original input size. +*@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. +The original input size . 
\n *@par Attributes: -*@li align_corners: An optional bool. Defaults to False. If true, the centers \n -of the 4 corner pixels of the input and grad tensors are aligned. Defaults to \n +*@li align_corners: An optional bool. Defaults to False. If true, the centers +of the 4 corner pixels of the input and grad tensors are aligned. Defaults to false. -*@li half_pixel_centers: An optional bool. Defaults to False. +*@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: -*y: A Tensor. Has the same type as grads. +*y: A Tensor. Has the same type as grads . \n -*@attention Constraints: \n -*Input grads must be a 4-D tensor. +*@attention Constraints: +*Input grads must be a 4-D tensor . \n *@par Third-party framework compatibility *Compatible with tensorflow ResizeNearestNeighborV2Grad operator. @@ -511,24 +514,27 @@ REG_OP(ResizeNearestNeighborV2Grad) .OP_END_FACTORY_REG(ResizeNearestNeighborV2Grad) /** -*@brief Computes the gradient of nearest neighbor interpolation. +*@brief Computes the gradient of nearest neighbor interpolation . \n *@par Inputs: -*Input grads must be a 4-D tensor. Inputs include: \n +*Input grads must be a 4-D tensor. Inputs include: *grads: A Tensor. 4-D with shape [batch, height, width, channels]. *@par Attributes: -*@li align_corners: An optional bool. Defaults to False. If true, the centers \n -of the 4 corner pixels of the input and grad tensors are aligned. Defaults to \n +*@li align_corners: An optional bool. Defaults to False. If true, the centers +of the 4 corner pixels of the input and grad tensors are aligned. Defaults to false. -*@li size: An list type. Specify the images size. +*@li size: A list type. Specify the images size . \n *@par Outputs: -*y: A Tensor. Has the same type as grads. +*y: A Tensor. Has the same type as grads . \n *@par Third-party framework compatibility *Compatible with tensorflow ResizeNearestNeighborV2GradD operator. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. 
Please use ResizeNearestNeighborV2Grad instead. */ REG_OP(ResizeNearestNeighborV2GradD) @@ -540,25 +546,25 @@ REG_OP(ResizeNearestNeighborV2GradD) .OP_END_FACTORY_REG(ResizeNearestNeighborV2GradD) /** -*@brief Computes the gradient of bilinear interpolation. +*@brief Computes the gradient of bilinear interpolation . \n *@par Inputs: -*Input grads must be a 4-D tensor. Inputs include: \n -*@li grads: A Tensor of type float32. 4-D with shape [batch, height, width, \n +*Input grads must be a 4-D tensor. Inputs include: +*@li grads: A Tensor of type float32. 4-D with shape [batch, height, width, channels]. -*@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width, \n -channels], The image tensor that was resized. +*@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width, +channels], The image tensor that was resized . \n *@par Attributes: -*align_corners: An optional bool. Defaults to False. If true, the centers of \n -the 4 corner pixels of the input and grad tensors are aligned. Defaults to \n -false. +*align_corners: An optional bool. Defaults to False. If true, the centers of +the 4 corner pixels of the input and grad tensors are aligned. Defaults to +false . \n *@par Outputs: -*y: A Tensor. Has the same type as original_image. +*y: A Tensor. Has the same type as original_image . \n -*@attention Constraints: \n -*Input grads must be a 4-D tensor. +*@attention Constraints: +*Input grads must be a 4-D tensor . \n *@par Third-party framework compatibility *Compatible with tensorflow ResizeBilinearV2Grad operator. @@ -573,24 +579,24 @@ REG_OP(ResizeBilinearV2Grad) .OP_END_FACTORY_REG(ResizeBilinearV2Grad) /** -*@brief Resize images to size using bilinear interpolation. +*@brief Resize images to size using bilinear interpolation . \n *@par Inputs: -*Input images must be a 4-D tensor. Inputs include: \n +*Input images must be a 4-D tensor. Inputs include: *@li x: 4-D with shape [batch, height, width, channels]. 
-*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new \n -size for the images. +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new +size for the images . \n *@par Attributes: -*align_corners: If true, the centers of the 4 corner pixels of the input and \n -output tensors are aligned, preserving the values at the corner pixels. \n -Defaults to false. +*align_corners: If true, the centers of the 4 corner pixels of the input and +output tensors are aligned, preserving the values at the corner pixels. +Defaults to false . \n *@par Outputs: -*y: 4-D with shape [batch, new_height, new_width, channels]. +*y: 4-D with shape [batch, new_height, new_width, channels] . \n -*@attention Constraints: \n -*Input images can be of different types but output images are always float. +*@attention Constraints: +*Input images can be of different types but output images are always float . \n *@par Third-party framework compatibility *Compatible with tensorflow ResizeBilinearV2 operator. @@ -606,20 +612,20 @@ REG_OP(ResizeBilinearV2) .OP_END_FACTORY_REG(ResizeBilinearV2) /** -*@brief Converts one or more images from RGB to HSV. +*@brief Converts one or more images from RGB to HSV . \n *@par Inputs: -*Last dimension of input images must be size 3. Inputs include: \n -*images: A Tensor. Must be one of the following types: float, double. 1-D or \n -higher rank. RGB data to convert. Last dimension must be size 3. +*Last dimension of input images must be size 3. Inputs include: +*images: A Tensor. Must be one of the following types: float, double. 1-D or +higher rank. RGB data to convert. Last dimension must be size 3 . \n *@par Outputs: -*y: A Tensor. Has the same type as images. +*y: A Tensor. Has the same type as images . \n -*@attention Constraints: \n -*Outputs a tensor of the same shape as the images tensor, containing the HSV \n -value of the pixels. The output is only well defined if the value in images \n -are in [0,1]. 
+*@attention Constraints: +*Outputs a tensor of the same shape as the images tensor, containing the HSV +value of the pixels. The output is only well defined if the value in images +are in [0,1] . \n *@par Third-party framework compatibility *Compatible with tensorflow RGBToHSV operator. @@ -631,38 +637,38 @@ REG_OP(RGBToHSV) .OP_END_FACTORY_REG(RGBToHSV) /** -*@brief Generate a single randomly distorted bounding box for an image. +*@brief Generate a single randomly distorted bounding box for an image . \n *@par Inputs: -*Input images must be a 4-D tensor. Inputs include: \n +*Input images must be a 4-D tensor. Inputs include: *@li image_size: 1-D, containing [height, width, channels]. -*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding \n +*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding boxes associated with the image. -*@li min_object_covered: The cropped area of the image must contain at least \n -this fraction of any bounding box supplied. The value of this parameter should \n -be non-negative. In the case of 0, the cropped area does not need to overlap \n -any of the bounding boxes supplied. +*@li min_object_covered: The cropped area of the image must contain at least +this fraction of any bounding box supplied. The value of this parameter should +be non-negative. In the case of 0, the cropped area does not need to overlap +any of the bounding boxes supplied . \n *@par Attributes: -*@li seed: If either seed or seed2 are set to non-zero, the random number \n +*@li seed: If either seed or seed2 are set to non-zero, the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. *@li seed2: A second seed to avoid seed collision. -*@li aspect_ratio_range: The cropped area of the image must have an aspect \n +*@li aspect_ratio_range: The cropped area of the image must have an aspect ratio = width / height within this range. 
-*@li max_attempts: Number of attempts at generating a cropped region of the \n -image of the specified constraints. After max_attempts failures, return the \n +*@li max_attempts: Number of attempts at generating a cropped region of the +image of the specified constraints. After max_attempts failures, return the entire image. -*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes \n -supplied. If true, assume an implicit bounding box covering the whole input. \n -If false, raise an error. +*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes +supplied. If true, assume an implicit bounding box covering the whole input. +If false, raise an error . \n *@par Outputs: *@li begin: 1-D, containing [offset_height, offset_width, 0]. *@li size: 1-D, containing [target_height, target_width, -1]. -*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box. +*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n -*@attention Constraints: \n -*Input images can be of different types but output images are always float. +*@attention Constraints: +*Input images can be of different types but output images are always float . \n *@par Third-party framework compatibility *Compatible with tensorflow SampleDistortedBoundingBoxExt2 operator. @@ -687,21 +693,21 @@ REG_OP(SampleDistortedBoundingBoxExt2) .OP_END_FACTORY_REG(SampleDistortedBoundingBoxExt2) /** -*@brief Resize images to size using nearest neighbor interpolation. +*@brief Resize images to size using nearest neighbor interpolation . \n *@par Inputs: -*Input x must be a 4-D tensor. Inputs include: \n +*Input x must be a 4-D tensor. Inputs include: *@li x: 4-D with shape [batch, height, width, channels]. -*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. \n -The new size for the images. +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. +The new size for the images . 
\n *@par Attributes: -*align_corners: If true, the centers of the 4 corner pixels of the input and \n -output tensors are aligned, preserving the values at the corner pixels. \n -Defaults to false. +*align_corners: If true, the centers of the 4 corner pixels of the input and +output tensors are aligned, preserving the values at the corner pixels. +Defaults to false . \n *@par Outputs: -*y: 4-D with shape [batch, new_height, new_width, channels]. +*y: 4-D with shape [batch, new_height, new_width, channels] . \n *@par Third-party framework compatibility *Compatible with tensorflow ResizeNearestNeighborV2 operator. @@ -718,20 +724,20 @@ REG_OP(ResizeNearestNeighborV2) .OP_END_FACTORY_REG(ResizeNearestNeighborV2) /** -*@brief Draw bounding boxes on a batch of images. +*@brief Draw bounding boxes on a batch of images . \n *@par Inputs: -*Input images must be a 4-D tensor. Inputs include: \n -*@li images: A Tensor. Must be one of the following types: float. 4-D with \n +*Input images must be a 4-D tensor. Inputs include: +*@li images: A Tensor. Must be one of the following types: float. 4-D with shape [batch, height, width, depth]. A batch of images. -*@li boxes: A Tensor of type float32. 3-D with shape [batch, \n -num_bounding_boxes, 4] containing bounding boxes. +*@li boxes: A Tensor of type float32. 3-D with shape [batch, +num_bounding_boxes, 4] containing bounding boxes . \n *@par Outputs: -*A Tensor. Has the same type as images. +*A Tensor. Has the same type as images . \n -*@attention Constraints: \n -*Input images must be a 4-D tensor. +*@attention Constraints: +*Input images must be a 4-D tensor . \n *@par Third-party framework compatibility *Compatible with tensorflow DrawBoundingBoxes operator. @@ -744,27 +750,27 @@ REG_OP(DrawBoundingBoxes) .OP_END_FACTORY_REG(DrawBoundingBoxes) /** -*@brief Greedily selects a subset of bounding boxes in descending order of \n -score. +*@brief Greedily selects a subset of bounding boxes in descending order of +score . 
\n *@par Inputs: -*Input boxes and scores must be float type. Inputs include: \n +*Input boxes and scores must be float type. Inputs include: *@li boxes: A 2-D float tensor of shape [num_boxes, 4]. -*@li scores: A 1-D float tensor of shape [num_boxes] representing a single \n +*@li scores: A 1-D float tensor of shape [num_boxes] representing a single score corresponding to each box (each row of boxes). -*@li max_output_size: A scalar integer tensor representing the maximum number \n -of boxes to be selected by non max suppression. +*@li max_output_size: A scalar integer tensor representing the maximum number +of boxes to be selected by non max suppression . \n *@par Attributes: -*iou_threshold: A float representing the threshold for deciding whether boxes \n -overlap too much with respect to IOU. +*iou_threshold: A float representing the threshold for deciding whether boxes +overlap too much with respect to IOU . \n *@par Outputs: -*selected_indices: A 1-D integer tensor of shape [M] representing the selected \n -indices from the boxes tensor, where M <= max_output_size. +*selected_indices: A 1-D integer tensor of shape [M] representing the selected +indices from the boxes tensor, where M <= max_output_size . \n -*@attention Constraints: \n -*Input boxes and scores must be float type. +*@attention Constraints: +*Input boxes and scores must be float type . \n *@par Third-party framework compatibility *Compatible with tensorflow NonMaxSuppression operator. @@ -779,25 +785,25 @@ REG_OP(NonMaxSuppression) .OP_END_FACTORY_REG(NonMaxSuppression) /** -*@brief Greedily selects a subset of bounding boxes in descending order of \n -score. +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n *@par Inputs: -*Input boxes and scores must be float type. Inputs include: \n +*Input boxes and scores must be float type. Inputs include: *@li boxes: A 2-D float tensor of shape [num_boxes, 4]. 
-*@li scores: A 1-D float tensor of shape [num_boxes] representing a single \n +*@li scores: A 1-D float tensor of shape [num_boxes] representing a single score corresponding to each box (each row of boxes). -*@li max_output_size: A scalar integer tensor representing the maximum number \n +*@li max_output_size: A scalar integer tensor representing the maximum number of boxes to be selected by non max suppression. -*@li iou_threshold: A 0-D float tensor representing the threshold for deciding \n -whether boxes overlap too much with respect to IOU. +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding +whether boxes overlap too much with respect to IOU . \n *@par Outputs: -*selected_indices: A 1-D integer tensor of shape [M] representing the selected \n -indices from the boxes tensor, where M <= max_output_size. +*selected_indices: A 1-D integer tensor of shape [M] representing the selected +indices from the boxes tensor, where M <= max_output_size . \n -*@attention Constraints: \n -*Input boxes and scores must be float type. +*@attention Constraints: +*Input boxes and scores must be float type . \n *@par Third-party framework compatibility *Compatible with tensorflow NonMaxSuppressionV2 operator. @@ -812,27 +818,27 @@ REG_OP(NonMaxSuppressionV2) .OP_END_FACTORY_REG(NonMaxSuppressionV2) /** -*@brief Greedily selects a subset of bounding boxes in descending order of \n -score. +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n *@par Inputs: -*Input boxes and scores must be float type. Inputs include: \n +*Input boxes and scores must be float type. Inputs include: *@li boxes: A 2-D float tensor of shape [num_boxes, 4]. -*@li scores: A 1-D float tensor of shape [num_boxes] representing a single \n +*@li scores: A 1-D float tensor of shape [num_boxes] representing a single score corresponding to each box (each row of boxes). 
-*@li max_output_size: A scalar integer tensor representing the maximum number \n +*@li max_output_size: A scalar integer tensor representing the maximum number of boxes to be selected by non max suppression. -*@li iou_threshold: A 0-D float tensor representing the threshold for deciding \n +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding whether boxes overlap too much with respect to IOU. -*@li score_threshold: A 0-D float tensor representing the threshold for \n -deciding when to remove boxes based on score. +*@li score_threshold: A 0-D float tensor representing the threshold for +deciding when to remove boxes based on score . \n *@par Outputs: -*selected_indices: A 1-D integer tensor of shape [M] representing the selected \n -indices from the boxes tensor, where M <= max_output_size. +*selected_indices: A 1-D integer tensor of shape [M] representing the selected +indices from the boxes tensor, where M <= max_output_size . \n -*@attention Constraints: \n -*Input boxes and scores must be float type. +*@attention Constraints: +*Input boxes and scores must be float type . \n *@par Third-party framework compatibility *Compatible with tensorflow NonMaxSuppressionV3 operator. @@ -848,33 +854,33 @@ REG_OP(NonMaxSuppressionV3) .OP_END_FACTORY_REG(NonMaxSuppressionV3) /** -*@brief Greedily selects a subset of bounding boxes in descending order of \n -score. +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n *@par Inputs: -*Input boxes and scores must be float type. Inputs include: \n +*Input boxes and scores must be float type. Inputs include: *@li boxes: A 2-D float tensor of shape [num_boxes, 4]. -*@li scores: A 1-D float tensor of shape [num_boxes] representing a single \n +*@li scores: A 1-D float tensor of shape [num_boxes] representing a single score corresponding to each box (each row of boxes). 
-*@li max_output_size: A scalar integer tensor representing the maximum number \n +*@li max_output_size: A scalar integer tensor representing the maximum number of boxes to be selected by non max suppression. -*@li iou_threshold: A 0-D float tensor representing the threshold for deciding \n +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding whether boxes overlap too much with respect to IOU. -*@li score_threshold: A 0-D float tensor representing the threshold for \n -deciding when to remove boxes based on score. +*@li score_threshold: A 0-D float tensor representing the threshold for +deciding when to remove boxes based on score . \n *@par Attributes: -*pad_to_max_output_size: If true, the output selected_indices is padded \n -to be of length max_output_size. Defaults to false. +*pad_to_max_output_size: If true, the output selected_indices is padded +to be of length max_output_size. Defaults to false . \n *@par Outputs: -*@li selected_indices: A 1-D integer tensor of shape [M] representing the \n +*@li selected_indices: A 1-D integer tensor of shape [M] representing the selected indices from the boxes tensor, where M <= max_output_size. -*@li valid_outputs: A 0-D integer tensor representing the number of valid \n -elements in selected_indices, with the valid elements appearing first. +*@li valid_outputs: A 0-D integer tensor representing the number of valid +elements in selected_indices, with the valid elements appearing first . \n -*@attention Constraints: \n -*Input boxes and scores must be float type. +*@attention Constraints: +*Input boxes and scores must be float type . \n *@par Third-party framework compatibility *Compatible with tensorflow NonMaxSuppressionV4 operator. @@ -892,29 +898,29 @@ REG_OP(NonMaxSuppressionV4) .OP_END_FACTORY_REG(NonMaxSuppressionV4) /** -*@brief Greedily selects a subset of bounding boxes in descending order of \n -score. 
+*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n *@par Inputs: -*Input overlaps and scores must be float type. Inputs include: \n -*@li overlaps: A 2-D float tensor of shape [num_boxes, num_boxes] \n +*Input overlaps and scores must be float type. Inputs include: +*@li overlaps: A 2-D float tensor of shape [num_boxes, num_boxes] representing the n-by-n box overlap values. -*@li scores: A 1-D float tensor of shape [num_boxes] representing a single \n +*@li scores: A 1-D float tensor of shape [num_boxes] representing a single score corresponding to each box (each row of boxes). -*@li max_output_size: A scalar integer tensor representing the maximum number \n +*@li max_output_size: A scalar integer tensor representing the maximum number of boxes to be selected by non max suppression. -*@li overlap_threshold: A 0-D float tensor representing the threshold for \n +*@li overlap_threshold: A 0-D float tensor representing the threshold for deciding whether boxes overlap too. -*@li score_threshold: A 0-D float tensor representing the threshold for \n -deciding when to remove boxes based on score. +*@li score_threshold: A 0-D float tensor representing the threshold for +deciding when to remove boxes based on score . \n *@par Attributes: -*pad_to_max_output_size: If true, the output selected_indices is padded \n -to be of length max_output_size. Defaults to false. +*pad_to_max_output_size: If true, the output selected_indices is padded +to be of length max_output_size. Defaults to false . \n *@par Outputs: -*selected_indices: A 1-D integer tensor of shape [M] representing the \n -selected indices from the boxes tensor, where M <= max_output_size. +*selected_indices: A 1-D integer tensor of shape [M] representing the +selected indices from the boxes tensor, where M <= max_output_size . \n *@par Third-party framework compatibility *Compatible with tensorflow NonMaxSuppressionWithOverlaps operator. 
@@ -930,29 +936,29 @@ REG_OP(NonMaxSuppressionWithOverlaps) .OP_END_FACTORY_REG(NonMaxSuppressionWithOverlaps) /** -*@brief JPEG-encode an image. +*@brief JPEG-encode an image . \n *@par Inputs: -*Input image must be unit8 type. Inputs include: \n -*image: A 3-D uint8 Tensor of shape [height, width, channels]. +*Input image must be uint8 type. Inputs include: +*image: A 3-D uint8 Tensor of shape [height, width, channels] . \n *@par Attributes: *@li format: Per pixel image format. -*@li quality: Quality of the compression from 0 to 100 (higher is better \n +*@li quality: Quality of the compression from 0 to 100 (higher is better and slower). -*@li progressive: If True, create a JPEG that loads progressively (coarse \n +*@li progressive: If True, create a JPEG that loads progressively (coarse to fine). -*@li optimize_size: If True, spend CPU/RAM to reduce size with no quality \n +*@li optimize_size: If True, spend CPU/RAM to reduce size with no quality change. *@li chroma_downsampling: A boolean, default is true. -*@li density_unit: Unit used to specify x_density and y_density: pixels per \n +*@li density_unit: Unit used to specify x_density and y_density: pixels per inch ('in') or centimeter ('cm'). *@li x_density: Horizontal pixels per density unit. *@li y_density: Vertical pixels per density unit. -*@li xmp_metadata: If not empty, embed this XMP metadata in the image header. +*@li xmp_metadata: If not empty, embed this XMP metadata in the image header . \n *@par Outputs: -*contents: 0-D. JPEG-encoded image. +*contents: 0-D. JPEG-encoded image . \n *@par Third-party framework compatibility *Compatible with tensorflow EncodeJpeg operator. @@ -975,16 +981,16 @@ REG_OP(EncodeJpeg) /** *@brief PNG-encode an image. *@par Inputs: -*Input image must be unit8 or uint16 type. Inputs include: \n -*image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] \n -where channels is: 1: for grayscale; 2: for grayscale + alpha; 3: for RGB; \n -4: for RGBA. 
+*Input image must be uint8 or uint16 type. Inputs include: +*image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] +where channels is: 1: for grayscale; 2: for grayscale + alpha; 3: for RGB; +4: for RGBA . \n *@par Attributes: -*compression: Compression level. +*compression: Compression level . \n *@par Outputs: -*contents: 0-D. PNG-encoded image. +*contents: 0-D. PNG-encoded image . \n *@par Third-party framework compatibility *Compatible with tensorflow EncodePng operator. @@ -997,30 +1003,33 @@ REG_OP(EncodePng) .OP_END_FACTORY_REG(EncodePng) /** -*@brief Resizes "images" to "size" using bilinear interpolation. +*@brief Resizes "images" to "size" using bilinear interpolation . \n *@par Inputs: * One input: -*x: An NC1HWC0 Tensor. \n -* Must be one of the following types: float16, float32. +*x: An NC1HWC0 Tensor. +* Must be one of the following types: float16, float32 . \n *@par Attributes: -*@li size: A required int32 Tensor specifying the new size for the images. \n +*@li size: A required int32 Tensor specifying the new size for the images. No default value. -*@li align_corners: An optional bool. If "true", the centers of the corner \n -pixels of the input and output tensors are aligned. Defaults to "false". +*@li align_corners: An optional bool. If "true", the centers of the corner +pixels of the input and output tensors are aligned. Defaults to "false" . \n *@par Outputs: -*y: A Tensor with type float32 and the same format as input "images". +*y: A Tensor with type float32 and the same format as input "images" . \n *@attention Constraints: -*@li The input "size" must be a tensor of 2 elements: size[0] <= 2048, \n +*@li The input "size" must be a tensor of 2 elements: size[0] <= 2048, size[1] <= 2048. -*@li The input "images" must be a tensor of 5 elements: images[2] <= 2048, \n -images[3] <= 2048. +*@li The input "images" must be a tensor of 5 elements: images[2] <= 2048, +images[3] <= 2048 . 
\n *@par Third-party framework compatibility * Compatible with TensorFlow operator ResizeBilinearV2D. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ResizeBilinearV2 instead. */ REG_OP(ResizeBilinearV2D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1031,12 +1040,12 @@ REG_OP(ResizeBilinearV2D) .OP_END_FACTORY_REG(ResizeBilinearV2D) /** -*@brief Resizes "images" to "size" using bilinear interpolation and keep ration at the time. +*@brief Resizes "images" to "size" using bilinear interpolation and keep ratio at the time. \n *@par Inputs: * One input: -*images: An NC1HWC0 Tensor. \n -* Must be one of the following types: float16, float32. +*images: An NC1HWC0 Tensor. +* Must be one of the following types: float16, float32 . \n *@par Attributes: *@li min_dimension: A required int32 attribute for the min dimension for the images. @@ -1046,47 +1055,50 @@ REG_OP(ResizeBilinearV2D) *@li align_corners: An optional bool. If "true", the centers of the corner * pixels of the input and output tensors are aligned. Defaults to "false". *@li half_pixel_centers: indicates if the offset coordinates are normalized -* Defaults to "false". +* Defaults to "false" . \n *@par Outputs: -*y: A Tensor with type float32 and the same format as input "images". +*y: A Tensor with type float32 and the same format as input "images" . \n *@attention Constraints: -* The input "images" must be a tensor of 5 elements: images[2] <= 2048, \n +* The input "images" must be a tensor of 5 elements: images[2] <= 2048, images[3] <= 2048. 
*/ -REG_OP(KeepRationResizeBilinear) +REG_OP(KeepRatioResizeBilinear) .INPUT(images, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT})) .REQUIRED_ATTR(min_dimension, Int) .REQUIRED_ATTR(max_dimension, Int) .ATTR(align_corners, Bool, false) .ATTR(half_pixel_centers, Bool, false) - .OP_END_FACTORY_REG(KeepRationResizeBilinear) + .OP_END_FACTORY_REG(KeepRatioResizeBilinear) /** -*@brief Resizes "images" to "size" using nearest neighbor interpolation. +*@brief Resizes "images" to "size" using nearest neighbor interpolation . \n *@par Inputs: * One input: -*x: An NC1HWC0 Tensor. \n +*x: An NC1HWC0 Tensor. * Must be one of the following types: float16, float32, int32, int8, uint8 *@par Attributes: -*@li size: A required int32 Tensor specifying the new size for the images. \n +*@li size: A required int32 Tensor specifying the new size for the images. No default value. -*@li align_corners: An optional bool. If "true", the centers of the corner \n -pixels of the input and output tensors are aligned. Defaults to "false". +*@li align_corners: An optional bool. If "true", the centers of the corner +pixels of the input and output tensors are aligned. Defaults to "false" . \n *@par Outputs: -*y: A Tensor with the same type and format as input "images". +*y: A Tensor with the same type and format as input "images" . \n *@attention Constraints: -* The input "size" must be a tensor of 2 elements: size[0] <= 7680, \n +* The input "size" must be a tensor of 2 elements: size[0] <= 7680, size[1] <= 4320 *@par Third-party framework compatibility * Compatible with TensorFlow operator ResizeNearestNeighborV2. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ResizeNearestNeighborV2 instead. 
*/ REG_OP(ResizeNearestNeighborV2D) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -1097,18 +1109,18 @@ REG_OP(ResizeNearestNeighborV2D) .OP_END_FACTORY_REG(ResizeNearestNeighborV2D) /** -*@brief Extract the shape information of a JPEG-encoded image. +*@brief Extract the shape information of a JPEG-encoded image . \n *@par Inputs: -*Input contents must be 0-D. Inputs include: \n -*contents: 0-D. The JPEG-encoded image. +*Input contents must be 0-D. Inputs include: +*contents: 0-D. The JPEG-encoded image . \n *@par Attributes: -*output_type: The output type of the operation (int32 or int64). Defaults \n -to int32. +*output_type: The output type of the operation (int32 or int64). Defaults +to int32 . \n *@par Outputs: -*image_shape: 1-D. The image shape with format [height, width, channels]. +*image_shape: 1-D. The image shape with format [height, width, channels] . \n *@par Third-party framework compatibility *Compatible with tensorflow ExtractJpegShape operator. @@ -1121,18 +1133,18 @@ REG_OP(ExtractJpegShape) .OP_END_FACTORY_REG(ExtractJpegShape) /** -*@brief Draw bounding boxes on a batch of images. +*@brief Draw bounding boxes on a batch of images . \n *@par Inputs: -*@li images: 4-D with shape `[batch, height, width, depth]`. \n +*@li images: 4-D with shape `[batch, height, width, depth]`. A batch of images. -*@li boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` \n +*@li boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding boxes. -*@li colors: 2-D. A list of RGBA colors to cycle through for the boxes. +*@li colors: 2-D. A list of RGBA colors to cycle through for the boxes . \n *@par Outputs: -*y: Returns 4-D with the same shape as `images`. \n -The batch of input images with bounding boxes drawn on the images. +*y: Returns 4-D with the same shape as `images`. +The batch of input images with bounding boxes drawn on the images . 
\n *@par Third-party framework compatibility * Compatible with tensorflow DrawBoundingBoxesV2 operator. @@ -1146,33 +1158,33 @@ REG_OP(DrawBoundingBoxesV2) .OP_END_FACTORY_REG(DrawBoundingBoxesV2) /** -*@brief Greedily selects a subset of bounding boxes in descending order of score, \n -pruning away boxes that have high intersection-over-union (IOU) overlap \n -with previously selected boxes. +*@brief Greedily selects a subset of bounding boxes in descending order of score, +pruning away boxes that have high intersection-over-union (IOU) overlap +with previously selected boxes . \n *@par Inputs: *@li boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -*@li scores: A 1-D float tensor of shape `[num_boxes]` representing a single \n +*@li scores: A 1-D float tensor of shape `[num_boxes]` representing a single score corresponding to each box (each row of boxes). -*@li max_output_size: A scalar integer tensor representing the maximum number of \n +*@li max_output_size: A scalar integer tensor representing the maximum number of boxes to be selected by non max suppression. -*@li iou_threshold: A 0-D float tensor representing the threshold for deciding whether \n +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding whether boxes overlap too much with respect to IOU. -*@li score_threshold: A 0-D float tensor representing the threshold for deciding when to \n +*@li score_threshold: A 0-D float tensor representing the threshold for deciding when to remove boxes based on score. -*@li soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft NMS. +*@li soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft NMS . \n *@par Attributes: -pad_to_max_output_size: If true, the output `selected_indices` is padded to be of length \n -`max_output_size`. Defaults to false. If not specified, defaults to false. +pad_to_max_output_size: If true, the output `selected_indices` is padded to be of length +`max_output_size`. 
Defaults to false. If not specified, defaults to false . \n *@par Outputs: -*@li selected_indices: A 1-D integer tensor of shape [M] representing the \n +*@li selected_indices: A 1-D integer tensor of shape [M] representing the selected indices from the boxes tensor, where M <= max_output_size. -*@li selected_scores: A 1-D float tensor of shape `[M]` representing the corresponding \n +*@li selected_scores: A 1-D float tensor of shape `[M]` representing the corresponding scores for each selected box, where `M <= max_output_size`. -*@li valid_outputs: A 0-D integer tensor representing the number of valid \n -elements in selected_indices, with the valid elements appearing first. +*@li valid_outputs: A 0-D integer tensor representing the number of valid +elements in selected_indices, with the valid elements appearing first . \n *@par Third-party framework compatibility * Compatible with tensorflow NonMaxSuppressionV5 operator. @@ -1193,17 +1205,20 @@ REG_OP(NonMaxSuppressionV5) .OP_END_FACTORY_REG(NonMaxSuppressionV5) /** -*@brief Resizes "images" to "size" by scale and translate. +*@brief Resizes "images" to "size" by scale and translate . \n *@par Inputs: -*@li images: A `Tensor`. Must be one of the following types: `int8`, `uint8`, \n -`int16`, `uint16`, `int32`, `int64`, `bfloat16`, `half`, `float32`, `float64`. +*@li images: A `Tensor`. Must be one of the following types: `int8`, `uint8`, +`int16`, `uint16`, `int32`, `int64`, `bfloat16`, `float32`, `float64`. *@li size: A `Tensor` of type `int32`. *@li scale: A `Tensor` of type `float32`. -*@li translation: A `Tensor` of type `float32`. +*@li translation: A `Tensor` of type `float32` . \n + +*@li kernel_type: type is string, default lanczos3 +*@li antialias: type is bool, default true \n *@par Outputs: -*y: A Tensor with type float32. +*y: A Tensor with type float32 . \n *@par Third-party framework compatibility * Compatible with TensorFlow ScaleAndTranslate operator. 
@@ -1221,16 +1236,19 @@ REG_OP(ScaleAndTranslate) .OP_END_FACTORY_REG(ScaleAndTranslate) /** -*@brief Computes the gradient by scale and translate. +*@brief Computes the gradient by scale and translate . \n *@par Inputs: *@li grads: A `Tensor`. Must be one of the following types: `float32`. *@li original_image: A `Tensor`. Must have the same type as `grads`. *@li scale: A `Tensor` of type `float32`. -*@li translation: A `Tensor` of type `float32`. +*@li translation: A `Tensor` of type `float32` . \n + +*@li kernel_type: type is string, default lanczos3 +*@li antialias: type is bool, default true *@par Outputs: -*y: A `Tensor`. Has the same type as `grads`. +*y: A `Tensor`. Has the same type as `grads` . \n *@par Third-party framework compatibility * Compatible with TensorFlow ScaleAndTranslateGrad operator. @@ -1247,36 +1265,37 @@ REG_OP(ScaleAndTranslateGrad) .OP_END_FACTORY_REG(ScaleAndTranslateGrad) /** -*@brief Greedily selects a subset of bounding boxes in descending order of score, \n -This operation performs non_max_suppression on the inputs per batch, across all classes. +*@brief Greedily selects a subset of bounding boxes in descending order of score, +This operation performs non_max_suppression on the inputs per batch, across all classes . \n *@par Inputs: -*@li boxes: A 4-D float tensor of shape `[batch_size, num_boxes, q, 4]`. If `q` is 1 then \n -same boxes are used for all classes otherwise, if `q` is equal to number of \n +*@li boxes: A 4-D float tensor of shape `[batch_size, num_boxes, q, 4]`. If `q` is 1 then +same boxes are used for all classes otherwise, if `q` is equal to number of classes, class-specific boxes are used. -*@li scores: A 3-D float tensor of shape `[batch_size, num_boxes, num_classes]` \n +*@li scores: A 3-D float tensor of shape `[batch_size, num_boxes, num_classes]` representing a single score corresponding to each box (each row of boxes). 
-*@li max_output_size_per_class: A scalar integer tensor representing the maximum number of \n +*@li max_output_size_per_class: A scalar integer tensor representing the maximum number of boxes to be selected by non max suppression per class. -*@li max_total_size: A scalar representing maximum number of boxes retained over all classes. \n -*@li iou_threshold: A 0-D float tensor representing the threshold for deciding whether \n +*@li max_total_size: A scalar representing maximum number of boxes retained over all classes. +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding whether boxes overlap too much with respect to IOU. -*@li score_threshold: A 0-D float tensor representing the threshold for deciding when to remove \n -boxes based on score. +*@li score_threshold: A 0-D float tensor representing the threshold for deciding when to remove +boxes based on score . \n *@par Attributes: -*@li pad_per_class: If false, the output nmsed boxes, scores and classes \n -are padded/clipped to `max_total_size`. If true, the \n -output nmsed boxes, scores and classes are padded to be of length \n -`max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in \n +*@li pad_per_class: If false, the output nmsed boxes, scores and classes +are padded/clipped to `max_total_size`. If true, the +output nmsed boxes, scores and classes are padded to be of length +`max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in which case it is clipped to `max_total_size`. Defaults to false. -*@li clip_boxes: If true, assume the box coordinates are between [0, 1] and clip the output boxes \n -if they fall beyond [0, 1]. If false, do not do clipping and output the box \n -coordinates as it is. If not specified, defaults to true. +*@li clip_boxes: If true, assume the box coordinates are between [0, 1] and clip the output boxes +if they fall beyond [0, 1]. If false, do not do clipping and output the box +coordinates as it is. 
If not specified, defaults to true . \n *@par Outputs: -*y: A 1-D integer tensor of shape `[M]` representing the selected \n -indices from the boxes tensor, where `M <= max_output_size`. +*nmsed_boxes:type is float +*nmsed_scores:type is float +*nmsed_classes:type is float \n *@par Third-party framework compatibility * Compatible with tensorflow CombinedNonMaxSuppression operator. @@ -1298,11 +1317,11 @@ REG_OP(CombinedNonMaxSuppression) .OP_END_FACTORY_REG(CombinedNonMaxSuppression) /** -*@brief Function spatial transformer. +*@brief Function spatial transformer . \n *@par Inputs: *@li x: A Tensor dtype of float16, float32. -*@li theta: A Tensor dtype of float16, float32, auxiliary coefficients. +*@li theta: A Tensor dtype of float16, float32, auxiliary coefficients . \n *@par Attributes: *@li output_size: A tuple output size. diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h index 014b7a1b..2f9906fc 100644 --- a/third_party/fwkacllib/inc/ops/internal_ops.h +++ b/third_party/fwkacllib/inc/ops/internal_ops.h @@ -27,13 +27,13 @@ namespace ge { /** -*@brief aicpu assit help op for auxiliary matrix generation. +*@brief aicpu assist help op for auxiliary matrix generation. \n *@par Inputs: -*The input is dynamic for attribute func_name \n +*The input is dynamic for attribute func_name \n *@par Attributes: -*@li func_name:An required param, for example "topkv2". \n +*@li func_name:A required param, for example "topkv2". \n *@par Outputs: *The output is dynamic for attribute func_name. @@ -47,10 +47,10 @@ REG_OP(AssistHelp) . OP_END_FACTORY_REG(AssistHelp) /** -*@brief aicpu cache help for lhisi cache flush. +*@brief aicpu cache help for lhisi cache flush. \n *@par Inputs: -*The input is dynamic for attribute func_name \n +*The input is dynamic for attribute func_name \n *@par Outputs: *The output is dynamic for attribute func_name. 
@@ -61,10 +61,10 @@ REG_OP(CacheUpdate) .OP_END_FACTORY_REG(CacheUpdate) /** -*@brief transfer data from L1 buffer to DDR or DDR to L1. +*@brief transfer data from L1 buffer to DDR or DDR to L1. \n *@par Inputs: -*The input is dynamic for attribute func_name \n +*The input is dynamic for attribute func_name \n *@par Outputs: *The output is dynamic for attribute func_name. diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index 145e021e..5d98f999 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -27,24 +27,24 @@ namespace ge { /** -*@brief Computes the reverse mode backpropagated gradient of the Cholesky \n -algorithm. +*@brief Computes the reverse mode backpropagated gradient of the Cholesky +algorithm . \n *@par Inputs: -*The input x has to be symmetric and positive definite. Inputs include: \n -*@li x:A Tensor. Must be one of the following types: double, float32. Output \n -of batch Cholesky algorithm x = cholesky(A). Shape is [..., M, M]. Algorithm \n +*The input x has to be symmetric and positive definite. Inputs include: +*@li x:A Tensor. Must be one of the following types: double, float32. Output +of batch Cholesky algorithm x = cholesky(A). Shape is [..., M, M]. Algorithm depends only on lower triangular part of the innermost matrices of this tensor. -*@li grad:A Tensor. Must have the same type as l. df/dx where f is some \n -scalar function. Shape is [..., M, M]. Algorithm depends only on lower \n -triangular part of the innermost matrices of this tensor. +*@li grad:A Tensor. Must have the same type as l. df/dx where f is some +scalar function. Shape is [..., M, M]. Algorithm depends only on lower +triangular part of the innermost matrices of this tensor . \n *@par Outputs: -*y:A Tensor. Has the same type as x. +*y:A Tensor. Has the same type as x . 
\n -*@attention Constraints: \n -*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions \n -form square matrices. \n +*@attention Constraints: +*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions +form square matrices. *@par Third-party framework compatibility *Compatible with tensorflow CholeskyGrad operator. @@ -57,19 +57,19 @@ REG_OP(CholeskyGrad) .OP_END_FACTORY_REG(CholeskyGrad) /** -*@brief Computes the Cholesky decomposition of one or more square matrices. +*@brief Computes the Cholesky decomposition of one or more square matrices . \n *@par Inputs: -*The input x has to be symmetric and positive definite.Inputs include: \n -*x:A Tensor. Must be one of the following types: double, float32. Shape \n -is [..., M, M]. +*The input x has to be symmetric and positive definite.Inputs include: +*x:A Tensor. Must be one of the following types: double, float32. Shape +is [..., M, M] . \n *@par Outputs: -*y:A Tensor. Has the same type as x. +*y:A Tensor. Has the same type as x . \n -*@attention Constraints: \n -*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions \n -form square matrices. \n +*@attention Constraints: +*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions +form square matrices. *@par Third-party framework compatibility *Compatible with tensorflow Cholesky operator. @@ -81,21 +81,21 @@ REG_OP(Cholesky) .OP_END_FACTORY_REG(Cholesky) /** -*@brief Computes the sign and the log of the absolute value of the determinant \n -of one or more square matrices. +*@brief Computes the sign and the log of the absolute value of the determinant +of one or more square matrices . \n *@par Inputs: -*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions \n -form square matrices. Inputs include: \n -*x:A Tensor. Must be one of the following types: double, float32. Shape is \n -[..., M, M]. 
+*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. Inputs include: +*x:A Tensor. Must be one of the following types: double, float32. Shape is +[..., M, M] . \n *@par Outputs: *@li y:A Tensor. Has the same type as x. -*@li sign:A Tensor. Has the same type as x. +*@li sign:A Tensor. Has the same type as x . \n -*@attention Constraints: \n -*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions \n +*@attention Constraints: +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions form square matrices. \n *@par Third-party framework compatibility @@ -109,20 +109,20 @@ REG_OP(LogMatrixDeterminant) .OP_END_FACTORY_REG(LogMatrixDeterminant) /** -*@brief Computes the determinant of one or more square matrices. +*@brief Computes the determinant of one or more square matrices . \n *@par Inputs: -*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions \n -form square matrices. Inputs include: \n -*x:A Tensor. Must be one of the following types: double, float32. Shape is \n -[..., M, M]. +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. Inputs include: +*x:A Tensor. Must be one of the following types: double, float32. Shape is +[..., M, M] . \n *@par Outputs: -*y:A Tensor. Has the same type as x. +*y:A Tensor. Has the same type as x . \n -*@attention Constraints: \n -*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions \n -form square matrices. \n +*@attention Constraints: +*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions +form square matrices. *@par Third-party framework compatibility *Compatible with tensorflow MatrixDeterminant operator. @@ -134,25 +134,25 @@ REG_OP(MatrixDeterminant) .OP_END_FACTORY_REG(MatrixDeterminant) /** -*@brief Computes the inverse of one or more square invertible matrices or \n -their adjoints (conjugate transposes). 
+*@brief Computes the inverse of one or more square invertible matrices or +their adjoints (conjugate transposes) . \n *@par Inputs: -*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions \n -form square matrices. Inputs include: \n -*x:A Tensor. Must be one of the following types: double, float. Shape is \n -[..., M, M]. +*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions +form square matrices. Inputs include: +*x:A Tensor. Must be one of the following types: double, float. Shape is +[..., M, M] . \n *@par Attributes: -*adjoint:An optional bool. Defaults to False.Boolean indicating whether to \n -deal with matrix or its (block-wise) adjoint. +*adjoint:An optional bool. Defaults to False.Boolean indicating whether to +deal with matrix or its (block-wise) adjoint . \n *@par Outputs: -*y:A Tensor. Has the same type as x. +*y:A Tensor. Has the same type as x . \n -*@attention Constraints: \n -*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions \n -form square matrices. \n +*@attention Constraints: +*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions +form square matrices. \n *@par Third-party framework compatibility *Compatible with tensorflow MatrixInverse operator. @@ -165,24 +165,24 @@ REG_OP(MatrixInverse) .OP_END_FACTORY_REG(MatrixInverse) /** -*@brief Solves systems of linear equations. +*@brief Solves systems of linear equations . \n *@par Inputs: -*The input rhs must have the same type as matrix. Inputs include: \n -*@li matrix:A Tensor. Must be one of the following types: double, float. \n +*The input rhs must have the same type as matrix. Inputs include: +*@li matrix:A Tensor. Must be one of the following types: double, float. Shape is [..., M, M]. -*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K]. +*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n *@par Attributes: -*adjoint:An optional bool. 
Defaults to False.Boolean indicating whether to \n -solve with matrix or its (block-wise) adjoint. +*adjoint:An optional bool. Defaults to False.Boolean indicating whether to +solve with matrix or its (block-wise) adjoint . \n *@par Outputs: -*y:A Tensor. Has the same type as matrix. +*y:A Tensor. Has the same type as matrix . \n -*@attention Constraints: \n -*The input matrix is a tensor of shape [..., M, M] whose inner-most 2 \n -dimensions form square matrices. \n +*@attention Constraints: +*The input matrix is a tensor of shape [..., M, M] whose inner-most 2 +dimensions form square matrices. \n *@par Third-party framework compatibility *Compatible with tensorflow MatrixSolve operator. @@ -196,25 +196,25 @@ REG_OP(MatrixSolve) .OP_END_FACTORY_REG(MatrixSolve) /** -*@brief Solves systems of linear equations. +*@brief Solves systems of linear equations . \n *@par Inputs: -*The input rhs must have the same type as matrix. Inputs include: \n +*The input rhs must have the same type as matrix. Inputs include: *@li matrix:A Tensor. Shape is [..., M, M]. *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K]. -*@li l2:0-D double Tensor. Ignored if fast=False. +*@li l2:0-D double Tensor. Ignored if fast=False . \n *@par Attributes: -*fast:bool. Defaults to True. +*fast:bool. Defaults to True . \n *@par Outputs: -*y:Tensor of shape [..., N, K] whose inner-most 2 dimensions form M-by-K \n -matrices that solve the equations matrix[..., :, :] * output[..., :, :] = \n -rhs[..., :, :] in the least squares sense. +*y:Tensor of shape [..., N, K] whose inner-most 2 dimensions form M-by-K +matrices that solve the equations matrix[..., :, :] * output[..., :, :] = +rhs[..., :, :] in the least squares sense . \n -*@attention Constraints: \n -*The input matrix matrix is a tensor of shape [..., M, M] whose inner-most 2 \n -dimensions form square matrices. 
\n +*@attention Constraints: +*The input matrix matrix is a tensor of shape [..., M, M] whose inner-most 2 +dimensions form square matrices. \n *@par Third-party framework compatibility *Compatible with tensorflow MatrixSolveLs operator. @@ -229,27 +229,27 @@ REG_OP(MatrixSolveLs) .OP_END_FACTORY_REG(MatrixSolveLs) /** -*@brief Solves systems of linear equations with upper or lower triangular \n -matrices by backsubstitution. +*@brief Solves systems of linear equations with upper or lower triangular +matrices by backsubstitution . \n *@par Inputs: -*The input rhs must have the same type as matrix. Inputs include: \n -*@li matrix: A Tensor. Must be one of the following types: double, float. \n +*The input rhs must have the same type as matrix. Inputs include: +*@li matrix: A Tensor. Must be one of the following types: double, float. Shape is [..., M, M]. -*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K]. +*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n *@par Attributes: -*@li lower: An optional bool. Defaults to True. Boolean indicating whether \n +*@li lower: An optional bool. Defaults to True. Boolean indicating whether the innermost matrices in matrix are lower or upper triangular. -*@li An optional bool. Defaults to False. Boolean indicating whether to solve \n -with matrix or its (block-wise) adjoint. +*@li An optional bool. Defaults to False. Boolean indicating whether to solve +with matrix or its (block-wise) adjoint . \n *@par Outputs: -*y:A Tensor. Has the same type as matrix. +*y:A Tensor. Has the same type as matrix . \n -*@attention Constraints: \n -*The input matrix is a tensor of shape [..., M, M] whose inner-most 2 \n -dimensions form square matrices. \n +*@attention Constraints: +*The input matrix is a tensor of shape [..., M, M] whose inner-most 2 +dimensions form square matrices. \n *@par Third-party framework compatibility *Compatible with tensorflow MatrixTriangularSolve operator. 
@@ -264,25 +264,25 @@ REG_OP(MatrixTriangularSolve) .OP_END_FACTORY_REG(MatrixTriangularSolve) /** -*@brief Computes the QR decompositions of one or more matrices. +*@brief Computes the QR decompositions of one or more matrices . \n *@par Inputs: -*The input shape of x must be [..., M, N]. Inputs include: \n -*x:A Tensor whose shape is [..., M, N]. Must be one of the following types: \n -double, float. +*The input shape of x must be [..., M, N]. Inputs include: +*x:A Tensor whose shape is [..., M, N]. Must be one of the following types: +double, float . \n *@par Attributes: -*full_matrices: An optional bool. Defaults to False. If true, compute \n -full-sized q and r. If false (the default), compute only the leading P \n -columns of q. +*full_matrices: An optional bool. Defaults to False. If true, compute +full-sized q and r. If false (the default), compute only the leading P +columns of q . \n *@par Outputs: *@li q: A Tensor. Has the same type as x. -*@li r: A Tensor. Has the same type as x. +*@li r: A Tensor. Has the same type as x . \n -*@attention Constraints: \n -*The input matrix x is a tensor of shape [..., M, N] whose inner-most 2 \n -dimensions form matrices of size [M, N]. \n +*@attention Constraints: +*The input matrix x is a tensor of shape [..., M, N] whose inner-most 2 +dimensions form matrices of size [M, N]. \n *@par Third-party framework compatibility *Compatible with tensorflow Qr operator. @@ -296,24 +296,24 @@ REG_OP(Qr) .OP_END_FACTORY_REG(Qr) /** -*@brief Computes the eigen decomposition of a batch of self-adjoint matrices. +*@brief Computes the eigen decomposition of a batch of self-adjoint matrices . \n *@par Inputs: -*The input shape of x must be [..., N, N]. Inputs include: \n -*x:Tensor of shape [..., N, N]. Only the lower triangular part of each inner \n -inner matrix is referenced. +*The input shape of x must be [..., N, N]. Inputs include: +*x:Tensor of shape [..., N, N]. 
Only the lower triangular part of each inner +inner matrix is referenced . \n *@par Attributes: -*compute_v:bool. Defaults to True. +*compute_v:bool. Defaults to True . \n *@par Outputs: *@li eigen_value:Eigenvalues. Shape is [..., N]. Sorted in non-decreasing order. -*@li eigen_vector:Shape is [..., N, N]. The columns of the inner most matrices \n +*@li eigen_vector:Shape is [..., N, N]. The columns of the inner most matrices contain eigenvectors of the corresponding matrices in tensor -*@attention Constraints: \n -*The input x is a tensor of shape [..., N, N] whose inner-most 2 dimensions \n -form square matrices. \n +*@attention Constraints: +*The input x is a tensor of shape [..., N, N] whose inner-most 2 dimensions +form square matrices. \n *@par Third-party framework compatibility *Compatible with tensorflow SelfAdjointEig operator. @@ -327,31 +327,31 @@ REG_OP(SelfAdjointEig) .OP_END_FACTORY_REG(SelfAdjointEig) /** -*@brief Computes the singular value decompositions of one or more matrices. +*@brief Computes the singular value decompositions of one or more matrices . \n *@par Inputs: -*The input shape of x must be [..., N, N]. Inputs include: \n -*x:Tensor of shape [..., M, N]. Let P be the minimum of M and N. +*The input shape of x must be [..., N, N]. Inputs include: +*x:Tensor of shape [..., M, N]. Let P be the minimum of M and N . \n *@par Attributes: -*compute_uv:If True then left and right singular vectors will be computed and \n -returned in u and v, respectively. Otherwise, only the singular values will \n -be computed, which can be significantly faster. +*compute_uv:If True then left and right singular vectors will be computed and +returned in u and v, respectively. Otherwise, only the singular values will +be computed, which can be significantly faster . \n *@par Outputs: -*@li sigma:Singular values. Shape is [..., P]. 
The values are sorted in \n -reverse order of magnitude, so s[..., 0] is the largest value, s[..., 1] \n +*@li sigma:Singular values. Shape is [..., P]. The values are sorted in +reverse order of magnitude, so s[..., 0] is the largest value, s[..., 1] is the second largest, etc. -*@li u:Left singular vectors. If full_matrices is False (default) then shape \n -is [..., M, P]; if full_matrices is True then shape is [..., M, M]. Not \n -returned if compute_uv is False. -*@li v:Right singular vectors. If full_matrices is False (default) then shape \n -is [..., N, P]. If full_matrices is True then shape is [..., N, N]. Not \n +*@li u:Left singular vectors. If full_matrices is False (default) then shape +is [..., M, P]; if full_matrices is True then shape is [..., M, M]. Not returned if compute_uv is False. +*@li v:Right singular vectors. If full_matrices is False (default) then shape +is [..., N, P]. If full_matrices is True then shape is [..., N, N]. Not +returned if compute_uv is False . \n -*@attention Constraints: \n -*The input x is a tensor of shape [..., N, N] whose inner-most 2 dimensions \n -form square matrices. \n +*@attention Constraints: +*The input x is a tensor of shape [..., N, N] whose inner-most 2 dimensions +form square matrices. \n *@par Third-party framework compatibility *Compatible with tensorflow Svd operator @@ -367,17 +367,17 @@ REG_OP(Svd) .OP_END_FACTORY_REG(Svd) /** -*@brief Computes the LU decomposition of one or more square matrices. +*@brief Computes the LU decomposition of one or more square matrices . \n *@par Inputs: -*input: A tensor of shape `[..., M, M]` whose inner-most 2 dimensions form \n -matrices of size `[M, M]`. +*input: A tensor of shape `[..., M, M]` whose inner-most 2 dimensions form +matrices of size `[M, M]` . 
\n *@par Outputs: -*@li lu: A tensor of shape `[..., M, M]` whose strictly lower triangular part \n +*@li lu: A tensor of shape `[..., M, M]` whose strictly lower triangular part denotes the lower triangular factor `L` with unit diagonal. -*@li p: upper triangular part denotes the upper triangular factor `U`.Permutation \n -of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]`. +*@li p: upper triangular part denotes the upper triangular factor `U`.Permutation +of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n *@par Third-party framework compatibility * Compatible with TensorFlow Lu operator. @@ -391,13 +391,13 @@ REG_OP(Lu) .OP_END_FACTORY_REG(Lu) /** -*@brief Computes the matrix square root of one or more square matrices. +*@brief Computes the matrix square root of one or more square matrices . \n *@par Inputs: -*input: Shape is `[..., M, M]`. +*input: Shape is `[..., M, M]` . \n *@par Outputs: -y: Shape is `[..., M, M]`. +y: Shape is `[..., M, M]` . \n *@par Third-party framework compatibility * Compatible with TensorFlow MatrixSquareRoot operator. @@ -409,18 +409,15 @@ REG_OP(MatrixSquareRoot) .OP_END_FACTORY_REG(MatrixSquareRoot) /** -*@brief Solves tridiagonal systems of equations. +*@brief Solves tridiagonal systems of equations . \n *@par Inputs: -*@li diagonals: Tensor of shape `[..., 3, M]` whose innermost 2 dimensions represent the \n -tridiagonal matrices with three rows being the superdiagonal, diagonals, and \n -subdiagonals, in order. The last element of the superdiagonal and the first \n -element of the subdiagonal is ignored. -*@li rhs: Tensor of shape `[..., M, K]`, representing K right-hand sides per each \n -left-hand side. +*@li diagonals: Tensor of shape `[..., 3, M]` whose innermost 2 dimensions represent the tridiagonal matrices with three rows being the superdiagonal, diagonals, and subdiagonals, in order. 
The last element of the superdiagonal and the first element of the subdiagonal is ignored. +*@li rhs: Tensor of shape `[..., M, K]`, representing K right-hand sides per each +left-hand side . \n *@par Outputs: -y: Tensor of shape `[..., M, K]` containing the solutions +y: Tensor of shape `[..., M, K]` containing the solutions \n *@par Third-party framework compatibility * Compatible with TensorFlow TridiagonalSolve operator. diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h index 7ca04188..db9097ce 100644 --- a/third_party/fwkacllib/inc/ops/logging_ops.h +++ b/third_party/fwkacllib/inc/ops/logging_ops.h @@ -27,18 +27,18 @@ namespace ge { /** -*@brief Provides the time since epoch in seconds. +*@brief Provides the time since epoch in seconds . \n *@par Outputs: -*y: A Tensor of type float64. The timestamp as a double for seconds since \n -the Unix epoch. +*y: A Tensor of type float64. The timestamp as a double for seconds since +the Unix epoch . \n -*@attention Constraints: \n -*The timestamp is computed when the op is executed, not when it is added to \n -the graph. +*@attention Constraints: +*The timestamp is computed when the op is executed, not when it is added to +the graph . \n *@par Third-party framework compatibility -*Compatible with tensorflow Timestamp operator. +*Compatible with tensorflow Timestamp operator . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -48,19 +48,20 @@ REG_OP(Timestamp) .OP_END_FACTORY_REG(Timestamp) /** -*@brief Asserts that the given condition is true. +*@brief Asserts that the given condition is true . \n *@par Inputs: -*If input_condition evaluates to false, print the list of tensors in data. \n -Inputs include: \n +*If input_condition evaluates to false, print the list of tensors in data. +*Inputs include: *@li input_condition: The condition to evaluate. -*@li input_data: The tensors to print out when condition is false. 
+*@li input_data: The tensors to print out when condition is false . + It's a dynamic input. \n *@par Attributes: -*summarize: Print this many entries of each tensor. +*summarize: Print this many entries of each tensor . \n *@par Third-party framework compatibility -*Compatible with tensorflow Assert operator. +*Compatible with tensorflow Assert operator . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -74,12 +75,12 @@ REG_OP(Assert) .OP_END_FACTORY_REG(Assert) /** -*@brief Prints a tensor. +*@brief Prints a tensor . \n *@par Inputs: -*x: The tensor to print, it is a dynamic_input. +*x: The tensor to print, it is a dynamic_input . \n -*Compatible with aicpu Print operator. +*Compatible with aicpu Print operator . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -90,18 +91,18 @@ REG_OP(Print) .OP_END_FACTORY_REG(Print) /** -*@brief Prints a string scalar. +*@brief Prints a string scalar . \n *@par Inputs: -*The dtype of input x must be string. Inputs include: \n -*x: The string scalar to print. +*The dtype of input x must be string. Inputs include: +*x: The string scalar to print . \n *@par Attributes: -*output_stream: A string specifying the output stream or logging level \n -to print to. +*output_stream: A string specifying the output stream or logging level +to print to . \n *@par Third-party framework compatibility -*Compatible with tensorflow PrintV2 operator. +*Compatible with tensorflow PrintV2 operator . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h index bd34ab64..84b138c4 100644 --- a/third_party/fwkacllib/inc/ops/lookup_ops.h +++ b/third_party/fwkacllib/inc/ops/lookup_ops.h @@ -26,13 +26,13 @@ namespace ge { /** -*@brief Replaces the contents of the table with the specified keys and values. 
+*@brief Replaces the contents of the table with the specified keys and values . \n *@par Inputs: -*The dtype of input handle must be resource. Inputs include: \n +*The dtype of input handle must be resource. Inputs include: *@li handle: A Tensor of type resource. Handle to the table. *@li keys: A Tensor. Any shape. Keys to look up. -*@li values: A Tensor. Values to associate with keys. +*@li values: A Tensor. Values to associate with keys . \n *@par Third-party framework compatibility. *Compatible with tensorflow LookupTableImport operator. @@ -46,17 +46,17 @@ REG_OP(LookupTableImport) .OP_END_FACTORY_REG(LookupTableImport) /** -*@brief Updates the table to associates keys with values. +*@brief Updates the table to associate keys with values . \n *@par Inputs: -*The dtype of input handle must be resource. Inputs include: \n +*The dtype of input handle must be resource. Inputs include: *@li handle: A Tensor of type resource. Handle to the table. *@li keys: A Tensor. Any shape. Keys to look up. -*@li values: A Tensor. Values to associate with keys. +*@li values: A Tensor. Values to associate with keys . \n -*@attention Constraints: \n -*@li The tensor keys must be of the same type as the keys of the table. \n -*@li The tensor values must be of the type of the table values. \n +*@attention Constraints: +*@li The tensor keys must be of the same type as the keys of the table. +*@li The tensor values must be of the type of the table values. *@par Third-party framework compatibility. *Compatible with tensorflow LookupTableInsert operator. @@ -70,19 +70,19 @@ REG_OP(LookupTableInsert) .OP_END_FACTORY_REG(LookupTableInsert) /** -*@brief Outputs all keys and values in the table. +*@brief Outputs all keys and values in the table . \n *@par Inputs: -*The dtype of input handle must be resource. Inputs include: \n -*handle: A Tensor of type resource. Handle to the table. +*The dtype of input handle must be resource. Inputs include: +*handle: A Tensor of type resource. 
Handle to the table . \n *@par Attributes: *@li Tkeys: A DType. -*@li Tvalues: A DType. +*@li Tvalues: A DType . \n *@par Outputs: *@li keys: A Tensor of type Tkeys. -*@li values: A Tensor of type Tvalues. +*@li values: A Tensor of type Tvalues . \n *@par Third-party framework compatibility. *Compatible with tensorflow LookupTableExport operator. @@ -98,14 +98,14 @@ REG_OP(LookupTableExport) .OP_END_FACTORY_REG(LookupTableExport) /** -*@brief Computes the number of elements in the given table. +*@brief Computes the number of elements in the given table . \n *@par Inputs: -*The dtype of input handle must be resource. Inputs include: \n -*handle: A Tensor of type resource. Handle to the table. +*The dtype of input handle must be resource. Inputs include: +*handle: A Tensor of type resource. Handle to the table . \n *@par Outputs: -*size: A Tensor of type int64. +*size: A Tensor of type int64 . \n *@par Third-party framework compatibility. *Compatible with tensorflow LookupTableSize operator. @@ -117,19 +117,19 @@ REG_OP(LookupTableSize) .OP_END_FACTORY_REG(LookupTableSize) /** -*@brief Looks up keys in a table, outputs the corresponding values. +*@brief Looks up keys in a table, outputs the corresponding values . \n *@par Inputs: -*The dtype of input handle must be resource. Inputs include: \n +*The dtype of input handle must be resource. Inputs include: *@li handle: A Tensor of type resource. Handle to the table. *@li keys: A Tensor. Any shape. Keys to look up. -*@li default_value: A Tensor. +*@li default_value: A Tensor . \n *@par Attributes: -*Tout: Specified type of ouput values. +*Tout: Specified type of output values . \n *@par Outputs: -*values: A Tensor. Has the same type as default_value. +*values: A Tensor. Has the same type as default_value . \n *@par Third-party framework compatibility. *Compatible with tensorflow LookupTableFind operator. 
@@ -146,23 +146,23 @@ REG_OP(LookupTableFind) .OP_END_FACTORY_REG(LookupTableFind) /** -*@brief Creates a non-initialized hash table. +*@brief Creates a non-initialized hash table . \n *@par Attributes: -*@li container: An optional string. Defaults to "". If non-empty, this table \n +*@li container: An optional string. Defaults to "". If non-empty, this table is placed in the given container. Otherwise, a default container is used. -*@li shared_name: An optional string. Defaults to "". If non-empty, this \n +*@li shared_name: An optional string. Defaults to "". If non-empty, this table is shared under the given name across multiple sessions. -*@li use_node_name_sharing: An optional bool. Defaults to False. If true and \n +*@li use_node_name_sharing: An optional bool. Defaults to False. If true and shared_name is empty, the table is shared using the node name. *@li key_dtype: A DType. Type of the table keys. -*@li value_dtype: A DType. Type of the table values. +*@li value_dtype: A DType. Type of the table values . \n *@par Outputs: -*handle: A Tensor of type resource. Handle to the table. +*handle: A Tensor of type resource. Handle to the table . \n -*@attention Constraints: \n -*The implementation for HashTable on Ascend uses ai cpu, with bad performance. \n +*@attention Constraints: +*The implementation for HashTable on Ascend uses ai cpu, with bad performance. *@par Third-party framework compatibility. *Compatible with tensorflow HashTable operator. @@ -178,15 +178,15 @@ REG_OP(HashTable) .OP_END_FACTORY_REG(HashTable) /** -*@brief Table initializer that takes two tensors for keys and values \n -respectively. +*@brief Table initializer that takes two tensors for keys and values +respectively . \n *@par Inputs: -*The dtype of input handle must be resource. Inputs include: \n -*@li handle: A Tensor of type resource. Handle to a table which will be \n +*The dtype of input handle must be resource. Inputs include: +*@li handle: A Tensor of type resource. 
Handle to a table which will be initialized. *@li keys: A Tensor. Keys of type Tkey. -*@li values: A Tensor. Values of type Tval. +*@li values: A Tensor. Values of type Tval . \n *@par Third-party framework compatibility. *Compatible with tensorflow InitializeTable operator. @@ -200,32 +200,32 @@ REG_OP(InitializeTable) .OP_END_FACTORY_REG(InitializeTable) /** -*@brief Creates an empty hash table that uses tensors as the backing store. +*@brief Creates an empty hash table that uses tensors as the backing store . \n *@par Inputs: -*The input deleted_key must have the same type as empty_key. Inputs include: \n -*@li empty_key: A Tensor. The key used to represent empty key buckets \n +*The input deleted_key must have the same type as empty_key. Inputs include: +*@li empty_key: A Tensor. The key used to represent empty key buckets internally. Must not be used in insert or lookup operations. -*@li deleted_key: A Tensor. Must have the same type as empty_key. +*@li deleted_key: A Tensor. Must have the same type as empty_key . \n *@par Attributes: -*@li container: An optional string. Defaults to "". If non-empty, this table \n +*@li container: An optional string. Defaults to "". If non-empty, this table is placed in the given container. Otherwise, a default container is used. -*@li shared_name: An optional string. Defaults to "". If non-empty, this \n +*@li shared_name: An optional string. Defaults to "". If non-empty, this table is shared under the given name across multiple sessions. -*@li use_node_name_sharing: An optional bool. Defaults to False. If true and \n +*@li use_node_name_sharing: An optional bool. Defaults to False. If true and shared_name is empty, the table is shared using the node name. *@li value_dtype: A DType. Type of the table values. -*@li value_shape: An optional TensorShape or list of ints. Defaults to []. \n +*@li value_shape: An optional TensorShape or list of ints. Defaults to []. The shape of each value. 
-*@li initial_num_buckets: An optional int. Defaults to 131072. The initial \n +*@li initial_num_buckets: An optional int. Defaults to 131072. The initial number of hash table buckets. Must be a power to 2. -*@li max_load_factor: An optional float. Defaults to 0.8. The maximum ratio \n -between number of entries and number of buckets before growing the table. \n -Must be between 0 and 1. +*@li max_load_factor: An optional float. Defaults to 0.8. The maximum ratio +between number of entries and number of buckets before growing the table. +Must be between 0 and 1 . \n *@par Outputs: -*handle: A Tensor of type resource. Handle to the table. +*handle: A Tensor of type resource. Handle to the table . \n *@par Third-party framework compatibility. *Compatible with tensorflow MutableDenseHashTable operator. @@ -245,21 +245,21 @@ REG_OP(MutableDenseHashTable) .OP_END_FACTORY_REG(MutableDenseHashTable) /** -*@brief Creates an empty hash table. +*@brief Creates an empty hash table . \n *@par Attributes: -*@li container: An optional string. Defaults to "". If non-empty, this table \n +*@li container: An optional string. Defaults to "". If non-empty, this table is placed in the given container. Otherwise, a default container is used. -*@li shared_name: An optional string. Defaults to "". If non-empty, this \n +*@li shared_name: An optional string. Defaults to "". If non-empty, this table is shared under the given name across multiple sessions. -*@li use_node_name_sharing: An optional bool. Defaults to False. If true and \n +*@li use_node_name_sharing: An optional bool. Defaults to False. If true and shared_name is empty, the table is shared using the node name. *@li key_dtype: A DType. Type of the table keys. *@li value_dtype: A DType. Type of the table values. -*@li value_shape: An optional TensorShape or list of ints. Defaults to []. +*@li value_shape: An optional TensorShape or list of ints. Defaults to [] . \n *@par Outputs: -*handle: A Tensor of type resource. 
Handle to the table. +*handle: A Tensor of type resource. Handle to the table . \n *@par Third-party framework compatibility. *Compatible with tensorflow MutableHashTableOfTensors operator. @@ -276,20 +276,20 @@ REG_OP(MutableHashTableOfTensors) .OP_END_FACTORY_REG(MutableHashTableOfTensors) /** -*@brief Creates an empty hash table. +*@brief Creates an empty hash table . \n *@par Attributes: -*@li container: An optional string. Defaults to "". If non-empty, this table \n +*@li container: An optional string. Defaults to "". If non-empty, this table is placed in the given container. Otherwise, a default container is used. -*@li shared_name: An optional string. Defaults to "". If non-empty, this \n +*@li shared_name: An optional string. Defaults to "". If non-empty, this table is shared under the given name across multiple sessions. -*@li use_node_name_sharing: An optional bool. Defaults to False. If true and \n +*@li use_node_name_sharing: An optional bool. Defaults to False. If true and shared_name is empty, the table is shared using the node name. *@li key_dtype: A DType. Type of the table keys. -*@li value_dtype: A DType. Type of the table values. +*@li value_dtype: A DType. Type of the table values . \n *@par Outputs: -*handle: A Tensor of type resource. Handle to the table. +*handle: A Tensor of type resource. Handle to the table . \n *@par Third-party framework compatibility. *Compatible with tensorflow MutableHashTable operator. diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 9ee4f6d4..3d7ff1d9 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -27,15 +27,15 @@ namespace ge { /** -*@brief Computes the output as (shift + scale * x) ^ power. +*@brief Computes the output as (shift + scale * x) ^ power . \n *@par Inputs: -* x: A Tensor of type float16 or float32. +* x: A Tensor of type float16 or float32 . \n *@par Attributes: *@li power: Optional. 
Must be one of the following types: float32. Defaults to 1.0. *@li scale: Optional. Must be one of the following types: float32. Defaults to 1.0. -*@li shift: Optional. Must be one of the following types: float32. Defaults to 0.0. +*@li shift: Optional. Must be one of the following types: float32. Defaults to 0.0 . \n *@par Outputs: * y: A Tensor. Has the same type and shape as "x". @@ -52,15 +52,15 @@ REG_OP(Power) .OP_END_FACTORY_REG(Power); /** -*@brief Compute the lower regularized incomplete Gamma function P(a, x). +*@brief Compute the lower regularized incomplete Gamma function P(a, x) . \n *@par Inputs: -*The input a and x must have the same type. Inputs include: \n +*The input a and x must have the same type. Inputs include: *@li a:A Tensor. Must be one of the following types: float, double. -*@li x:A Tensor. Must have the same type as a. +*@li x:A Tensor. Must have the same type as a . \n *@par Outputs: -*z:A Tensor. Has the same type as a. +*z:A Tensor. Has the same type as a . \n *@par Third-party framework compatibility. *Compatible with tensorflow Igamma operator. @@ -73,15 +73,15 @@ REG_OP(Igamma) .OP_END_FACTORY_REG(Igamma) /** -*@brief Compute the upper regularized incomplete Gamma function Q(a, x). +*@brief Compute the upper regularized incomplete Gamma function Q(a, x) . \n *@par Inputs: -*The input a and x must have the same type. Inputs include: \n +*The input a and x must have the same type. Inputs include: *@li a:A Tensor. Must be one of the following types: float, float64. -*@li x:A Tensor. Must have the same type as a. +*@li x:A Tensor. Must have the same type as a . \n *@par Outputs: -*z:A Tensor. Has the same type as a. +*z:A Tensor. Has the same type as a . \n *@par Third-party framework compatibility. *Compatible with tensorflow Igammac operator. @@ -94,18 +94,18 @@ REG_OP(Igammac) .OP_END_FACTORY_REG(Igammac) /** -*@brief Compare values of input to threshold and pack resulting bits into \n -a uint8. 
+*@brief Compare values of input to threshold and pack resulting bits into +a uint8 . \n *@par Inputs: -*The input size must be a non-negative int32 scalar Tensor. Inputs include: \n +*The input size must be a non-negative int32 scalar Tensor. Inputs include: *@li input:Values to compare against threshold and bitpack. -*@li threshold:Threshold to compare against. +*@li threshold:Threshold to compare against . \n *@par Outputs: -*y:The bitpacked comparisons. +*y:The bitpacked comparisons . \n -*@attention Constraints: \n +*@attention Constraints: *Currently, the innermost dimension of the tensor must be divisible by 8. \n *@par Third-party framework compatibility @@ -121,23 +121,23 @@ REG_OP(CompareAndBitpack) .OP_END_FACTORY_REG(CompareAndBitpack) /** -*@brief Counts the number of occurrences of each value in an integer array. \n -Outputs a vector with length size and the same dtype as weights. If weights \n -are empty, then index i stores the number of times the value i is counted in \n -arr. If weights are non-empty, then index i stores the sum of the value in \n -weights at each index. +*@brief Counts the number of occurrences of each value in an integer array. +Outputs a vector with length size and the same dtype as weights. If weights +are empty, then index i stores the number of times the value i is counted in +arr. If weights are non-empty, then index i stores the sum of the value in +weights at each index . \n *@par Inputs: -*The input size must be a non-negative int32 scalar Tensor. Inputs include: \n +*The input size must be a non-negative int32 scalar Tensor. Inputs include: *@li array:int32 Tensor. *@li size:non-negative int32 scalar Tensor. -*@li weights: is an int32, int64, float32, or double Tensor with the same \n -shape as arr, or a length-0 Tensor, in which case it acts as all weights \n -equal to 1. 
+*@li weights: is an int32, int64, float32, or double Tensor with the same +shape as arr, or a length-0 Tensor, in which case it acts as all weights +equal to 1 . \n *@par Outputs: -*bins:1D Tensor with length equal to size. The counts or summed weights for \n -each value in the range [0, size). +*bins:1D Tensor with length equal to size. The counts or summed weights for +each value in the range [0, size) . \n *@par Third-party framework compatibility *Compatible with tensorflow Bincount operator @@ -151,16 +151,16 @@ REG_OP(Bincount) .OP_END_FACTORY_REG(Bincount) /** -*@brief Compute the regularized incomplete beta integral. +*@brief Compute the regularized incomplete beta integral . \n *@par Inputs: -*The input b and x must have the same types as a. Inputs include: \n +*The input b and x must have the same types as a. Inputs include: *@li a:A Tensor. Must be one of the following types: float32, double. *@li b:A Tensor. Must have the same type as a. -*@li x:A Tensor. Must have the same type as a. +*@li x:A Tensor. Must have the same type as a . \n *@par Outputs: -*z:A Tensor. Has the same type as a. +*z:A Tensor. Has the same type as a . \n *@par Third-party framework compatibility. *Compatible with tensorflow Betainc operator. @@ -177,15 +177,15 @@ REG_OP(Betainc) *@brief Compute the Hurwitz zeta function *@par Inputs: -*The input q must be the same type as x. Inputs include: \n +*The input q must be the same type as x. Inputs include: *@li x:A Tensor. Must be one of the following types: float32, double. -*@li q:A Tensor. Must have the same type as x. +*@li q:A Tensor. Must have the same type as x . \n *@par Outputs: -*z:A Tensor. Has the same type as x. +*z:A Tensor. Has the same type as x . \n -*@attention Constraints: \n -*The implementation for Zeta on Ascend uses ai cpu, with bad performance. \n +*@attention Constraints: +*The implementation for Zeta on Ascend uses ai cpu, with bad performance. *@par Third-party framework compatibility. 
*Compatible with tensorflow Zeta operator. @@ -198,19 +198,19 @@ REG_OP(Zeta) .OP_END_FACTORY_REG(Zeta) /** -*@brief Bucketizes 'input' based on 'boundaries'. For example, if the inputs \n -are boundaries = [0, 10, 100] input = [[-5, 10000] [150, 10] [5, 100]] then \n +*@brief Bucketizes 'input' based on 'boundaries'. For example, if the inputs +are boundaries = [0, 10, 100] input = [[-5, 10000] [150, 10] [5, 100]] then the output will be output = [[0, 3] [3, 2] [1, 3]] *@par Inputs: -*The dtype of input x must be int or float. Inputs include: \n -*x:Any shape of Tensor contains with int or float type. +*The dtype of input x must be int, float or double. Inputs include: +*x:Any shape of Tensor contains with int or float type . \n *@par Attributes: -*boundaries:A sorted list of floats gives the boundary of the buckets. +*boundaries:A sorted list of floats gives the boundary of the buckets . \n *@par Outputs: -*y:Same shape with 'input', each value of input replaced with bucket index. +*y:Same shape with 'input', each value of input replaced with bucket index . \n *@par Third-party framework compatibility. *Compatible with tensorflow Bucketize operator. @@ -223,19 +223,19 @@ REG_OP(Bucketize) .OP_END_FACTORY_REG(Bucketize) /** -*@brief Computes the sum along sparse segments of a tensor. +*@brief Computes the sum along sparse segments of a tensor . \n *@par Inputs: -*The input indices and segment_ids must have same rank. Inputs include: \n -*@li x:A Tensor. Must be one of the following types: float, double, int32, \n +*The input indices and segment_ids must have same rank. Inputs include: +*@li x:A Tensor. Must be one of the following types: float, double, int32, uint8, int16, int8, int64, uint16, uint32, uint64. -*@li indices: A Tensor. Must be one of the following types: int32, int64. \n +*@li indices: A Tensor. Must be one of the following types: int32, int64. A 1-D tensor. Has same rank as segment_ids. -*@li segment_ids: A Tensor of type int32. A 1-D tensor. 
Values should be \n -sorted and can be repeated. +*@li segment_ids: A Tensor of type int32. A 1-D tensor. Values should be +sorted and can be repeated . \n *@par Outputs: -*y:A Tensor. Has the same type as x. +*y:A Tensor. Has the same type as x . \n *@par Third-party framework compatibility *Compatible with tensorflow SparseSegmentSum operator @@ -251,18 +251,18 @@ REG_OP(SparseSegmentSum) .OP_END_FACTORY_REG(SparseSegmentSum) /** -*@brief Computes the mean along sparse segments of a tensor. +*@brief Computes the mean along sparse segments of a tensor . \n *@par Inputs: -*The input indices and segment_ids must have same rank. Inputs include: \n +*The input indices and segment_ids must have same rank. Inputs include: *@li x: A Tensor. Must be one of the following types: float, double. -*@li indices: A Tensor. Must be one of the following types: int32, int64. \n +*@li indices: A Tensor. Must be one of the following types: int32, int64. A 1-D tensor. Has same rank as segment_ids. -*@li segment_ids: A Tensor of type int32. A 1-D tensor. Values should be \n -sorted and can be repeated. +*@li segment_ids: A Tensor of type int32. A 1-D tensor. Values should be +sorted and can be repeated . \n *@par Outputs: -*y:A Tensor. Has the same type as x. +*y:A Tensor. Has the same type as x . \n *@par Third-party framework compatibility *Compatible with tensorflow SparseSegmentMean operator @@ -276,21 +276,21 @@ REG_OP(SparseSegmentMean) .OP_END_FACTORY_REG(SparseSegmentMean) /** -*@brief Computes gradients for SparseSegmentMean. +*@brief Computes gradients for SparseSegmentMean . \n *@par Inputs: -*The input grad must have be type float or double. Inputs include: \n -*@li grad: A Tensor. Must be one of the following types: float, double. \n +*The input grad must be of type float or double. Inputs include: +*@li grad: A Tensor. Must be one of the following types: float, double. gradient propagated to the SparseSegmentMean op. -*@li indices: A Tensor. 
Must be one of the following types: int32, int64. \n +*@li indices: A Tensor. Must be one of the following types: int32, int64. indices passed to the corresponding SparseSegmentMean op. -*@li segment_ids: A Tensor of type int32. segment_ids passed to the \n +*@li segment_ids: A Tensor of type int32. segment_ids passed to the corresponding SparseSegmentMean op. -*@li output_dim0: A Tensor of type int32. dimension 0 of "x" passed to \n -SparseSegmentMean op. +*@li output_dim0: A Tensor of type int32. dimension 0 of "x" passed to +SparseSegmentMean op . \n *@par Outputs: -*y:A Tensor. Has the same type as grad. +*y:A Tensor. Has the same type as grad . \n *@par Third-party framework compatibility *Compatible with tensorflow SparseSegmentMeanGrad operator @@ -308,12 +308,12 @@ REG_OP(SparseSegmentMeanGrad) *@brief Computes the gradient of igamma(a, x) wrt a *@par Inputs: -*The input a and x must have the same type. Inputs include: \n +*The input a and x must have the same type. Inputs include: *@li a:A Tensor. Must be one of the following types: float32, double. -*@li x:A Tensor. Must have the same type as a. +*@li x:A Tensor. Must have the same type as a . \n *@par Outputs: -*y:A Tensor. Has the same type as a. +*y:A Tensor. Has the same type as a . \n *@par Third-party framework compatibility *Compatible with tensorflow IgammaGradA operator @@ -326,10 +326,10 @@ REG_OP(IgammaGradA) .OP_END_FACTORY_REG(IgammaGradA) /** -*@brief Initialize data process channel. +*@brief Initialize data process channel . \n *@par Attributes: -*channel_name: A string. Default "". +*channel_name: A string. Default "" . \n *@par Third-party framework compatibility *Compatible with tensorflow InitData operator @@ -340,17 +340,17 @@ REG_OP(InitData) .OP_END_FACTORY_REG(InitData) /** -*@brief Get the next batch of data in data processing. +*@brief Get the next batch of data in data processing . 
\n *@par Attributes: -*@li output_types: A nested structure of DType objects corresponding to each \n +*@li output_types: A nested structure of DType objects corresponding to each component of an element of this dataset. -*@li output_shapes: A nested structure of TensorShape objects corresponding \n +*@li output_shapes: A nested structure of TensorShape objects corresponding to each component of an element of this dataset. -*@li channel_name: A string. Default "". +*@li channel_name: A string. Default "" . \n *@par Outputs: -*y:A nested structure of Tensor objects. +*y:A nested structure of Tensor objects . \n *@par Third-party framework compatibility *Compatible with tensorflow GetNext operator @@ -366,10 +366,10 @@ REG_OP(GetNext) .OP_END_FACTORY_REG(GetNext) /** -*@brief End of sequence. +*@brief End of sequence . \n *@par Inputs: -*x: A Tensor of type uint8. +*x: A Tensor of type uint8 . \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -381,14 +381,14 @@ REG_OP(EndOfSequence) .OP_END_FACTORY_REG(EndOfSequence) /** -*@brief: Computes the Gauss error function of `x` element-wise. +*@brief: Computes the Gauss error function of `x` element-wise . \n *@par Inputs: *x: A Tensor of type float16, float32 or double. the format can be * [NCHW,NC1HWC0,NHWC,ND] *@par Outputs: -*y: A Tensor. Has the same type and format as "x". +*y: A Tensor. Has the same type and format as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Erf. @@ -399,13 +399,13 @@ REG_OP(Erf) .OP_END_FACTORY_REG(Erf) /** -*@brief: Computes the Gauss complementary error function of "x" element-wise. +*@brief: Computes the Gauss complementary error function of "x" element-wise . \n *@par Inputs: -*x: A Tensor of type float16 ,float32, double. +*x: A Tensor of type float16 ,float32, double . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . 
\n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Erfc. @@ -418,19 +418,19 @@ REG_OP(Erfc) /** *@brief This operation returns a rank 1 histogram counting the number of entries in `values` * that fell into every bin.The bins are equal width and determined by the arguments -* 'value_range' and 'nbins'. +* 'value_range' and 'nbins' . \n *@par Inputs: -*Three inputs, including: \n +*Three inputs, including: *@li x: A Tensor of type float32, float16, int32, int64. *@li range: A Tensor of type float32,float16,int32, int64. -*@li nbins: A Tensor of type int32. +*@li nbins: A Tensor of type int32 . \n *@par Attributes: -* dtype: An optional attribute. Defaults to "int32". +* dtype: An optional attribute. Defaults to "int32" . \n *@par Outputs: -*y: A Tensor. A Tensor of type int32 or int64. +*y: A Tensor. A Tensor of type int32 or int64 . \n *@par Third-party framework compatibility * Compatible with TensorFlow operator HistogramFixedWidth. @@ -446,22 +446,25 @@ REG_OP(HistogramFixedWidth) /** *@brief This operation returns a rank 1 histogram counting the number of entries in `values` * that fell into every bin.The bins are equal width and determined by the arguments -* 'value_range' and 'nbins'. +* 'value_range' and 'nbins' . \n *@par Inputs: -*Two inputs, including: \n +*Two inputs, including: *@li x: A Tensor of type float32,float16,int32, int64. -*@li range: A Tensor of type float32,float16,int32, int64. +*@li range: A Tensor of type float32,float16,int32, int64 . \n *@par Attributes: *@li dtype: An optional attribute. Defaults to "int32". -*@li nbins: A required attribute,the type is int32. +*@li nbins: A required attribute,the type is int32 . \n *@par Outputs: -*y: A Tensor. A Tensor of type int32. +*y: A Tensor. A Tensor of type int32 . \n *@par Third-party framework compatibility * Compatible with TensorFlow operator HistogramFixedWidth. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. 
Please use HistogramFixedWidth instead. */ REG_OP(HistogramFixedWidthD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) @@ -472,15 +475,15 @@ REG_OP(HistogramFixedWidthD) .OP_END_FACTORY_REG(HistogramFixedWidthD) /** -*@brief Returns the next representable value of x1 in the direction of x2, element-wise. +*@brief Returns the next representable value of x1 in the direction of x2, element-wise . \n *@par Inputs: -*The input X1 and x2 must have the same type. Inputs include: \n +*The input X1 and x2 must have the same type. Inputs include: *@li x1:A Tensor. Must be one of the following types: float32, double. -*@li x2:A Tensor. Must have the same type as x1. +*@li x2:A Tensor. Must have the same type as x1 . \n *@par Outputs: -*output:A Tensor. Has the same type as x1. +*output:A Tensor. Has the same type as x1 . \n *@par Third-party framework compatibility *Compatible with tensorflow NextAfter operator @@ -579,20 +582,20 @@ REG_OP(Conj) .OP_END_FACTORY_REG(Conj) /** -*@brief The negative log likelihood loss. +*@brief The negative log likelihood loss . \n *@par Inputs: -*The input x and weight must have the same type. Inputs include: \n +*The input x and weight must have the same type. Inputs include: *@li x: A Tensor dtype of float32. *@li target: A Tensor dtype of int32. -*@li weight: A Tensor dtype of float32. +*@li weight: A Tensor dtype of float32 . \n *@par Attributes: -*reduction: An optional attribute. Defaults to "mean". +*reduction: An optional attribute. Defaults to "mean" . \n *@par Outputs: *@li y: A Tensor dtype of float32. -*@li total_weight: A Tensor dtype of float32. +*@li total_weight: A Tensor dtype of float32 . \n *@par Third-party framework compatibility *Compatible with pytorch NLLLoss operator @@ -607,20 +610,20 @@ REG_OP(NLLLoss) .OP_END_FACTORY_REG(NLLLoss) /** -*@brief The negative log likelihood loss grad. +*@brief The negative log likelihood loss grad . \n *@par Inputs: *@li x:A Tensor dtype of float32. 
*@li y_grad:A Tensor dtype of float32. *@li target:A Tensor dtype of int32. *@li weight:A Tensor dtype of float32. -*@li total_weight:A Tensor dtype of float32. +*@li total_weight:A Tensor dtype of float32 . \n *@par Attributes: -*reduction: An optional attribute. Defaults to "mean". +*reduction: An optional attribute. Defaults to "mean" . \n *@par Outputs: -*x_grad: A Tensor. Must be the following type: float32. +*x_grad: A Tensor. Must be the following type: float32 . \n *@par Third-party framework compatibility *Compatible with pytorch NLLLossGrad operator @@ -636,27 +639,27 @@ REG_OP(NLLLossGrad) .OP_END_FACTORY_REG(NLLLossGrad) /** -*@brief The ifmr. +*@brief The ifmr . \n *@par Inputs: *@li data:A Tensor of feature map *@li data_min:A Tensor of min value of feature map. *@li data_max:A Tensor of max value of feature map. -*@li cumsum:A Tensor of cumsum bin of data. +*@li cumsum:A Tensor of cumsum bin of data . \n *@par Attributes: *min_percentile: min init percentile. *max_percentile: max init percentile. *search_range: search range. *search_step: step size of searching. -*with_offset: whether using offset. +*with_offset: whether using offset . \n *@par Outputs: *scale: optimal scale. -*offset: optimal offset. +*offset: optimal offset . \n *@par Third-party framework compatibility -*Compatible with mindspore +*Compatible with mindspore */ REG_OP(IFMR) diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index de94b58e..bceff0cd 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -26,7 +26,7 @@ namespace ge { /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b". +*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n *@par Inputs: *Three inputs, including: @@ -35,15 +35,15 @@ namespace ge { * @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, * float32, int32. 
Has format [ND, NHWC, FRACTAL_NZ]. * @li bias: A optional 1D Tensor. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC]. +* float32, int32. Has format [ND, NHWC] . \n *@par Attributes: *@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M]. +*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. +* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -58,7 +58,7 @@ REG_OP(MatMul) .OP_END_FACTORY_REG(MatMul) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b". +*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n *@par Inputs: *Two inputs, including: @@ -67,15 +67,15 @@ REG_OP(MatMul) * @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, * float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. * @li bias: A 1D Tensor. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC]. +* float32, int32. Has format [ND, NHWC] . \n *@par Attributes: *@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M]. +*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. +* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. 
@@ -93,7 +93,7 @@ REG_OP(MatMulV2) /** -*@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c. +*@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . \n *@par Inputs: *Five inputs, including: @@ -115,14 +115,14 @@ REG_OP(MatMulV2) * When type of a is float16 and type of c is float16, the format of a, b, c * should all be ND or FRACTAL_NZ.\n * When type of a is float16 and type of c is float32, the format of a, b, c -* should all be ND or FRACTAL_NZ. +* should all be ND or FRACTAL_NZ . \n *@par Attributes: *Two attributes, including: *@li transpose_a: Optional. A bool. If True, changes the shape of "a" from * [M, K] to [K, M]. *@li transpose_b: Optional. A bool. If True, changes the shape of "b" from -* [K, N] to [N, K]. +* [K, N] to [N, K] . \n *@par Outputs: *y: The result matrix Tensor. Must be one of the following types: float16, @@ -142,22 +142,22 @@ REG_OP(GEMM) .OP_END_FACTORY_REG(GEMM) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b". +*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n *@par Inputs: *Three inputs, including: * @li x1: A matrix Tensor. Must be one of the following types: float16, * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. * @li x2: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n *@par Attributes: *@li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. -*@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M]. +*@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n *@par Outputs: *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2". +* float32, int32. 
2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -172,12 +172,12 @@ REG_OP(BatchMatMul) .OP_END_FACTORY_REG(BatchMatMul) /** -*@brief Computes half the L2 norm of a tensor without the sqrt. +*@brief Computes half the L2 norm of a tensor without the sqrt . \n *@par Inputs: * x: A Tensor. -* TensorType::FloatingDataType(). +* TensorType::FloatingDataType() . \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -190,15 +190,15 @@ REG_OP(L2Loss) .OP_END_FACTORY_REG(L2Loss) /** -*@brief: Returns a batched diagonal tensor with a given batched diagonal values. +*@brief: Returns a batched diagonal tensor with a given batched diagonal values . \n *@par Inputs: *x: A Tensor. Must be one of the following types: * float16, float32, double, int32, uint8, int16, int8, complex64, int64, -* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +* qint8, quint8, qint32, uint16, complex128, uint32, uint64 . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixDiag. @@ -209,18 +209,21 @@ REG_OP(MatrixDiag) .OP_END_FACTORY_REG(MatrixDiag) /** -*@brief: Returns a batched diagonal tensor with a given batched diagonal values. +*@brief: Returns a batched diagonal tensor with a given batched diagonal values . \n *@par Inputs: * Two inputs, including: *@li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. -*@li assist: A Tensor of the same type as "x". +*@li assist: A Tensor of the same type as "x" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixDiag. 
+* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use MatrixDiag instead. */ REG_OP(MatrixDiagD) .INPUT(x, TensorType::BasicType()) @@ -229,15 +232,15 @@ REG_OP(MatrixDiagD) .OP_END_FACTORY_REG(MatrixDiagD) /** -*@brief: Returns the batched diagonal part of a batched tensor. +*@brief: Returns the batched diagonal part of a batched tensor . \n *@par Inputs: *x: A Tensor. Must be one of the following types: * float16, float32, double, int32, uint8, int16, int8, complex64, int64, -* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +* qint8, quint8, qint32, uint16, complex128, uint32, uint64 . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixDiagPart. @@ -248,18 +251,21 @@ REG_OP(MatrixDiagPart) .OP_END_FACTORY_REG(MatrixDiagPart) /** -*@brief: Returns the batched diagonal part of a batched tensor. +*@brief: Returns the batched diagonal part of a batched tensor . \n *@par Inputs: * Two inputs, including: *@li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. -*@li assist: A Tensor of the same type as "x". +*@li assist: A Tensor of the same type as "x" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixDiagPart. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use MatrixDiagPart instead. */ REG_OP(MatrixDiagPartD) .INPUT(x, TensorType::BasicType()) @@ -268,17 +274,17 @@ REG_OP(MatrixDiagPartD) .OP_END_FACTORY_REG(MatrixDiagPartD) /** -*@brief: Returns a batched matrix tensor with new batched diagonal values. +*@brief: Returns a batched matrix tensor with new batched diagonal values . \n *@par Inputs: * Two inputs, including: *@li x: A Tensor. 
Must be one of the following types: * float16, float32, double, int32, uint8, int16, int8, complex64, int64, * qint8, quint8, qint32, uint16, complex128, uint32, uint64. -*@li diagonal: A Tensor of the same type as "x". +*@li diagonal: A Tensor of the same type as "x" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixSetDiag. @@ -290,19 +296,22 @@ REG_OP(MatrixSetDiag) .OP_END_FACTORY_REG(MatrixSetDiag) /** -*@brief: Returns a batched matrix tensor with new batched diagonal values. +*@brief: Returns a batched matrix tensor with new batched diagonal values . \n *@par Inputs: * Three inputs, including: *@li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. *@li diagonal: A Tensor of the same type as "x". -*@li assist: A Tensor of the same type as "x". +*@li assist: A Tensor of the same type as "x" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MatrixSetDiag. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use MatrixSetDiag instead. */ REG_OP(MatrixSetDiagD) .INPUT(x, TensorType::BasicType()) @@ -312,7 +321,7 @@ REG_OP(MatrixSetDiagD) .OP_END_FACTORY_REG(MatrixSetDiagD) /** -*@brief Applies sparse "updates" to individual values or slices in a Variable. +*@brief Applies sparse "updates" to individual values or slices in a Variable . \n *@par Inputs: * Three inputs, including: @@ -329,10 +338,10 @@ REG_OP(MatrixSetDiagD) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock. + * the operation will be protected by a lock . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var". +*var: A Tensor. 
Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNdUpdate. @@ -346,7 +355,7 @@ REG_OP(ScatterNdUpdate) .OP_END_FACTORY_REG(ScatterNdUpdate) /** -*@brief Applies sparse addition to individual values or slices in a Variable. +*@brief Applies sparse addition to individual values or slices in a Variable . \n *@par Inputs: * Three inputs, including: @@ -361,7 +370,7 @@ REG_OP(ScatterNdUpdate) *Must be one of the following types: float16, float32, bool, int8, uint8 *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterUpdate. @@ -374,26 +383,26 @@ REG_OP(TensorScatterUpdate) .OP_END_FACTORY_REG(TensorScatterUpdate) /** -*@brief Adds sparse "updates" to a variable reference. +*@brief Adds sparse "updates" to a variable reference . \n *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor. +*@li var: An ND Tensor . \n *Must be one of the following types: float16, float32, int32, int8, uint8 *@li indices: An ND Tensor of type int32 or int64. -*@li updates: An Tensor. format:NCHW, NHWC. +*@li updates: An Tensor. format:NCHW, NHWC . \n *Must be one of the following types: float16, float32, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", the operation - * will be protected by a lock. + * will be protected by a lock . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterAdd. @@ -407,7 +416,7 @@ REG_OP(ScatterAdd) .OP_END_FACTORY_REG(ScatterAdd) /** -*@brief Divides a variable reference by sparse updates. +*@brief Divides a variable reference by sparse updates . 
\n *@par Inputs: * Three inputs, including: @@ -421,10 +430,10 @@ REG_OP(ScatterAdd) *@par Attributes: *@li use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock. + * the operation will be protected by a lock . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterDiv. @@ -438,7 +447,7 @@ REG_OP(ScatterDiv) .OP_END_FACTORY_REG(ScatterDiv) /** -*@brief Applies sparse addition to individual values or slices in a Variable. +*@brief Applies sparse addition to individual values or slices in a Variable . \n *@par Inputs: * Three inputs, including: @@ -450,10 +459,10 @@ REG_OP(ScatterDiv) *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock. + * the operation will be protected by a lock . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNdAdd. @@ -467,7 +476,7 @@ REG_OP(ScatterNdAdd) .OP_END_FACTORY_REG(ScatterNdAdd) /** -*@brief Applies sparse addition to individual values or slices in a Variable. +*@brief Applies sparse addition to individual values or slices in a Variable . \n *@par Inputs: * Three inputs, including: @@ -482,7 +491,7 @@ REG_OP(ScatterNdAdd) *Must be one of the following types: float16, float32, int32, int8, uint8 *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterAdd. 
@@ -495,7 +504,7 @@ REG_OP(TensorScatterAdd) .OP_END_FACTORY_REG(TensorScatterAdd) /** -*@brief Applies sparse subtraction to individual values or slices in a Variable. +*@brief Applies sparse subtraction to individual values or slices in a Variable . \n *@par Inputs: * Three inputs, including: @@ -508,10 +517,10 @@ REG_OP(TensorScatterAdd) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock. + * the operation will be protected by a lock . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNdSub. @@ -525,7 +534,7 @@ REG_OP(ScatterNdSub) .OP_END_FACTORY_REG(ScatterNdSub) /** -*@brief Applies sparse addition to individual values or slices in a Variable. +*@brief Applies sparse addition to individual values or slices in a Variable . \n *@par Inputs: * Three inputs, including: @@ -540,7 +549,7 @@ REG_OP(ScatterNdSub) *Must be one of the following types: float16, float32, int32, int8, uint8 *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterSub. @@ -553,7 +562,7 @@ REG_OP(TensorScatterSub) .OP_END_FACTORY_REG(TensorScatterSub) /** -*@brief Subtracts sparse updates to a variable reference. +*@brief Subtracts sparse updates to a variable reference . \n *@par Inputs: * Three inputs, including: @@ -565,10 +574,10 @@ REG_OP(TensorScatterSub) *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock. + * the operation will be protected by a lock . \n *@par Outputs: -*var: A Tensor. 
Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterSub. @@ -582,18 +591,21 @@ REG_OP(ScatterSub) .OP_END_FACTORY_REG(ScatterSub) /** -*@brief: Returns the batched diagonal part of a batched tensor with "assist". +*@brief: Returns the batched diagonal part of a batched tensor with "assist" . \n *@par Inputs: * Two inputs, including: * @li x: A Tensor of type float16, float32, or int32. -* @li assist: A Tensor of the same type as "x". +* @li assist: A Tensor of the same type as "x" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator DiagPart. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use DiagPart instead. */ REG_OP(DiagPartD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -602,14 +614,14 @@ REG_OP(DiagPartD) .OP_END_FACTORY_REG(DiagPartD) /** -*@brief: Returns the batched diagonal part of a batched tensor. +*@brief: Returns the batched diagonal part of a batched tensor . \n *@par Inputs: *x: A Tensor. Must be one of the following types: -* float16, float32, int32, int64, double, complex64, complex128. +* float16, float32, int32, int64, double, complex64, complex128 . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator DiagPart. @@ -622,27 +634,27 @@ REG_OP(DiagPart) .OP_END_FACTORY_REG(DiagPart) /** -*@brief Also known as a "fully-connected" layer, computes an inner product with a set of learned weights, and (optionally) adds biases. +*@brief Also known as a "fully-connected" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . 
\n *@par Inputs: * Four inputs, including: *@li x: A Tensor of type float16, int8. *@li w: A weight matrix of type float16, int8. *@li b: A Tensor of type float16, int32, float32. -*@li offset_w: A Tensor of type int8. +*@li offset_w: A Tensor of type int8 . \n *@par Attributes: *@li num_output: Reserved. *@li transpose: A bool, specifying weight whether to transpose, either "true" or "false". Defaults to "false". *@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. * The product of the subsequent dimensions starting form first dimension or the second dimension is "K". -*@li offset_x: Reserved. +*@li offset_x: Reserved . \n *@par Outputs: -*y: The result tensor of type float16, int32, float32. +*y: The result tensor of type float16, int32, float32 . \n *@par Third-party framework compatibility -* Compatible with the Caffe operator InnerProduct. +* Compatible with the Caffe operator InnerProduct . \n *@par Quantization supported or not * Yes @@ -660,7 +672,7 @@ REG_OP(FullyConnection) .OP_END_FACTORY_REG(FullyConnection) /** -*@brief Also known as a "fully-connected-compress" layer, computes an inner product with a set of learned weights, and (optionally) adds biases. +*@brief Also known as a "fully-connected-compress" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n *@par Inputs: * Four inputs, including: @@ -674,13 +686,13 @@ REG_OP(FullyConnection) *@li num_output: Reserved. *@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false". *@li axis: Reserved. -*@li offset_x: Reserved. +*@li offset_x: Reserved . \n *@par Outputs: -*y: The result tensor of type int32. +*y: The result tensor of type int32 . \n *@par Third-party framework compatibility -* Compatible with the Caffe operator InnerProduct. +* Compatible with the Caffe operator InnerProduct . 
\n *@par Quantization supported or not * Yes @@ -699,7 +711,7 @@ REG_OP(FullyConnectionCompress) .OP_END_FACTORY_REG(FullyConnectionCompress) /** -*@brief Computes the confusion matrix from predictions and labels. +*@brief Computes the confusion matrix from predictions and labels . \n *@par Inputs: * Three inputs, including: @@ -708,12 +720,12 @@ REG_OP(FullyConnectionCompress) *@li predictions: A Tensor. Must be one of the following types: float16, * float32, int32, int8, uint8. *@li weights: A Tensor. Must be one of the following types: float16, float32, -* int32, int8, uint8. +* int32, int8, uint8 . \n *@par Attributes: *@li num_classes: An integer for the shape of the output matrix. * No default value. -*@li dtype: Data type of the confusion matrix. No default value. +*@li dtype: Data type of the confusion matrix. No default value . \n *@par Outputs: *y: A Tensor. Has the same type and format as input "labels" @@ -721,7 +733,7 @@ REG_OP(FullyConnectionCompress) *@attention Constraints: *@li "weights", "labels", and "predictions" are 1D tensors. *@li The output is with shape (num_classes, num_classes), -* where, 1 <= num_classes <= 4096. +* where, 1 <= num_classes <= 4096 . \n *@see Region() @@ -738,7 +750,7 @@ REG_OP(ConfusionMatrix) .OP_END_FACTORY_REG(ConfusionMatrix) /** -*@brief Multiplies sparse updates into a variable reference. +*@brief Multiplies sparse updates into a variable reference . \n *@par Inputs: * Three inputs, including: @@ -746,16 +758,16 @@ REG_OP(ConfusionMatrix) *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. *Must be one of the following types: int32 -*@li updates: An ND Tensor. +*@li updates: An ND Tensor . \n *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", the operation - * will be protected by a lock. + * will be protected by a lock . \n *@par Outputs: -*var: A Tensor. 
Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterMul. @@ -770,7 +782,7 @@ REG_OP(ScatterMul) /** *@brief Reduces sparse updates into a variable reference using - * the "min" operation. + * the "min" operation . \n *@par Inputs: * Three inputs, including: @@ -785,10 +797,10 @@ REG_OP(ScatterMul) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", the operation - * will be protected by a lock. + * will be protected by a lock . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterMin. @@ -802,26 +814,26 @@ REG_OP(ScatterMin) .OP_END_FACTORY_REG(ScatterMin) /** -*@brief Reduces sparse updates into a variable reference using the "max" operation. +*@brief Reduces sparse updates into a variable reference using the "max" operation . \n *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor. +*@li var: An ND Tensor . \n *Must be one of the following types: float16, float, int32 -*@li indices: An NCHW, NHWC, or ND Tensor. +*@li indices: An NCHW, NHWC, or ND Tensor . \n *Must be one of the following types: int32 -*@li updates: An NCHW, NHWC, or ND Tensor. +*@li updates: An NCHW, NHWC, or ND Tensor . \n *Must be one of the following types: float16, float, int32 *@par Attributes: *use_locking: An optional bool. Defaults to "False". - * If "True", the operation will be protected by a lock. + * If "True", the operation will be protected by a lock . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterMax. 
@@ -835,26 +847,26 @@ REG_OP(ScatterMax) .OP_END_FACTORY_REG(ScatterMax) /** -*@brief Applies sparse updates to a variable reference. +*@brief Applies sparse updates to a variable reference . \n *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor. +*@li var: An ND Tensor . \n *Must be one of the following types: float16, float, int32, int8, uint8 -*@li indices: An ND Tensor. +*@li indices: An ND Tensor . \n *Must be one of the following types: int32 -*@li updates: An ND Tensor. +*@li updates: An ND Tensor . \n *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock. + * the operation will be protected by a lock . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterUpdate. @@ -868,7 +880,7 @@ REG_OP(ScatterUpdate) .OP_END_FACTORY_REG(ScatterUpdate) /** -*@brief Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched `input`. +*@brief Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched `input` . \n *@par Inputs: * Three inputs, including: @@ -883,7 +895,7 @@ REG_OP(ScatterUpdate) *@li padding_value: The value to fill the area outside the specified diagonal band with. \n *@par Outputs: -*diagonal: The extracted diagonal(s). +*diagonal: The extracted diagonal(s) . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterUpdate. @@ -896,7 +908,7 @@ REG_OP(MatrixDiagPartV2) .OP_END_FACTORY_REG(MatrixDiagPartV2) /** -*@brief Returns a batched matrix tensor with new batched diagonal values. +*@brief Returns a batched matrix tensor with new batched diagonal values . 
\n *@par Inputs: * Three inputs, including: @@ -911,7 +923,7 @@ REG_OP(MatrixDiagPartV2) *of a matrix band. `k[0]` must not be larger than `k[1]`. \n *@par Outputs: -*output: Rank `r+1`, with `output.shape = input.shape`. +*output: Rank `r+1`, with `output.shape = input.shape` . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterUpdate. @@ -924,7 +936,7 @@ REG_OP(MatrixSetDiagV2) .OP_END_FACTORY_REG(MatrixSetDiagV2) /** -*@brief Returns a batched diagonal tensor with given batched diagonal values. +*@brief Returns a batched diagonal tensor with given batched diagonal values . \n *@par Inputs: * Five inputs, including: @@ -949,7 +961,7 @@ REG_OP(MatrixSetDiagV2) *@li padding_value: The number to fill the area outside the specified diagonal band with. \n *@par Outputs: -*output: Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise. +*output: Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterUpdate. diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index a120b31d..4fa85cbc 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -26,18 +26,18 @@ namespace ge { /** -*@brief Normalizes elements of a specific dimension of eigenvalues (L2). +*@brief Normalizes elements of a specific dimension of eigenvalues (L2) . \n *@par Inputs: -*One input: \n -*x: A multi-dimensional Tensor of type float16 or float32, specifying the eigenvalue. +*One input: +*x: A multi-dimensional Tensor of type float16 or float32, specifying the eigenvalue . \n *@par Attributes: *@li axis: A required attribute of type list, specifying the axis for normalization. -*@li eps: An optional attribute of type float, specifying the lower limit of normalization. Defaults to "1e-4". 
+*@li eps: An optional attribute of type float, specifying the lower limit of normalization. Defaults to "1e-4" . \n *@par Outputs: -*y: A multi-dimensional Tensor of type float16 or float32, specifying the eigenvalue for normalization. +*y: A multi-dimensional Tensor of type float16 or float32, specifying the eigenvalue for normalization . \n *@par Third-party framework compatibility * Compatible with the L2 scenario of PyTorch operator Normalize. @@ -50,25 +50,25 @@ REG_OP(L2Normalize) .OP_END_FACTORY_REG(L2Normalize) /** -*@brief Performs the backpropagation of L2Normalize for training scenarios. +*@brief Performs the backpropagation of L2Normalize for training scenarios . \n *@par Inputs: -* Three inputs, including: \n +* Three inputs, including: *@li x: A multi-dimensional Tensor of type float16 or float32, specifying * the eigenvalue of forward inputs. *@li y: A multi-dimensional Tensor of type float16 or float32, specifying * the normalization result of the forward output. *@li dy: A multi-dimensional Tensor of type float16 or float32, specifying -* the reverse input gradient. +* the reverse input gradient . \n *@par Attributes: *@li axis: A required attribute of type int, specifying the axis to be * normalized. *@li eps: An optional attribute of type float, specifying the lower limit of -* normalization. Defaults to "1e-4". +* normalization. Defaults to "1e-4" . \n *@par Outputs: -*dx: Reverse gradient of eigenvalue "x". Has the same dimensions as "x". +*dx: Reverse gradient of eigenvalue "x". Has the same dimensions as "x" . \n *@par Third-party framework compatibility * Compatible with the L2 scenario of PyTorch operator NormalizeGrad. @@ -83,32 +83,43 @@ REG_OP(L2NormalizeGrad) .OP_END_FACTORY_REG(L2NormalizeGrad) /** -*@brief Performs batch normalization. +*@brief Performs batch normalization . 
\n -*@par Inputs:\n +*@par Inputs: * Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. -*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the scaling factor. -*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the offset. -*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the operation is used for training. -*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" if the operation is used for training. +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the scaling factor. +*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the offset. +*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the +operation is used for training. +*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be +5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" +if the operation is used for training . \n *@par Attributes: *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". 
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". -*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True". +*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n -*@par Outputs:\n +*@par Outputs: * Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. -*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x". -*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". -*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. -*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output. +*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the mean of "x". +*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". +*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. 
Pass "None" to skip this output. +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n *@attention Constraints: -*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". -*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, +then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n *@par Third-party framework compatibility *@li Compatible with the TensorFlow operator fused_batch_norm. @@ -131,32 +142,37 @@ REG_OP(BatchNorm) .OP_END_FACTORY_REG(BatchNorm) /** -*@brief Performs batch normalization. +*@brief Performs batch normalization . \n -*@par Inputs:\n +*@par Inputs: * Five inputs, including: (NHWC or NCHW supported) *@li x: A 4D Tensor of type float16 or float32. *@li scale: A 1D Tensor of type float32, for the scaling factor. *@li offset: A 1D Tensor of type float32, for the scaling offset. -*@li mean: A 1D Tensor of type float32, for the mean used for inference. \n Must be "None" if the operation is used for training. -*@li variance: A 1D Tensor of type float32, for the variance used for inference. \n Must be "None" if the operation is used for training. +*@li mean: A 1D Tensor of type float32, for the mean used for inference. +Must be "None" if the operation is used for training. +*@li variance: A 1D Tensor of type float32, for the variance used for inference. 
+Must be "None" if the operation is used for training . \n *@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li epsilon: An optional float32, specifying the small value +added to variance to avoid dividing by zero. Defaults to "0.0001". *@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". -*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True". +*@li is_training: An optional bool, specifying if the operation +is used for training or inference. Defaults to "True" . \n -*@par Outputs:\n +*@par Outputs: * Five outputs, including: (NHWC or NCHW supported) *@li y: A 4D Tensor of type float16 or float32, for the normalized "x". *@li batch_mean: A 1D Tensor of type float32, for the mean of "x". *@li batch_variance: A 1D Tensor of type float32, for the variance of "x". *@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for gradient computation. -*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" for gradient computation. +*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" for gradient computation . \n *@attention Constraints: -*@li If the operation is used for inference, then output "reserve_space_1" has the same value as "mean" and output "reserve_space_2" has the same value as "variance". -*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +*@li If the operation is used for inference, then output "reserve_space_1" +has the same value as "mean" and output "reserve_space_2" has the same value as "variance". +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator fused_batch_norm_v2. 
@@ -178,30 +194,30 @@ REG_OP(BatchNormExt2) .OP_END_FACTORY_REG(BatchNormExt2) /** -*@brief Performs the backpropagation of BatchNorm. +*@brief Performs the backpropagation of BatchNorm . \n *@par Inputs: -* Five inputs, including: \n +* Five inputs, including: *@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient. *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. *@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. *@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. -*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. +*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n *@par Attributes: *@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". *@li data_format: An optional string. Defaults to "NHWC". -*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference. +*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n *@par Outputs: *@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x". *@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "scale". *@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "offset". *@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output. -*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output. 
+*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output . \n *@attention Constraints: -* The preceding layer of this operator must be operator BatchNorm. +* The preceding layer of this operator must be operator BatchNorm . \n *@see BatchNorm *@par Third-party framework compatibility @@ -224,30 +240,30 @@ REG_OP(BatchNormGrad) .OP_END_FACTORY_REG(BatchNormGrad) /** -*@brief Performs the backpropagation of BatchNorm. +*@brief Performs the backpropagation of BatchNorm . \n *@par Inputs: -* Five inputs, including: \n +* Five inputs, including: *@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. *@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW. *@li scale: A 4D Tensor of type float32, with format NHWC or NCHW. *@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2. -*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2. +*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2 . \n *@par Attributes: *@li epsilon: A required float32. A small float number added to the variance of "x". *@li data_format: A required string for the format. -*@li is_training: A required bool for specifying the operation is for training (true) or inference (false). +*@li is_training: A required bool for specifying the operation is for training (true) or inference (false) . \n *@par Outputs: *@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, for the offset of "x". *@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "scale". *@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "offset". *@li reserve_space_3: A Tensor of type float32, with format NHWC or NCHW. 
-*@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW. +*@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW . \n *@attention Constraints: -* The preceding layer of this operator must be BatchNormExt2. +* The preceding layer of this operator must be BatchNormExt2 . \n *@see BatchNormExt2 *@par Third-party framework compatibility @@ -271,7 +287,7 @@ REG_OP(BatchNormGradExt2) /** -*@brief Performs batch normalization. +*@brief Performs batch normalization . \n *@par Inputs: *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. @@ -284,7 +300,7 @@ REG_OP(BatchNormGradExt2) *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". *@li use_global_stats: mean inference mode , only can be "True". *@li mode: An optional input, not use -*@par Outputs:\n +*@par Outputs: *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" */ REG_OP(BNInference) @@ -300,7 +316,7 @@ REG_OP(BNInference) .ATTR(mode, Int,1) .OP_END_FACTORY_REG(BNInference) /** -*@brief aicpu batch normalization host . +*@brief aicpu batch normalization host . \n *@par Inputs: @@ -329,7 +345,7 @@ REG_OP(BnHost) .OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(BnHost) /** -*@brief Performs batch normalization. +*@brief Performs batch normalization . \n *@par Inputs: *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. @@ -342,7 +358,7 @@ REG_OP(BnHost) *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". *@li use_global_stats: mean inference mode , only can be "True". *@li mode: An optional attr, not use -*@par Outputs:\n +*@par Outputs: *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" *@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. 
Please use BNInference instead. diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 5b84b1fb..5a02c1ca 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -26,7 +26,7 @@ namespace ge { /** * @brief Computes the gradients of depthwise convolution with respect to -* the filter. +* the filter . \n * @par Inputs: * Three inputs include: \n @@ -34,7 +34,7 @@ namespace ge { * support float16, float32, double * @li filter_size: A 4D tensor of type int32, with shape [H, W, C, K] * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. -* Must be one of the following types: float16, float32, double. +* Must be one of the following types: float16, float32, double . \n * @par Attributes: * @li strides: A required list or tuple. The stride of the sliding window @@ -49,12 +49,12 @@ namespace ge { * @li pads: A required list or tuple. Padding added to each dimension of the * input. * @li data_format: An optional string. Input data format, either "NHWC" or -* "NCHW". +* "NCHW" . \n * @par Outputs: * filter_grad: Gradient of the deep convolution relative to the filter with * shape [H, W, C, K]. Must be one of the following types: float16, float32, -* double. +* double . \n * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but @@ -66,7 +66,7 @@ namespace ge { * data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * -* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512. +* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512 . \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. 
@@ -85,7 +85,7 @@ REG_OP(DepthwiseConv2DBackpropFilter) /** * @brief Computes the gradients of depthwise convolution with respect to -* the filter. +* the filter . \n * @par Inputs: * Two inputs include: \n @@ -107,11 +107,11 @@ REG_OP(DepthwiseConv2DBackpropFilter) * @li pads: A required list or tuple. Padding added to each dimension of the * input. * @li data_format: An optional string. Input data format, either "NHWC" or -* "NCHW". +* "NCHW" . \n * @par Outputs: * filter_grad: Gradient of the deep convolution relative to the filter with -* shape [H, W, C, K]. Must be of type float32. +* shape [H, W, C, K]. Must be of type float32 . \n * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but @@ -123,7 +123,7 @@ REG_OP(DepthwiseConv2DBackpropFilter) * data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * -* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512. +* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512 . \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. @@ -146,7 +146,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD) /** * @brief Computes the gradients of depthwise convolution with respect to the -* input. +* input . \n * @par Inputs: * Three inputs include: \n @@ -154,7 +154,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD) * support int32, int64 * @li filter: 4D filter tensor with shape of [H, W, C, K], support float16. * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. -* Must be one of the following types: float16. +* Must be one of the following types: float16 . \n * @par Attributes: * @li strides: A required list or tuple of int32. 
The stride of the sliding window for @@ -169,11 +169,11 @@ REG_OP(DepthwiseConv2DBackpropFilterD) * @li pads: A required list or tuple of int32. Padding added to each dimension of the * input. * @li data_format: An optional string. Input data format, either "NHWC" or -* "NCHW". Defaults to "NHWC". +* "NCHW". Defaults to "NHWC" . \n * @par Outputs: * input_grad: Gradient of the deep convolution relative to the input with shape -* [N, C, H, W] or [N, H, W, C] Must be one of the following types: float16. +* [N, C, H, W] or [N, H, W, C] Must be one of the following types: float16 . \n * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but @@ -204,7 +204,7 @@ REG_OP(DepthwiseConv2DBackpropInput) /** * @brief Computes the gradients of depthwise convolution with respect to the -* input. +* input . \n * @par Inputs: * Two inputs include: \n @@ -226,11 +226,11 @@ REG_OP(DepthwiseConv2DBackpropInput) * @li pads: A required list or tuple. Padding added to each dimension of the * input. * @li data_format: An optional string. Input data format, either "NHWC" or -* "NCHW". +* "NCHW" . \n * @par Outputs: * input_grad: Gradient of the deep convolution relative to the input with -* shape [N, C, H, W] or [N, H, W, C]. Must be of type float16. +* shape [N, C, H, W] or [N, H, W, C]. Must be of type float16 . \n * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but @@ -265,7 +265,7 @@ REG_OP(DepthwiseConv2DBackpropInputD) /** *@brief Computes a 2D deep convolution given a 4D input tensor and a filter -* tensor. +* tensor . \n *@par Inputs: *Two required inputs and two optional inputs, including: \n @@ -289,7 +289,7 @@ REG_OP(DepthwiseConv2DBackpropInputD) * @li data_format: An optional string. Input data format, either "NHWC" or * "NCHW". Defaults to "NHWC". * @li offset_x: An optional int. Input offset, used for quantized inference. -* Defaults to 0. +* Defaults to 0 . 
\n * @par Outputs: * y: 4D tensor of type float16 or int32, with shape [N, C, H, W] or [N, H, W, C] @@ -310,6 +310,9 @@ REG_OP(DepthwiseConv2DBackpropInputD) * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator DepthwiseConv2D. * @li Compatible with the Caffe operator DepthwiseConv2D. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DepthwiseConv2D) .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) @@ -328,16 +331,16 @@ REG_OP(DepthwiseConv2D) *@brief Performs the the backward operation for "BiasAdd" on the "bias" tensor. * It accumulates all the values from out_backprop into the feature * dimension. For NHWC data format, the feature dimension is the last. -* For NCHW data format, the feature dimension is the third-to-last. +* For NCHW data format, the feature dimension is the third-to-last . \n *@par Inputs: -*x: A Tensor of type NumberType. +*x: A Tensor of type NumberType . \n *@par Attributes: -*data_format: Data format. Defaults to "NHWC". +*data_format: Data format. Defaults to "NHWC" . \n *@par Outputs: -*y: A Tensor.Has the same type as "x". +*y: A Tensor.Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BiasAddGrad. @@ -352,8 +355,9 @@ REG_OP(BiasAddGrad) *@brief Computes the gradients of convolution with respect to the input. *@par Inputs: * Three inputs: - * @li input_size: A Tensor of type int32. An integer vector representing the - * shape of input, where input is a 4-D tensor [batch, height, width, channels] + * @li input_size: A const Tensor of type int32. Currently does not support + * data tensor. An integer vector representing the shape of input, where + * input is a 4-D tensor [batch, height, width, channels] * or [batch, channels, height, width]. * @li filter: A Tensor. Must be one of the following types: float16, float32, * float64. 
4-D with shape @@ -371,13 +375,13 @@ REG_OP(BiasAddGrad) * @li pads: A tuple/list of 4 integers, [top, bottom, left, right] pads * on feature map * @li dilations: A tuple/list of 4 integers, The dilation factor for each - * dimension of input, now only support [1,1,1,1] + * dimension of input, defaults to [1,1,1,1]. * @li groups: Number of blocked connections from input channels to output * channels. * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * "NHWC". Specify the data format of the input and output data. *@par Outputs: - * y: A Tensor. Has the same type as filter,and has same format as input_size + * y: A Tensor. Has the same type as filter,and has same format as input_size. *@par Third-party framework compatibility * Compatible with Tensorflow's conv2d_backprop_input */ @@ -415,7 +419,7 @@ REG_OP(Conv2DBackpropInput) * @li pads: A tuple/list of 4 integers, [top, bottom, left, right] pads on * feature map * @li dilations: A tuple/list of 4 integers, The dilation factor for each - * dimension of input, now only support [1,1,1,1] + * dimension of input, defaults to [1,1,1,1]. * @li groups: Number of blocked connections from input channels to output * channels. * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to @@ -460,7 +464,7 @@ REG_OP(Conv2DBackpropInputD) * @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] * padding on the feature map. * @li dilations: A tuple or list of 4 integers. The dilation factor for each - * dimension of input. Must be [1, 1, 1, 1]. + * dimension of input, defaults to [1,1,1,1]. * @li groups: Number of blocked connections from input channels to output channels. Defaults to "1". * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n @@ -492,11 +496,11 @@ REG_OP(Deconvolution) * @li x: A Tensor. 
Must be one of the following types: float16, float32, * float64.4-D with shape [batch, in_height, in_width, in_channels] or * [batch, in_channels, in_height, in_width]. - * @li filter_size: A Tensor of type int32. An integer vector representing the - * tensor shape of filter, where filter is a 4-D tensor [filter_height, - * filter_width, in_channels, out_channels] or [out_channels, filter_height, - * filter_width, in_channels] or [out_channels, in_channel, filter_height, - * filter_width]. + * @li filter_size: A const Tensor of type int32. Currently does not support + * data tensor. An integer vector representing the tensor shape of filter, + * where filter is a 4-D tensor [filter_height, filter_width, in_channels, + * out_channels] or [out_channels, filter_height, filter_width, in_channels] + * or [out_channels, in_channel, filter_height, filter_width]. * @li out_backprop: A Tensor. Must have the same type as x. 4-D with shape * [batch, out_height, out_width, out_channels] or [batch, out_channels, * out_height, out_width]. Gradients with respect to the output of the @@ -508,13 +512,13 @@ REG_OP(Deconvolution) * @li pads: A tuple/list of 4 integers, [top, bottom, left, right] pads on * feature map. * @li dilations: A tuple/list of 4 integers, The dilation factor for each - * dimension of input, now only support [1,1,1,1]. + * dimension of input, defaults to [1,1,1,1]. * @li groups: Number of blocked connections from input channels to output * channels. * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * "NHWC". Specify the data format of the input and output data. *@par Outputs: - * y: A Tensor. Has the same type as x + * y: A Tensor. Has the same type as x, has the same format as filter_size. 
*@par Third-party framework compatibility * Compatible with Tensorflow's conv2d_backprop_filter */ @@ -553,14 +557,15 @@ REG_OP(Conv2DBackpropFilter) * @li pads: A tuple/list of 4 integers, [top, bottom, left, right] pads on * feature map * @li dilations: A tuple/list of 4 integers, The dilation factor for each - * dimension of input, now only support [1,1,1,1]. + * dimension of input, defaults to [1,1,1,1]. * @li groups: Number of blocked connections from input channels to output * channels. * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * "NHWC". Specify the data format of the input and output data. *@par Outputs: - * y: A Tensor. Type is float32 -*@par Third-party framework compatibility + * y: A Tensor. Type is float32, a 4-D tensor [filter_height, filter_width, + * in_channels, out_channels] or [out_channels, filter_height, filter_width, + * in_channels] or [out_channels, in_channel, filter_height, filter_width]. * Compatible with Tensorflow's conv2d_backprop_filter *@par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DBackpropFilter instead. @@ -600,7 +605,7 @@ REG_OP(Conv2DBackpropFilterD) | | | HWCN | | | @endverbatim * It should be noted that the data types must correspond to each other, but the -* format does not need to. +* format does not need to . \n *@par Attributes: * @li strides: A list of 4 integers. Specifying the strides of the @@ -615,10 +620,10 @@ REG_OP(Conv2DBackpropFilterD) * "groups".Type is int32. * @li offset_x: An optional integer for quantized convolution. Type is int32. Defaults to "0". * @li data_format: An optional string from: "NHWC", "NCHW". Specifying the -* data format of the input and output images. Type is string. Defaults to "NHWC". Reserved. +* data format of the input and output images. Type is string. Defaults to "NHWC". Reserved . \n *@par Outputs: -* @li y: A 4D Tensor of output images. +* @li y: A 4D Tensor of output images . 
\n *@attention * @li The parameter scope is listed as follows: @@ -654,7 +659,7 @@ REG_OP(Conv2DBackpropFilterD) H dimension != 1 | @endverbatim * As shown above, "HxW(input)" indicates the image size after padding and -* "HxW(filter)" indicates the filter size after dilation. +* "HxW(filter)" indicates the filter size after dilation . \n *@par Quantization supported or not * Yes @@ -699,11 +704,11 @@ REG_OP(Conv2DCompress) * (Currently does not support int8). The format of x is NCDHW or NDHWC. * @li filter: A 5D tensor of the same type as "x". * (Currently does not support int8). - * The format is NCDHW, NDHWC or DHWCN. + * The format is NCDHW, NDHWC or DHWCN . \n *@par Optional input: * @li bias: An optional 1D tensor of the same type as "x". - * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved. + * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n *@par Required Attributes: * @li strides: A list of 5 integers. Specifies the stride of the sliding window @@ -711,7 +716,7 @@ REG_OP(Conv2DCompress) * The N and C dimensions must be 1. Has the same format as "x". * @li pads: A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, - * tail, top, bottom, left and right. + * tail, top, bottom, left and right . \n *@par Attributes: * @li groups: Number of blocked connections from input channels to output @@ -722,13 +727,13 @@ REG_OP(Conv2DCompress) * dimension of "x", now only support [1,1,1,1,1] * The N and C dimensions must be 1. Has the same format as "x". * @li offset_x: An optional int. Input offset, used for quantized inference. - * Defaults to 0. Reserved. + * Defaults to 0. Reserved . \n *@par Outputs: - *y: A Tensor. Has the same type as "x". + *y: A Tensor. Has the same type and data format as "x". \n *@attention Constraints: - *The image size after padding is greater than the filter size. + *The image size after padding is greater than the filter size . 
\n *@par Third-party framework compatibility * @li Compatible with the TensorFlow operator conv3d. @@ -757,12 +762,12 @@ REG_OP(Conv3D) * the shape of input, where input is a 5-D tensor * [batch, depth, height, width, channels] or * [batch, channels, depth, height, width]. - * @li filter: A Tensor. Must be one of the following types: float16, float32, - * float64. + * @li filter: A Tensor. Must be one of the following types: float16, float32. + * Currently does not support double. * @li out_backprop: A Tensor. Must have the same type as filter. * 5-D with shape [batch, depth, out_height, out_width, out_channels] * or [batch, out_channels, depth, out_height, out_width]. Gradients with - * respect to the output of the convolution. + * respect to the output of the convolution . \n *@par Required Attributes: * @li strides: A list of 5 integers. Specifies the stride of the sliding window @@ -770,7 +775,7 @@ REG_OP(Conv3D) * The N and C dimensions must be 1. Has the same format as "x". * @li pads: A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, - * tail, top, bottom, left and right. + * tail, top, bottom, left and right . \n *@par Attributes: * Three attributes: @@ -803,8 +808,10 @@ REG_OP(Conv3DBackpropInput) *@brief Computes the gradients of convolution 3d with respect to the input. *@par Inputs: * Two inputs: - * @li filter: A Tensor whose type is float16. - * @li out_backprop: A Tensor. Must have the same type as filter. + * @li filter: A Tensor whose type is float16. The format of filter is NCDHW, + * NDHWC or DHWCN. + * @li out_backprop: A Tensor. Must have the same type as filter. The format is + * NDHWC or NCDHW. \n *@par Required Attributes: * @li strides: A list of 5 integers. Specifies the stride of the sliding window @@ -815,7 +822,7 @@ REG_OP(Conv3DBackpropInput) * @li input_size: A tuple/list of type int32, int64. 
An integer vector * representing the shape of input, where input is a 5-D tensor * [batch, depth, height, width, channels] or - * [batch, channels, depth, height, width]. + * [batch, channels, depth, height, width] . \n *@par Attributes: * Three attributes: @@ -826,7 +833,7 @@ REG_OP(Conv3DBackpropInput) * @li dilations: A tuple/list of 5 integers, The dilation factor for each * dimension of input, now only support [1,1,1,1,1] *@par Outputs: - * y: A Tensor. Has the same type as filter + * y: A Tensor. Has the same type and data format as out_backprop. *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_input @@ -846,7 +853,7 @@ REG_OP(Conv3DBackpropInputD) .OP_END_FACTORY_REG(Conv3DBackpropInputD) /** -*@brief Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence. +*@brief Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence . \n *@par Inputs: * @li x: A Tensor dtype of float16. @@ -857,19 +864,21 @@ REG_OP(Conv3DBackpropInputD) * @li x_static: A optinal Tensor dtype of float16. * @li h_0: A optinal Tensor dtype of float16, float32. * @li c_0: A optinal Tensor dtype of float16, float32. -* @li w_x_static: A optinal Tensor dtype of float16. +* @li w_x_static: An optional Tensor dtype of float16 . \n *@par Attributes: *@li num_output: A Scalar of output size dtype of int. -*@li expose_hidden: A Scalar(bool) of features hidden. +*@li expose_hidden: A Scalar(bool) of features hidden . \n *@par Outputs: *@li h: A Tensor dtype of float16, float32. * @li h_t: A optinal Tensor dtype of float16, float32. The hidden state at time t. -* @li c_t: A optinal Tensor dtype of float16, float32. The cell state at time t. +* @li c_t: An optional Tensor dtype of float16, float32. The cell state at time t . \n *@par Third-party framework compatibility: * Compatible with the Pytorch operator adds. +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(LSTM) .INPUT(x, TensorType({DT_FLOAT16})) @@ -892,26 +901,26 @@ REG_OP(LSTM) *@brief Computes the gradients of convolution3D with respect to the filter *@par Inputs: * Three inputs: - * @li x: A Tensor. Must be one of the following types: float16, float32, - * double. + * @li x: A Tensor. Must be one of the following types: float16, float32. + * Currently does not support double. * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] - * or [batch, in_depth, in_channels, in_height, in_width]. + * or [batch, in_channels, in_depth, in_height, in_width]. * @li filter_size: A Tensor of type int32. An integer vector representing the * tensor shape of filter, where filter is a 5-D tensor * [filter_depth, filter_height, filter_width, in_channels, out_channels] - * or [out_channels, filter_depth, filter_height, filter_width, in_channels] - * or [out_channels, filter_depth, in_channel, filter_height, filter_width]. + * [out_channels, in_channels, filter_depth, filter_height, filter_width] + * or [out_channels, filter_depth, filter_height, filter_width, in_channels]. * @li out_backprop: A Tensor. Must have the same type as x. * 5-D with shape [batch, out_depth, out_height, out_width, out_channels] - * or [batch, out_depth, out_channels, out_height, out_width]. - * Gradients with respect to the output of the convolution. + * or [batch, out_channels, out_depth, out_height, out_width]. + * Gradients with respect to the output of the convolution. \n *@par Required Attributes: * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding * window for each dimension of "x". The N and C dimensions must be 1. * Has the same format as "x". * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right] - * pads on feature map. + * pads on feature map . \n *@par Attributes: * Three attributes: @@ -923,7 +932,8 @@ REG_OP(LSTM) * Defaults to "NDHWC". Specify the data format of the input and output data. 
*@par Outputs: - * y: A Tensor. Has the same type as x + * y: A Tensor that has the same type as x + * and the format is NDHWC, NCDHW or DHWCN. *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_filter */ @@ -945,23 +955,23 @@ REG_OP(Conv3DBackpropFilter) * Two inputs: * @li x: A Tensor of type float16. * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] - * or [batch, in_depth, in_channels, in_height, in_width]. + * or [batch, in_channels, in_depth, in_height, in_width]. * @li out_backprop: A Tensor. Must have the same type as x. * 5-D with shape [batch, out_depth, out_height, out_width, out_channels] - * or [batch, out_depth, out_channels, out_height, out_width]. - * Gradients with respect to the output of the convolution. + * or [batch, out_channels, out_depth, out_height, out_width]. + * Gradients with respect to the output of the convolution. \n *@par Required Attributes: * @li filter_size: A tuple/list of type integers. An integer vector * representing the tensor shape of filter, where filter is a 5-D tensor - * [filter_depth, filter_height, filter_width, in_channels, out_channels] - * or [out_channels, filter_depth, filter_height, filter_width, in_channels] - * or [out_channels, filter_depth, in_channel, filter_height, filter_width]. + * [filter_depth, filter_height, filter_width, in_channels, out_channels], + * [out_channels, filter_depth, filter_height, filter_width, in_channels] + * or [out_channels, in_channels, filter_depth, filter_height, filter_width]. * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding * window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right] - * pads on feature map + * pads on feature map. \n *@par Attributes: * Three attributes: @@ -973,7 +983,7 @@ REG_OP(Conv3DBackpropFilter) * Defaults to "NDHWC". 
Specify the data format of the input and output data. *@par Outputs: - * y: A Tensor of type float32 + * y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN. *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_filter *@par Restrictions: @@ -998,14 +1008,16 @@ REG_OP(Conv3DBackpropFilterD) *@par Inputs: * Three inputs: * @li input_size: A Tensor of type int32. An integer vector representing the - * shape of input - * @li x: A Tensor of type float16, currently does not support int8 - * @li filter: A Tensor of type float16, currently does not support int8 + * shape of input. + * @li x: A Tensor of type float16, currently does not support int8. The format + * is NDHWC or NCDHW. + * @li filter: A Tensor of type float16, currently does not support int8. + * The format is NDHWC, NCDHW or DHWCN. *@par Optional input: * Two optional inputs * @li bias: An optional 1D tensor of the same type as "x". Reserved. - * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved. + * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n *@par Required Attributes: * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding @@ -1024,7 +1036,7 @@ REG_OP(Conv3DBackpropFilterD) * @li output_padding: The size will be added in the output shape. * @li offset_x: Input offset_x value. Reserved. *@par Outputs: - * y: A Tensor. Has the same type as filter + * y: A Tensor. Has the same type and format as x. */ REG_OP(Conv3DTranspose) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) @@ -1045,12 +1057,14 @@ REG_OP(Conv3DTranspose) /** *@brief Computes the transpose of convolution 3d with respect to the input. *@par Inputs: - * @li x: A Tensor of type float16, currently does not support int8 - * @li filter: A Tensor of type float16, currently does not support int8 + * @li x: A Tensor of type float16, currently does not support int8. + * The format is NDHWC or NCDHW. 
+ * @li filter: A Tensor of type float16, currently does not support int8. + * The format is NDHWC, NCDHW or DHWCN. *@par Optional inputs: * @li bias: An optional 1D tensor of the same type as "x". Reserved. - * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved. + * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n *@par Required Attributes: * @li input_size: A tuple/list of type int32. @@ -1058,7 +1072,7 @@ REG_OP(Conv3DTranspose) * @li strides: A tuple/list of 5 integers. * Specifies the stride of the sliding window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". - * @li pads: A tuple/list of 6 integers. + * @li pads: A tuple/list of 6 integers . \n *@par Attributes: * Five attributes: @@ -1071,7 +1085,7 @@ REG_OP(Conv3DTranspose) * @li output_padding: The size will be added in the output shape. * @li offset_x: Input offset_x value. Reserved. *@par Outputs: - * y: A Tensor. Has the same type as filter + * y: A Tensor. Has the same type and format as x. *@par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. */ diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 38612463..d9c28087 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -28,17 +28,17 @@ namespace ge { /** *@brief Generates bounding boxes based on "rois" and "deltas". -* It is a customized FasterRcnn operator. +* It is a customized FasterRcnn operator . \n *@par Inputs: -* Two inputs, including: \n +* Two inputs, including: *@li rois: Region of interests (ROIs) generated by the region proposal * network (RPN). A 2D Tensor of type float32 or float16 with shape (N, 4). * "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1", * "y0", and "y1". 
*@li deltas: Absolute variation between the ROIs generated by the RPN and * ground truth boxes. A 2D Tensor of type float32 or float16 with shape (N, 4). -* "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh". +* "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh" . \n *@par Attributes: *@li means: An index of type int. Defaults to [0,0,0,0]. @@ -49,7 +49,7 @@ namespace ge { * the network. Used to ensure that the bbox shape after conversion does not * exceed "max_shape". *@li wh_ratio_clip: Defaults to "16/1000". The values of "dw" and "dh" fall -* within (-wh_ratio_clip, wh_ratio_clip). +* within (-wh_ratio_clip, wh_ratio_clip) . \n *@par Outputs: *bboxes: Bboxes generated based on "rois" and "deltas". Have the same format @@ -67,22 +67,22 @@ REG_OP(BoundingBoxDecode) /** *@brief Computes the coordinate variations between bboxes and ground truth -* boxes. It is a customized FasterRcnn operator. +* boxes. It is a customized FasterRcnn operator . \n *@par Inputs: -* Two inputs, including: \n +* Two inputs, including: *@li anchor_box: Anchor boxes. A 2D Tensor of float32 with shape (N, 4). * "N" indicates the number of bounding boxes, and the value "4" refers to * "x0", "x1", "y0", and "y1". *@li ground_truth_box: Ground truth boxes. A 2D Tensor of float32 with * shape (N, 4). "N" indicates the number of bounding boxes, and the value "4" -* refers to "x0", "x1", "y0", and "y1". +* refers to "x0", "x1", "y0", and "y1" . \n *@par Attributes: *@li means: An index of type int. Defaults to [0,0,0,0]. * "deltas" = "deltas" x "stds" + "means". *@li stds: An index of type int. Defaults to [1.0,1.0,1.0,1.0]. -* "deltas" = "deltas" x "stds" + "means". +* "deltas" = "deltas" x "stds" + "means" . \n *@par Outputs: *delats: A 2D Tensor of type float32 with shape (N, 4), specifying the variations between all anchor boxes and ground truth boxes. 
@@ -97,10 +97,10 @@ REG_OP(BoundingBoxEncode) /** *@brief Judges whether the bounding box is valid. It is a customized -* FasterRcnn operator. +* FasterRcnn operator . \n *@par Inputs: -* Two inputs, including: \n +* Two inputs, including: *@li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4). * "N" indicates the number of bounding boxes, the value "4" indicates "x0", * "x1", "y0", and "y1". @@ -109,7 +109,7 @@ REG_OP(BoundingBoxEncode) *@par Outputs: *valid_tensor: A bool with shape (N, 1), specifying whether an input anchor is -* in an image. "1" indicates valid, while "0" indicates invalid. +* in an image. "1" indicates valid, while "0" indicates invalid . \n *@attention Constraints: * 16 "img_metas" are input. The first three numbers (height, width, ratio) are @@ -123,23 +123,23 @@ REG_OP(CheckValid) /** *@brief Computes the intersection over union (iou) or the intersection over -* foreground (iof) based on the ground-truth and predicted regions. +* foreground (iof) based on the ground-truth and predicted regions . \n *@par Inputs: -* Two inputs, including: \n +* Two inputs, including: *@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with * shape (N, 4). "N" indicates the number of bounding boxes, and the value * "4" refers to "x0", "x1", "y0", and "y1". *@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 -* with shape (M, 4). "M" indicates the number of ground truth boxes, and -* the value "4" refers to "x0", "x1", "y0", and "y1". +* with shape (M, 4). "M" indicates the number of ground truth boxes, and +* the value "4" refers to "x0", "x1", "y0", and "y1" . \n *@par Attributes: -*mode: Computation mode, a character string with the value range of [iou, iof]. +*mode: Computation mode, a character string with the value range of [iou, iof] . \n *@par Outputs: *overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying -* the IoU or IoF ratio. +* the IoU or IoF ratio . 
\n *@attention Constraints: * Only computation of float16 data is supported. To avoid overflow, the input @@ -153,20 +153,23 @@ REG_OP(Iou) .OP_END_FACTORY_REG(Iou) /** -*@brief Performs the backpropagation of ROIAlign for training scenarios. +*@brief Performs the backpropagation of ROIAlign for training scenarios . \n *@par Inputs: -* Three inputs, including: \n +* Three inputs, including: *@li ydiff: A 5HD gradient input of type float32. -*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1". -*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved. +*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs, +the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1". +*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved . \n *@par Attributes: *@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign. *@li pooled_width: A required attribute of type int, specifying the W dimension. *@li pooled_height: A required attribute of type int, specifying the H dimension. *@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. -*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2". +*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical +sampling frequency of each output. If this attribute is set to "0", the sampling frequency is +equal to the rounded up value of "rois", which is a floating point number. 
Defaults to "2" . \n *@par Outputs: *xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". @@ -184,14 +187,15 @@ REG_OP(ROIAlignGrad) .OP_END_FACTORY_REG(ROIAlignGrad) /** -*@brief Obtains the ROI feature matrix from the feature map. It is a customized FasterRcnn operator. +*@brief Obtains the ROI feature matrix from the feature map. It is a customized FasterRcnn operator . \n *@par Inputs: -* Three inputs, including: \n +* Three inputs, including: *@li features: A 5HD Tensor of type float32 or float16. -*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, +*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, +the value "5" indicates the indexes of images where the ROIs are located, * "x0", "y0", "x1", and "y1". -*@li rois_n: An optional input of type int32, specifying the number of valid ROIs. This parameter is reserved. +*@li rois_n: An optional input of type int32, specifying the number of valid ROIs. This parameter is reserved . \n *@par Attributes: *@li spatial_scale: A required attribute of type float32, specifying the scaling ratio of "features" to the original image. @@ -199,10 +203,11 @@ REG_OP(ROIAlignGrad) *@li pooled_width: A required attribute of type int32, specifying the W dimension. *@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", * the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2". -*@li roi_end_mode: An optional attribute of type int32. Defaults to "1". +*@li roi_end_mode: An optional attribute of type int32. Defaults to "1" . \n *@par Outputs: -* output: Outputs the feature sample of each ROI position. 
The format is 5HD Tensor of type float32 or float16. The axis N is the number of input ROIs. Axes H, W, and C are consistent +* output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16. +The axis N is the number of input ROIs. Axes H, W, and C are consistent * with the values of "pooled_height", * "pooled_width", and "features", respectively. */ @@ -219,17 +224,18 @@ REG_OP(ROIAlign) .OP_END_FACTORY_REG(ROIAlign) /** -*@brief Performs SSD prior box detection. +*@brief Performs SSD prior box detection . \n *@par Inputs: * Two inputs, including: *@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16. -*@li img: source image. Has the same type and format as "x". +*@li img: source image. Has the same type and format as "x" . \n *@par Attributes: *@li min_size: A required float32, specifying the minimum edge length of a square prior box. *@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size) -*@li aspect_ratio: An required float32, specifying the aspect ratio for generated rectangle boxes. The height is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaults to "1.0". +*@li aspect_ratio: A required float32, specifying the aspect ratio for generated rectangle boxes. The height +is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaults to "1.0". *@li img_h: An optional int32, specifying the source image height. Defaults to "0". *@li img_w: An optional int32, specifying the source image width. Defaults to "0". *@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. Defaults to "0.0". @@ -237,12 +243,12 @@ REG_OP(ROIAlign) *@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True". *@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False". 
*@li offset: An optional float32, specifying the offset. Defaults to "0.5". -*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). +*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n *@par Outputs: -*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. +*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n -*@attention Constraints:\n +*@attention Constraints: * This operator applies only to SSD networks. *@see SSDDetectionOutput() *@par Third-party framework compatibility @@ -266,7 +272,7 @@ REG_OP(ROIAlign) .OP_END_FACTORY_REG(PriorBox); /** -*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox. +*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n *@par Inputs: * Six inputs, including: @@ -275,7 +281,7 @@ REG_OP(ROIAlign) *@li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height. *@li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width. *@li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the height of each prior box. -*@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box. +*@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box . \n *@par Attributes: *@li min_size: A required float32, specifying the minimum edge length of a square prior box. @@ -287,17 +293,17 @@ REG_OP(ROIAlign) *@li flip: An optional bool. 
If "True", "aspect_ratio" will be flipped. Defaults to "True". *@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False". *@li offset: An optional float32, specifying the offset. Defaults to "0.5". -*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). +*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n *@par Outputs: -*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. +*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n -*@attention Constraints:\n +*@attention Constraints: * This operator applies only to SSD networks. *@see SSDDetectionOutput() *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. -*@par Restrictions: +*@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. */ REG_OP(PriorBoxD) @@ -321,7 +327,7 @@ REG_OP(ROIAlign) .OP_END_FACTORY_REG(PriorBoxD); /** -*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox. +*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n *@par Inputs: * Six inputs, including: @@ -339,17 +345,17 @@ REG_OP(ROIAlign) *@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True". *@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False". *@li offset: An optional float32, specifying the offset. Defaults to "0.5". -*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). 
+*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n *@par Outputs: -*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. +*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n -*@attention Constraints:\n +*@attention Constraints: * This operator applies only to SSD networks. *@see SSDDetectionOutput() *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. -*@par Restrictions: +*@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. */ REG_OP(PriorBoxDV2) @@ -370,7 +376,7 @@ REG_OP(ROIAlign) .OP_END_FACTORY_REG(PriorBoxDV2); /** -*@brief Performs Position Sensitive ROI Pooling. +*@brief Performs Position Sensitive ROI Pooling . \n *@par Inputs: * Two inputs, including: @@ -381,7 +387,7 @@ REG_OP(ROIAlign) * [batch, 5, rois_num], describing the ROIs, each ROI consists of five * elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates * the index of the input feature map, "x1", "y1", "x2", or "y2" must be -* greater than or equal to "0.0". +* greater than or equal to "0.0" . \n *@par Attributes: *@li output_dim: A required int32, specifying the number of output channels, @@ -389,11 +395,11 @@ REG_OP(ROIAlign) *@li group_size: A required int32, specifying the number of groups to encode * position-sensitive score maps, must be within the range (0, 128). *@li spatial_scale: A required float32, scaling factor for mapping the input -* coordinates to the ROI coordinates. +* coordinates to the ROI coordinates . \n *@par Outputs: *y: An NC1HWC0 tensor of type float16 or float32, describing the result -* feature map. +* feature map . 
\n *@attention Constraints: * HC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16 @@ -408,7 +414,7 @@ REG_OP(PSROIPooling) .OP_END_FACTORY_REG(PSROIPooling) /** -*@brief Returns detection result. +*@brief Returns detection result . \n *@par Inputs: * Four inputs, including: @@ -426,9 +432,9 @@ REG_OP(PSROIPooling) *@li box: A tensor of type float16 or float32 for proposal of actual output, with output shape [batch, numBoxes,8]. * 8 means [x1, y1, x2, y2, score, label, batchID, NULL], the maximum value of numBoxes is 1024. That is, take min (the maximum number of input boxes, 1024) -*@li actual_bbox_num: A tensor of type int32 With shape [bacth, num_classes], specifying the number of output boxes. +*@li actual_bbox_num: A tensor of type int32 With shape [bacth, num_classes], specifying the number of output boxes . \n -*@attention Constraints:\n +*@attention Constraints: *@li totalnum < max_rois_num * batch_rois. *@li "score" must be with shape (total_num, (num_classes+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images. *@li "bbox_delta" must be with shape (total_num, (num_classes*4+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images. @@ -450,7 +456,7 @@ REG_OP(FSRDetectionOutput) .OP_END_FACTORY_REG(FSRDetectionOutput) /** -*@brief Returns detection result. +*@brief Returns detection result . \n *@par Inputs: * Four inputs, including: @@ -494,10 +500,12 @@ REG_OP(SSDDetectionOutput) .OP_END_FACTORY_REG(SSDDetectionOutput) /** -*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3. +*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n *@par Inputs: -*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W),where, "obj" indicates the confidence of an object, and only one confidence is supported. 
Boxes are arranged as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn. +*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W), +where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged +as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n *@par Attributes: *@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3. @@ -506,12 +514,15 @@ REG_OP(SSDDetectionOutput) *@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3".Defaults to "V3" *@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false". *@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false". -*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false". +*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n *@par Outputs: -*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. -*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2], where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. -*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2], where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes. 
+*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], +where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. +*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2], +where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. +*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2], +where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n *@attention Constraints: *@li This operator applies to YOLO v2 and v3 networks. @@ -534,13 +545,13 @@ REG_OP(Yolo) .OP_END_FACTORY_REG(Yolo) /** -*@brief Performs YOLO V2 detection. +*@brief Performs YOLO V2 detection . \n *@par Inputs: * Four inputs, including: -*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov3DetectionOutput. \n +*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov3DetectionOutput. Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. -*@li img_info: A float16 or float32, describing the image information including the required image height and width \n +*@li img_info: A float16 or float32, describing the image information including the required image height and width and the actual image height and width. * *@par Attributes: @@ -549,11 +560,13 @@ and the actual image height and width. *@li coords: Specifies the number of coordinate parameters. Must be 4. *@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20]. *@li relative: An optional bool. Defaults to and must be "true". 
-*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, +which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n *@li post_nms_topn: An optional int32. This attribute is reserved. -*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. -*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n +*@li score_threshold: A required float, specifying the class score threshold for box filtering, + which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". * *@par Outputs: @@ -562,7 +575,7 @@ and the actual image height and width. *@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid, * the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 * -*@attention Constraints:\n +*@attention Constraints: *@li This operator applies only to the YOLO v2 network. *@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator. * @@ -590,17 +603,18 @@ REG_OP(YoloV2DetectionOutput) .OP_END_FACTORY_REG(YoloV2DetectionOutput) /** -*@brief Performs YOLO V2 detection. +*@brief Performs YOLO V2 detection . 
\n *@par Inputs: *Six inputs, including: -*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput. \n +*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput. Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. -*@li imginfo: A float16, describing the image information including the required image height and width \n +*@li imginfo: A float16, describing the image information including the required image height and width and the actual image height and width. -*@li windex: A windex tensor with shape [height, weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed. \n +*@li windex: A windex tensor with shape [height, weight]. Has the same type as the inputs. +[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed. -*@li hindex: A hindex tensor with shape [height, weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]]. \n +*@li hindex: A hindex tensor with shape [height, weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]]. * *@par Attributes: @@ -611,9 +625,9 @@ and the actual image height and width. *@li relative: An optional bool. Defaults to and must be "true". *@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. *@li post_nms_topn: An optional int32. This attribute is reserved. 
-*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0] . \n -*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". * *@par Outputs: @@ -622,14 +636,14 @@ and the actual image height and width. *@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid, * the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 * -*@attention Constraints:\n +*@attention Constraints: *@li This operator applies only to the YOLO v2 network. -*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator. +*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator . \n *@see Yolo() *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. -*@par Restrictions: +*@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead. */ REG_OP(YoloV2DetectionOutputD) @@ -654,13 +668,13 @@ REG_OP(YoloV2DetectionOutputD) .OP_END_FACTORY_REG(YoloV2DetectionOutputD) /** -*@brief Performs YOLO V3 detection. +*@brief Performs YOLO V3 detection . \n *@par Inputs: *Ten inputs, including: -*@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. 
A Yolo operator has three outputs: "coords", "obj", and "class". \n +*@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. -*@li img_info: A float16 or float32, describing the image information including the required image height and width \n +*@li img_info: A float16 or float32, describing the image information including the required image height and width and the actual image height and width. * *@par Attributes: @@ -669,24 +683,24 @@ and the actual image height and width. *@li coords: Specifies the number of coordinate parameters. Must be 4. *@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. *@li relative: An optional bool. Defaults to and must be "true". -*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n *@li post_nms_topn: An optional int32. This attribute is reserved. -*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0] . \n -*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. 
The value range is [0.0, 1.0]. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". * *@par Outputs: -*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box. +*@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. -*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. +*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 * -*@attention Constraints:\n +*@attention Constraints: *@li This operator applies only to the YOLO v3 network. -*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. +*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators . \n *@see Yolo() *@par Third-party framework compatibility @@ -720,17 +734,19 @@ REG_OP(YoloV3DetectionOutput) .OP_END_FACTORY_REG(YoloV3DetectionOutput) /** -*@brief Performs YOLO V3 detection. +*@brief Performs YOLO V3 detection . \n *@par Inputs: *16 Input, including: -*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. \n +*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. 
-*@li imginfo: A float16, describing the image information including the required image height and width \n +*@li imginfo: A float16, describing the image information including the required image height and width and the actual image height and width. -*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively. +*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. +[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n -*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively. +*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. +[[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n * *@par Attributes: @@ -742,22 +758,22 @@ and the actual image height and width. *@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. *@li post_nms_topn: An optional int32. This attribute is reserved. *@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. -*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. 
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". * *@par Outputs: -*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box. +*@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. -*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. +*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 * -*@attention Constraints:\n +*@attention Constraints: *@li This operator applies only to the YOLO v3 network. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. *@see Yolo() *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. -*@par Restrictions: +*@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead. */ REG_OP(YoloV3DetectionOutputD) @@ -794,17 +810,77 @@ REG_OP(YoloV3DetectionOutputD) .OP_END_FACTORY_REG(YoloV3DetectionOutputD) /** -*@brief Performs YOLO V3 detection. +*@brief Performs YOLO V3 detection . \n + +*@par Inputs: +*Ten inputs, including: +*@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n +There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. 
+*@li img_info: A float16 or float32, describing the image information including the required image height and width \n +and the actual image height and width. +* +*@par Attributes: +*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. +*@li coords: Specifies the number of coordinate parameters. Must be 4. +*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. +*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. + +*@li post_nms_topn: An optional int32. This attribute is reserved. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. + +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n + +*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". +* +*@par Outputs: +*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), +* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. +*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. +* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 +* +*@attention Constraints:\n +*@li This operator applies only to the YOLO v3 network. 
+*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. + +*@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(YoloV3DetectionOutputV2) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .REQUIRED_ATTR(biases, ListFloat) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(relative, Bool, true) + .ATTR(obj_threshold, Float, 0.5) + .ATTR(post_nms_topn, Int, 512) + .ATTR(score_threshold, Float, 0.5) + .ATTR(iou_threshold, Float, 0.45) + .ATTR(pre_nms_topn, Int, 512) + .ATTR(N, Int, 10) + .ATTR(resize_origin_img_to_net, Bool, false) + .ATTR(out_box_dim, Int, 3) + .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out_num, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(YoloV3DetectionOutputV2) + +/** +*@brief Performs YOLO V3 detection . \n *@par Inputs: *16 Input, including: -*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. \n +*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. -*@li imginfo: A float16, describing the image information including the required image height and width \n +*@li imginfo: A float16, describing the image information including the required image height and width and the actual image height and width. -*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively. +*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. 
+[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] +is formed for the three Yolo outputs, respectively .It's a dynamic input. \n -*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively. +*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n * *@par Attributes: @@ -816,23 +892,23 @@ and the actual image height and width. *@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. *@li post_nms_topn: An optional int32. This attribute is reserved. *@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. -*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". * *@par Outputs: -*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box. +*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. 
-*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. +*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 * -*@attention Constraints:\n +*@attention Constraints: *@li This operator applies only to the YOLO v3 network. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. *@see Yolo() *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. */ -REG_OP(YoloV3DetectionOutputV2) +REG_OP(YoloV3DetectionOutputV2D) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -848,30 +924,31 @@ REG_OP(YoloV3DetectionOutputV2) .ATTR(pre_nms_topn, Int, 512) .ATTR(N, Int, 10) .ATTR(resize_origin_img_to_net, Bool, false) + .ATTR(out_box_dim, Int, 3) .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) - .OP_END_FACTORY_REG(YoloV3DetectionOutputV2) + .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D) /** *@brief Spatial Pyramid Pooling, multi-level pooling. -* Pooling out(n, sigma(c*2^i*2^i)) tensor, i in range[0,pyramid_height). +* Pooling out(n, sigma(c*2^i*2^i)) tensor, i in range[0,pyramid_height) . \n *@par Inputs: -*x: An NCHW tensor, support float16 or float32 type. +*x: An NCHW tensor, support float16 or float32 type . \n *@par Attributes: * @li pyramid_height: An required int32. * Multi-level pooling out from 2^0 to 2^(pyramid_height-1). * @li pool_method: An optional int32, pooling method: 0-MAX, 1-AVE. -* Defaults to "0". +* Defaults to "0" . \n *@par Outputs: -*y: A NCHW tensor, support float16 or float32 type. +*y: A NCHW tensor, support float16 or float32 type . 
\n *@attention Constraints: * @li pyramid_height: pyramid_heigjt should be in range [0,7). * Pooling paramter should statisfied with caffe pooling param(pad= -1. -*@li If "scale_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "scale" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n +*@li If "scale_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "scale" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis). * If "axis < 0", the ith axis of "scale" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis). *@li If "scale_from_blob = true" and "num_axes = 0", "scale" is a scalar with shape length 1 and dimension size 1. -*@li If "scale_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "scale" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n +*@li If "scale_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "scale" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes). * If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "scale" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). -*@li If "scale_from_blob = false", "scale" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "scale" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n +*@li If "scale_from_blob = false", "scale" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "scale" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m). * If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "scale" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). 
*@li If "bias" is not None, the constraints for "bias" is the same as that for "scale". *@par Third-party framework compatibility @@ -660,11 +660,11 @@ REG_OP(Scale) .OP_END_FACTORY_REG(Scale) /** -*@brief Local Response Normalization. +*@brief Local Response Normalization . \n *@par Inputs: *One input, including: -*@li x: A Tensor. Must be 4-D shape, and only support the following types: float16, float32. +*@li x: A Tensor. Must be 4-D shape, and only support the following types: float16, float32 . \n *@par Attributes: *@li depth_radius: An optional int32, specifying the half-width of the normalization window. Defaults to "5". @@ -676,10 +676,10 @@ REG_OP(Scale) *@li alpha: An optional float32. A scaling factor, usually positive. * Defaults to "1.0". *@li beta: An optional float32. An exponent. Defaults to "0.75" for the caffe framework, Defaults to "0.5" for others. -*@li norm_region: An optional string. A mode option. "ACROSS_CHANNELS":0. Defaults to "ACROSS_CHANNELS". +*@li norm_region: An optional string. A mode option. "ACROSS_CHANNELS":0. Defaults to "ACROSS_CHANNELS" . \n *@par Outputs: -*y: A Tensor. Has the same data type and shape as "x". +*y: A Tensor. Has the same data type and shape as "x" . \n *@par Third-party framework compatibility: * Compatible with the TensorFlow operator LRN. @@ -695,12 +695,12 @@ REG_OP(LRN) .OP_END_FACTORY_REG(LRN) /** -* @brief Computes the gradient for Local Response Normalization. +* @brief Computes the gradient for Local Response Normalization . \n * @par Inputs: * @li grads: A 4D Tensor of type float16 or float32. * @li x: A 4D Tensor of type float16 or float32. -* @li y: A 4D Tensor of type float16 or float32. +* @li y: A 4D Tensor of type float16 or float32 . \n * @par Attributes: * @li depth_radius: An optional int, specifying the half-width of the @@ -709,13 +709,13 @@ REG_OP(LRN) * Defaults to "1". * @li alpha: An optional float32. A scaling factor, usually positive. * Defaults to "1". 
-* @li beta: An optional float32. An exponent. Defaults to "0.5". +* @li beta: An optional float32. An exponent. Defaults to "0.5" . \n * @par Outputs: -* z: A Tensor. Has the same type and shape as "grads". +* z: A Tensor. Has the same type and shape as "grads" . \n * @attention Constraints: -* "x" and "y" must have the same shape and type as "grads". +* "x" and "y" must have the same shape and type as "grads" . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator LRNGrad. @@ -732,7 +732,7 @@ REG_OP(LRNGrad) .OP_END_FACTORY_REG(LRNGrad) /** - *@brief Calculates the RNNT Loss (log probability) for each batch entry. \n + *@brief Calculates the RNNT Loss (log probability) for each batch entry. Also calculates the gradient. *@par Inputs: @@ -762,52 +762,52 @@ REG_OP(RNNTLoss) .OP_END_FACTORY_REG(RNNTLoss) /** -*@brief Performs group normalization. +*@brief Performs group normalization . \n -*@par Inputs:\n +*@par Inputs: * Five inputs, including: (NHWC, NCHW supported) -*@li x: A 4D Tensor of type float16 or float32, with format NHWC or \n +*@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW for 4D. -*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format \n +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Specifies the scaling factor. -*@li offset: A Tensor of type float32. Must be 1D if input "x" is with \n +*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Specifies the offset. -*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format \n +*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Reserved. Mu st be "None" if the operation is used for training. -*@li variance: A Tensor of type float32. Must be 1D if input "x" is with \n -format NHWC or NCHW. Specifies the variance used for inference. Reserved. +*@li variance: A Tensor of type float32. 
Must be 1D if input "x" is with +format NHWC or NCHW. Specifies the variance used for inference. Reserved . \n *@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to \n +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". -*@li data_format: An optional string, specifying the format of "x". \n +*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". -*@li is_training: An optional bool, specifying if the operation is used for \n -training or inference. Defaults to "True". +*@li is_training: An optional bool, specifying if the operation is used for +training or inference. Defaults to "True" . \n -*@par Outputs:\n +*@par Outputs: * Five outputs, including: (NHWC, NCHW supported) -*@li y: A 4D Tensor of type float16 or float32 for the normalized "x", \n +*@li y: A 4D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D. -*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with \n +*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Specifies the mean of "x". -*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is \n +*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Specifies the variance of "x". -*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if \n +*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Specifies the mean o f "x" for gradient computation. Pass "None" to skip this output. -*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if \n +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Specifies the varian -ce of "x" for gradient computation. Pass "None" to skip this output. 
+ce of "x" for gradient computation. Pass "None" to skip this output . \n *@attention Constraints: -*@li If the operation is used for inference and outputs "reserve_space_1" \n -and "reserve_space_2" are available, then "reserve_space_1" has the same \n +*@li If the operation is used for inference and outputs "reserve_space_1" +and "reserve_space_2" are available, then "reserve_space_1" has the same value as "mean" and "reserve_spa ce_2" has the same value as "variance". -*@li For Ascend 310, the result accuracy fails due to the square root \n -instruction. +*@li For Ascend 310, the result accuracy fails due to the square root +instruction . \n *@par Third-party framework compatibility *@li Compatible with the PyTorch operator GroupNorm. @@ -830,35 +830,35 @@ REG_OP(GroupNorm) .OP_END_FACTORY_REG(GroupNorm) /** -*@brief Performs instance normalization. +*@brief Performs instance normalization . \n -*@par Inputs:\n +*@par Inputs: * Five inputs, including: (NC1HWC0, supported) *@li x: A 5D Tensor of type float16 or float32, NC1HWC0. *@li gamma: A Tensor of type float32. A 5D Tensor for scaling factor, to scale the normalized x. -*@li beta: A Tensor of type float32. +*@li beta: A Tensor of type float32. A 5D Tensor for offset, to shift to the normalized x. -*@li mean: A Tensor of type float32. +*@li mean: A Tensor of type float32. A 5D Tensor Specifies the mean used for inference. Reserved. -*@li variance: A Tensor of type float32. -A 5D Tensor Specifies the variance used for inference. Reserved. +*@li variance: A Tensor of type float32. +A 5D Tensor Specifies the variance used for inference. Reserved . \n *@par Attributes: -*@li is_training: An optional bool, specifying if the operation is used for \n +*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True". -*@li momentum: An optional float32, \n +*@li momentum: An optional float32, the value used for the running_mean and running_var computation. 
Default: "0.1". -*@li epsilon: An optional float32, specifying the small value added to \n -variance to avoid dividing by zero. Defaults to "0.00001". +*@li epsilon: An optional float32, specifying the small value added to +variance to avoid dividing by zero. Defaults to "0.00001" . \n -*@par Outputs:\n +*@par Outputs: * Three outputs, including: (NHWC, NCHW NC1HWC0 supported) -*@li y: A 5D tensor of type float16 or float32 for the normalized "x", \n -*@li batch_mean: A Tensor of type float32. +*@li y: A 5D tensor of type float16 or float32 for the normalized "x", +*@li batch_mean: A Tensor of type float32. Specifies the mean of "x". -*@li batch_variance: A Tensor of type float32. -Specifies the variance of "x". +*@li batch_variance: A Tensor of type float32. +Specifies the variance of "x" . \n *@par Third-party framework compatibility *@li Compatible with the PyTorch operator InstanceNorm. @@ -879,6 +879,29 @@ REG_OP(InstanceNormV2) .ATTR(epsilon, Float, 0.00001) .OP_END_FACTORY_REG(InstanceNormV2) +/** +*@brief Performs instance normalization for inference. + +*@par Inputs:\n +* Five inputs, including: (NC1HWC0 supported) +*@li x: A Tensor of type float16 or float32. +*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. +*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. +*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the mean. +*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. +*@li variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. + +*@par Outputs:\n +*y: A Tensor of type float16 or float32 for the normalized "x". +*batch_mean: A Tensor of type float32 for the result mean. +*batch_variance: A Tensor of type float32 for the result variance. + +*@attention Constraints: +*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. 
Please use INInferV2 instead. +*/ REG_OP(INInferV2D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 4878935f..5d3cd931 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -33,26 +33,26 @@ namespace ge { *@par Attributes: *@li mode: An optional int32, specifying the pooling algorithm, either "1" (max pooling) or "0" (avg pooling). Defaults to "0". *@li global_pooling: An optional bool. Defaults to "false". -*@li window: Optional, including: \n -*window[0]: An optional int32, specifying the window size along in the H dimension. The value range is [1, 32768]. Defaults to "1". \n -*window[1]: An optional int32, specifying the window size along in the W dimension. The value range is [1, 32768]. Defaults to "1". \n -*@li stride: Optional, including: \n -*stride[0]: An optional int32, specifying the stride along in the H dimension. The value range is [1, 63]. Defaults to "1". \n -*stride[1]: An optional int32, specifying the stride along in the W dimension. The value range is [1, 63]. Defaults to "1". \n -*@li pad: Optional, including: \n -*pad[0]: An optional int32, specifying the up padding. Defaults to "0". \n -*pad[1]: An optional int32, specifying the bottom padding. Defaults to "0". \n -*pad[2]: An optional int32, specifying the left padding. Defaults to "0". \n -*pad[3]: An optional int32, specifying the right padding. Defaults to "0". \n -*@li dilation: Optional, including: \n -*dilation[0]: An optional int32, specifying the up dilation. Defaults to "1". \n -*dilation[1]: An optional int32, specifying the bottom dilation. Defaults to "1". \n -*dilation[2]: An optional int32, specifying the left dilation. Defaults to "1". \n -*dilation[3]: An optional int32, specifying the right dilation. Defaults to "1". 
\n +*@li window: Optional, including: +*window[0]: An optional int32, specifying the window size along in the H dimension. The value range is [1, 32768]. Defaults to "1". +*window[1]: An optional int32, specifying the window size along in the W dimension. The value range is [1, 32768]. Defaults to "1". +*@li stride: Optional, including: +*stride[0]: An optional int32, specifying the stride along in the H dimension. The value range is [1, 63]. Defaults to "1". +*stride[1]: An optional int32, specifying the stride along in the W dimension. The value range is [1, 63]. Defaults to "1". +*@li pad: Optional, including: +*pad[0]: An optional int32, specifying the up padding. Defaults to "0". +*pad[1]: An optional int32, specifying the bottom padding. Defaults to "0". +*pad[2]: An optional int32, specifying the left padding. Defaults to "0". +*pad[3]: An optional int32, specifying the right padding. Defaults to "0". +*@li dilation: Optional, including: +*dilation[0]: An optional int32, specifying the up dilation. Defaults to "1". +*dilation[1]: An optional int32, specifying the bottom dilation. Defaults to "1". +*dilation[2]: An optional int32, specifying the left dilation. Defaults to "1". +*dilation[3]: An optional int32, specifying the right dilation. Defaults to "1". *@li ceil_mode: An optional int32, either "0" (ceil mode) or "1" (floor mode). Defaults to "0". *@par Outputs: *y: An NCHW tensor of type float16, float32, int32. -*@attention Constraints:\n +*@attention Constraints: *@li window[0] * window[1] < 256; *@li 1<=input_h<=4096,1<=input_w<=4096 *@li If input tensor N is a prime number, it should be less than 65535. @@ -73,19 +73,19 @@ REG_OP(Pooling) .OP_END_FACTORY_REG(Pooling) /** -*@brief Performs average pooling on the input. +*@brief Performs average pooling on the input . \n *@par Inputs: -*x: A tensor of type float16, float32, double. +*x: A tensor of type float16, float32, double . 
\n *@par Attributes: *@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 32768]. *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63]. *@li padding: A required string, specifying the padding algorithm, either "VALID" or "SAME". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding. -*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default). +*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default) . \n *@par Outputs: -*y: The average pooled output tensor. Has the same type and format as input "x". +*y: The average pooled output tensor. Has the same type and format as input "x" . \n *@attention Constraints: *@li This operator applies only to a TensorFlow network. @@ -106,10 +106,10 @@ REG_OP(AvgPool) .OP_END_FACTORY_REG(AvgPool) /** -*@brief Performs average pooling on the input. +*@brief Performs average pooling on the input . \n *@par Inputs: -*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double . \n *@par Attributes: *@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. @@ -118,16 +118,19 @@ REG_OP(AvgPool) *@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. *@li count_include_pad: When true, will include the zero-padding in the averaging calculation. 
*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. -*@li data_format: A string, format of input data. +*@li data_format: A string, format of input data . \n *@par Outputs: -*y: The average pooled output tensor. +*y: The average pooled output tensor . \n *@attention Constraints: *@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] *@par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool3D. +* +* @par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(AvgPool3D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) @@ -142,7 +145,7 @@ REG_OP(AvgPool3D) .OP_END_FACTORY_REG(AvgPool3D) /** -*@brief Performs max_pool_ext2 on the input. +*@brief Performs max_pool_ext2 on the input . \n *@par Inputs: * One input: @@ -153,15 +156,15 @@ REG_OP(AvgPool3D) *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. *@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. *@li padding: A required string. No default value. -*@li data_format: An optional string. Defaults to "NC1HWC0". +*@li data_format: An optional string. Defaults to "NC1HWC0" . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -*@li "padding" is either "SAME" or "VALID". +*@li "padding" is either "SAME" or "VALID" . 
\n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPoolV2. @@ -180,7 +183,7 @@ REG_OP(MaxPoolExt2) .OP_END_FACTORY_REG(MaxPoolExt2) /** -*@brief Performs max pooling on the input. +*@brief Performs max pooling on the input . \n *@par Inputs: * One input: @@ -195,10 +198,10 @@ REG_OP(MaxPoolExt2) * specifying the stride of the sliding window for each dimension of * the input tensor. No default value. *@li padding: A required string. No default value. -*@li data_format: An optional string. Defaults to "NHWC". +*@li data_format: An optional string. Defaults to "NHWC" . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, @@ -224,33 +227,33 @@ REG_OP(MaxPool) .OP_END_FACTORY_REG(MaxPool) /** -*@brief Performs max 3d pooling on the input. +*@brief Performs max 3d pooling on the input . \n *@par Inputs: -*x: An NC1HWC0 Tensor. Supported type float16, float32, double. +*x: An NC1HWC0 Tensor. Supported type float16, float32, double . \n *@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, \n -specifying the size of the window for each dimension of the input tensor. \n +*@li ksize: A required list of int8, int16, int32, or int64 values, +specifying the size of the window for each dimension of the input tensor. No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, \n -specifying the stride of the sliding window for each dimension of \n +*@li strides: A required list of int8, int16, int32, or int64 values, +specifying the stride of the sliding window for each dimension of the input tensor. No default value. -*@li padding: A required string. Default value "SAME". +*@li padding: A required string type of float16. *@li pads: A list type of int32. Default value {0, 0, 0}. 
*@li dilation: A list type of int32. Default value {1, 1, 1}. *@li ceil_mode: A ceil mode number of int32 . Default value 0. -*@li data_format: An optional string. Defaults to "NDHWC". +*@li data_format: An optional string. Defaults to "NDHWC" . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, * ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -*@li "padding" is either "SAME" or "VALID". +*@li "padding" is either "SAME" or "VALID" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPool3D. @@ -269,7 +272,7 @@ REG_OP(MaxPool3D) /** -* @brief Computes second-order gradients of the maxpooling3d function. +* @brief Computes second-order gradients of the maxpooling3d function . \n * @par Inputs: * @li orig_x: Original forward input tensor(NDC1HWC0) of type float16 @@ -285,13 +288,13 @@ REG_OP(MaxPool3D) * @li pads: A required list or tuple * @li padding: A required string, window sliding mode. Either SAME or VALID. * @li data_format: An optional string. -* Format of the original input, either NCDHW or NDHWC. Defaults to NDHWC. +* Format of the original input, either NCDHW or NDHWC. Defaults to NDHWC . \n * @attention Constraints: * @li Only the Ascend 910 platform is supported. * @li "orig_x" and "grads" must have the same shape. * @li "orig_y" and "y" must have the same shape. Otherwise, an error is reported. -* @li "orig_x", "orig_y", "grads", and "y" must be NDC1HWC0 tensors. +* @li "orig_x", "orig_y", "grads", and "y" must be NDC1HWC0 tensors . \n * @par Outputs: * @li y: Result tensor of type float16 @@ -313,12 +316,12 @@ REG_OP(MaxPool3DGradGrad) /** -* @brief Computes gradients of the maxpooling function. 
+* @brief Computes gradients of the maxpooling function . \n * @par Inputs: * @li x1: A mutable NC1HWC0 tensor of type RealNumberType. * @li x2: A mutable NC1HWC0 tensor of type RealNumberTypex. -* @li grad: A mutable NC1HWC0 tensor of type RealNumberType. +* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n * @par Attributes: * @li ksize: A required tuple or list, specifying the size of the window for @@ -328,10 +331,10 @@ REG_OP(MaxPool3DGradGrad) * @li padding: A required string, specifying the type of padding algorithm * to use. * @li data_format: An optional string, Specify the data format of the input and -* output data. With the default format "NHWC". +* output data. With the default format "NHWC" . \n * @par Outputs: -* y: A mutable tensor. Has the same shape and type as "x1". +* y: A mutable tensor. Has the same shape and type as "x1" . \n * @attention Constraints: * @li Computing gradients of global pooling is not supported, which means @@ -353,13 +356,13 @@ REG_OP(MaxPoolGrad) .OP_END_FACTORY_REG(MaxPoolGrad) /** -* @brief Computes second-order gradients of the maxpooling function. +* @brief Computes second-order gradients of the maxpooling function . \n * @par Inputs: * @li x1: Original forward input tensor. Supported type:float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. * @li x2: Has the same type and format as input "x1". -* @li grad:Has the same type and format as input "x1". +* @li grad:Has the same type and format as input "x1" . \n * @par Attributes: * @li ksize: A required list or tuple, @@ -368,7 +371,7 @@ REG_OP(MaxPoolGrad) * specifying the stride of the sliding window. * @li padding: A required string, window sliding mode. Either SAME or VALID. * @li data_format: An optional string. -* Format of the original input, either NCHW or NHWC. Defaults to NHWC. +* Format of the original input, either NCHW or NHWC. Defaults to NHWC . 
\n * @attention Constraints: * @li Only the Ascend 910 platform is supported. @@ -377,10 +380,10 @@ REG_OP(MaxPoolGrad) * @li "x1", "x2", "grads", and "y" must be 5D tensors. * @li ksize[H] and ksize[W] is in the range [1, 255]. * @li strides[H] and strides[W] is in the range [1, 63]. -* @li Other dimensions of ksize and strides is 1. +* @li Other dimensions of ksize and strides is 1 . \n * @par Outputs: -* @li y: Has the same type and format as input "x1". +* @li y: Has the same type and format as input "x1" . \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator MaxPoolGradGrad. @@ -397,7 +400,7 @@ REG_OP(MaxPoolGradGrad) .OP_END_FACTORY_REG(MaxPoolGradGrad) /** -*@brief Performs max_pool_ext2 on the input. +*@brief Performs max_pool_ext2 on the input . \n *@par Inputs: * Two inputs: @@ -408,15 +411,15 @@ REG_OP(MaxPoolGradGrad) *@par Attributes: *@li padding: A required string. No default value. -*@li data_format: An optional string. Defaults to "NC1HWC0". +*@li data_format: An optional string. Defaults to "NC1HWC0" . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -*@li "padding" is either "SAME" or "VALID". +*@li "padding" is either "SAME" or "VALID" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPoolV2. @@ -432,12 +435,12 @@ REG_OP(MaxPoolV2) /** *@brief Performs max pooling on the input and outputs both max values and - * indices. + * indices . \n *@par Inputs: * One input: *x: An NC1HWC0 Tensor. Supported type: float, double, int32, - * uint8, int16, int8, int64, uint16, half, uint32, uint64. 
+ * uint8, int16, int8, int64, uint16, half, uint32, uint64 . \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, @@ -446,7 +449,7 @@ REG_OP(MaxPoolV2) *@li strides: A required list of int8, int16, int32, or int64 values, * specifying the stride of the sliding window for each dimension of * the input tensor. No default value. -*@li padding: A required string. No default value. +*@li padding: A required string. No default value . \n *@par Outputs: *y: A Tensor. Has the same type and format as input "x". @@ -456,7 +459,7 @@ REG_OP(MaxPoolV2) * ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -*@li "padding" is either "SAME" or "VALID". +*@li "padding" is either "SAME" or "VALID" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPoolWithArgmax. @@ -472,7 +475,7 @@ REG_OP(MaxPoolWithArgmax) .OP_END_FACTORY_REG(MaxPoolWithArgmax) /** -*@brief Performs the backpropagation of MaxPoolWithArgmax. +*@brief Performs the backpropagation of MaxPoolWithArgmax . \n *@par Inputs: * Three inputs, including: @@ -480,7 +483,7 @@ REG_OP(MaxPoolWithArgmax) * uint8, int16, int8, int64, uint16, half, uint32, uint64. *@li grad: An NC1HWC0 tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. -*@li argmx: An NC1HWC0 tensor of type int32 or int64. +*@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, @@ -489,10 +492,10 @@ REG_OP(MaxPoolWithArgmax) *@li strides: A required list of int8, int16, int32, or int64 values, * specifying the stride of the sliding window for each dimension of * the input tensor. No default value. -*@li padding: A required string. No default value. +*@li padding: A required string. No default value . 
\n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, @@ -516,25 +519,25 @@ REG_OP(MaxPoolGradWithArgmax) .OP_END_FACTORY_REG(MaxPoolGradWithArgmax) /** -*@brief Performs transform mask to argmax. +*@brief Performs transform mask to argmax . \n *@par Inputs: * Two input: *x: An NC1HWC0 Tensor of type float16. -*mask: An NC1HWC0 Tensor of type uint16. +*mask: An NC1HWC0 Tensor of type uint16 . \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. *@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. -*@li padding: A required string. No default value. +*@li padding: A required string. No default value . \n *@par Outputs: -*argmax: An NC1HWC0 Tensor of type int32. +*argmax: An NC1HWC0 Tensor of type int32 . \n *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -*@li "padding" is either "SAME" or "VALID". +*@li "padding" is either "SAME" or "VALID" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Mask2Argmax. @@ -550,7 +553,7 @@ REG_OP(Mask2Argmax) .OP_END_FACTORY_REG(Mask2Argmax) /** -* @brief Computes second-order gradients of the maxpooling function. +* @brief Computes second-order gradients of the maxpooling function . \n * @par Inputs: * @li x: Original forward input tensor. 
Supported type: float, double, int32, @@ -573,7 +576,7 @@ REG_OP(Mask2Argmax) * @li shape of argmax must be (fmap_n, fmap_c1, kernel_h * kernel_w, * (shape_max_pool[2] * shape_max_pool[3] + 15) // 16 * 16, 1), * or (fmap_n, fmap_c1, kernel_h * kernel_w, -* (shape_max_pool[2] * shape_max_pool[3] + 31) // 16, 16), else failed. +* (shape_max_pool[2] * shape_max_pool[3] + 31) // 16, 16), else failed . \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax. @@ -589,11 +592,11 @@ REG_OP(MaxPoolGradGradWithArgmax) .OP_END_FACTORY_REG(MaxPoolGradGradWithArgmax) /** -* @brief Computes avgpoograd function. +* @brief Computes avgpoograd function . \n * @par Inputs: * @li orig_input_shape: An NHWC tensor of type int32. -* @li input_grad: An NHWC tensor of type float16, float32, or double. +* @li input_grad: An NHWC tensor of type float16, float32, or double . \n * @par Attributes: * @li ksize: A required tuple or list, specifying the size of the window for @@ -602,10 +605,10 @@ REG_OP(MaxPoolGradGradWithArgmax) * window for each dimension of the input tensor. * @li padding: A required string, specifying the type of * the padding algorithm to use. -* @li data_format: An optional string. Defaults to "NHWC". +* @li data_format: An optional string. Defaults to "NHWC" . \n * @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input". +* @out_grad: A mutable tensor with the same shape and type as "orig_input" . \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator AvgPoolGrad. @@ -621,10 +624,12 @@ REG_OP(AvgPoolGrad) .OP_END_FACTORY_REG(AvgPoolGrad) /** -* @brief Computes gradients of average pooling function. +* @brief Computes gradients of average pooling function . \n * @par Inputs: -* @input_grad: An NHWC tensor of type float16, float32, or double. +* @input_grad: An NHWC tensor of type float16. 
+* @mean_matrix: Assist matrix, an NHWC tensor of type float16. +* @kernel_matrix: Assist matrix, an NHWC tensor of type float16. \n * @par Attributes: * @li orig_input_shape: A required Original input dimensions. @@ -634,7 +639,7 @@ REG_OP(AvgPoolGrad) * the sliding window for each dimension of the input tensor. * @li padding: A required string, specifying the type of the padding algorithm * to use. -* @li data_format: An optional string. Defaults to "NHWC". +* @li data_format: An optional string. Defaults to "NHWC" . \n * @par Outputs: * @out_grad: A mutable tensor with the same shape and type as "orig_input". @@ -643,14 +648,15 @@ REG_OP(AvgPoolGrad) * Warning: THIS FUNCTION IS DEPRECATED. Please use AvgPoolGrad instead. */ REG_OP(AvgPoolGradD) - .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) - .OUTPUT(out_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .INPUT(input_grad, TensorType({DT_FLOAT16})) + .INPUT(mean_matrix, TensorType({DT_FLOAT16})) + .INPUT(kernel_matrix, TensorType({DT_FLOAT16})) + .OUTPUT(out_grad, TensorType({DT_FLOAT16})) .REQUIRED_ATTR(orig_input_shape, ListInt) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(padding, String) .ATTR(data_format, String, "NHWC") - .OP_END_FACTORY_REG(AvgPoolGradD) @@ -676,25 +682,25 @@ REG_OP(Upsample) .OP_END_FACTORY_REG(Upsample) /** -*@brief Computes gradient of the FractionalMaxPool function. +*@brief Computes gradient of the FractionalMaxPool function . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li orig_input: A Tensor. Must be one of the following types: float32, float64, int32, int64. * @li orig_output: A Tensor. Must have the same type as orig_input. -* @li out_backprop: A Tensor. Must have the same type as orig_input. \n +* @li out_backprop: A Tensor. Must have the same type as orig_input. 4-D with shape [batch, height, width, channels]. * @li row_pooling_sequence: A Tensor of type int64. 
-* @li col_pooling_sequence: A Tensor of type int64. +* @li col_pooling_sequence: A Tensor of type int64 . \n *@par Attributes: -*overlapping: An optional bool. Defaults to False. +*overlapping: An optional bool. Defaults to False . \n *@par Outputs: -*y: A Tensor. Has the same type as orig_input. +*y: A Tensor. Has the same type as orig_input . \n -*@attention Constraints:\n -*-The implementation for FractionalMaxPoolGrad on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for FractionalMaxPoolGrad on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow FractionalMaxPoolGrad operator. @@ -710,12 +716,12 @@ REG_OP(FractionalMaxPoolGrad) .OP_END_FACTORY_REG(FractionalMaxPoolGrad) /** -*@brief Performs fractional average pooling on the input. +*@brief Performs fractional average pooling on the input . \n *@par Inputs: -*Inputs include: \n -*x: A Tensor. Must be one of the following types: float32, float64, int32, int64. \n - 4-D with shape [batch, height, width, channels]. +*Inputs include: +*x: A Tensor. Must be one of the following types: float32, float64, int32, int64. + 4-D with shape [batch, height, width, channels] . \n *@par Attributes: *@li pooling_ratio: A list of floats that has length >= 4. @@ -723,15 +729,15 @@ REG_OP(FractionalMaxPoolGrad) *@li overlapping: An optional bool. Defaults to False. When set to True, it means when pooling. *@li deterministic: An optional bool. Defaults to False. *@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: *@li y: A Tensor. Has the same type as x. *@li row_pooling_sequence: A Tensor of type int64. -*@li col_pooling_sequence: A Tensor of type int64. +*@li col_pooling_sequence: A Tensor of type int64 . 
\n -*@attention Constraints:\n -*-The implementation for FractionalAvgPool on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for FractionalAvgPool on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow FractionalAvgPool operator. @@ -750,12 +756,12 @@ REG_OP(FractionalAvgPool) .OP_END_FACTORY_REG(FractionalAvgPool) /** -*@brief Performs fractional max pooling on the input. +*@brief Performs fractional max pooling on the input . \n *@par Inputs: -*Inputs include: \n -*x: A Tensor. Must be one of the following types: float32, float64, int32, int64. \n - 4-D with shape [batch, height, width, channels]. +*Inputs include: +*x: A Tensor. Must be one of the following types: float32, float64, int32, int64. + 4-D with shape [batch, height, width, channels] . \n *@par Attributes: *@li pooling_ratio: A list of floats that has length >= 4. Pooling ratio for each dimension of value. @@ -763,15 +769,15 @@ REG_OP(FractionalAvgPool) *@li overlapping: An optional bool. Defaults to False. *@li deterministic: An optional bool. Defaults to False. *@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: *@li y: A Tensor. Has the same type as x. *@li row_pooling_sequence: A Tensor of type int64. -*@li col_pooling_sequence: A Tensor of type int64. +*@li col_pooling_sequence: A Tensor of type int64 . \n -*@attention Constraints:\n -*-The implementation for FractionalMaxPool on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for FractionalMaxPool on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow FractionalMaxPool operator. @@ -790,22 +796,22 @@ REG_OP(FractionalMaxPool) .OP_END_FACTORY_REG(FractionalMaxPool) /** -*@brief Finds values of the n-th order statistic for the last dimension. 
+*@brief Finds values of the n-th order statistic for the last dimension . \n *@par Inputs: -*Inputs include: \n -* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, \n +*Inputs include: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, int64, bfloat16, uint16, half, uint32, uint64. -* @li n: A Tensor of type int32. 0-D. +* @li n: A Tensor of type int32. 0-D . \n *@par Attributes: -*reverse: An optional bool. Defaults to False. +*reverse: An optional bool. Defaults to False . \n *@par Outputs: -*y: A Tensor. Has the same type as x. +*y: A Tensor. Has the same type as x . \n -*@attention Constraints:\n -*-The implementation for NthElement on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for NthElement on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow NthElement operator. @@ -820,24 +826,24 @@ REG_OP(NthElement) .OP_END_FACTORY_REG(NthElement) /** -*@brief Computes gradient of the FractionalAvgPool function. +*@brief Computes gradient of the FractionalAvgPool function . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li orig_input_tensor_shape: A Tensor of type int64. -* @li out_backprop: A Tensor. Must be one of the following types: float32, float64, \n +* @li out_backprop: A Tensor. Must be one of the following types: float32, float64, int32, int64. 4-D with shape [batch, height, width, channels]. * @li row_pooling_sequence: A Tensor of type int64. -* @li col_pooling_sequence: A Tensor of type int64. +* @li col_pooling_sequence: A Tensor of type int64 . \n *@par Attributes: -*overlapping: An optional bool. Defaults to False. +*overlapping: An optional bool. Defaults to False . \n *@par Outputs: -*y: A Tensor. Has the same type as out_backprop. +*y: A Tensor. Has the same type as out_backprop . 
\n -*@attention Constraints:\n -*-The implementation for FractionalAvgPoolGrad on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for FractionalAvgPoolGrad on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow FractionalAvgPoolGrad operator. @@ -852,22 +858,22 @@ REG_OP(FractionalAvgPoolGrad) .OP_END_FACTORY_REG(FractionalAvgPoolGrad) /** -*@brief Returns the permuted vector/tensor in the destination data format given the. +*@brief Returns the permuted vector/tensor in the destination data format given the . \n *@par Inputs: -*Inputs include: \n -*x: A Tensor. Must be one of the following types: int32, int64. Vector of size 4 \n - or Tensor of shape (4, 2) in source data format. +*Inputs include: +*x: A Tensor. Must be one of the following types: int32, int64. Vector of size 4 + or Tensor of shape (4, 2) in source data format . \n *@par Attributes: *@li src_format: An optional string. Defaults to "NHWC". source data format. -*@li dst_format: An optional string. Defaults to "NCHW". destination data format. +*@li dst_format: An optional string. Defaults to "NCHW". destination data format . \n *@par Outputs: -*y: A Tensor. Has the same type as x. +*y: A Tensor. Has the same type as x . \n -*@attention Constraints:\n -*-The implementation for DataFormatVecPermute on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for DataFormatVecPermute on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow DataFormatVecPermute operator. @@ -880,12 +886,12 @@ REG_OP(DataFormatVecPermute) .OP_END_FACTORY_REG(DataFormatVecPermute) /** -* @brief Computes gradients of the MaxPool3D function. +* @brief Computes gradients of the MaxPool3D function . \n * @par Inputs: * @li orig_x: A mutable NDC1HWC0 tensor of type float16. * @li orig_y: A mutable NDC1HWC0 tensor of type float16. 
-* @li grads: A mutable NDC1HWC0 tensor of type float16. +* @li grads: A mutable NDC1HWC0 tensor of type float16 . \n * @par Attributes: * @li ksize: A required tuple or list, specifying the size of the window for @@ -896,10 +902,10 @@ REG_OP(DataFormatVecPermute) * H and W dimensions in sequence of head, tail, top, bottom, left and right. * to use. * @li data_format: An optional string, Specify the data format of the input and -* output data. With the default format "NDHWC". +* output data. With the default format "NDHWC" . \n * @par Outputs: -* y: A mutable tensor. Has the same shape as "orig_x", but type is float32. +* y: A mutable tensor. Has the same shape as "orig_x", but type is float32 . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPool3DGrad. @@ -916,20 +922,20 @@ REG_OP(MaxPool3DGrad) .OP_END_FACTORY_REG(MaxPool3DGrad) /** -*@brief Performs AvgPool1D on the input. +*@brief Performs AvgPool1D on the input . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64. +*x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64 . \n *@par Attributes: *@li ksize: An required int, specifying the size of the window. *@li strides: An required int. *@li pads: A required tuple or list. *@li ceil_mode: An optional bool. Defaults to False. -*@li count_include_pad: An optional bool. Defaults to False. +*@li count_include_pad: An optional bool. Defaults to False . \n *@par Outputs: -*y: A Tensor. Has the same type as x. +*y: A Tensor. Has the same type as x . \n *@par Third-party framework compatibility *@li compatible with pytorch AvgPool1D operator. @@ -945,20 +951,20 @@ REG_OP(AvgPool1D) .OP_END_FACTORY_REG(AvgPool1D) /** -*@brief Performs AvgPool1D on the input. +*@brief Performs AvgPool1D on the input . \n *@par Inputs: -*x: A Tensor. 
Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64. +*x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64 . \n *@par Attributes: *@li ksize: An required int, specifying the size of the window. *@li strides: An required int. *@li pads: A required tuple or list. *@li ceil_mode: An optional bool. Defaults to False. -*@li count_include_pad: An optional bool. Defaults to False. +*@li count_include_pad: An optional bool. Defaults to False . \n *@par Outputs: -*y: A Tensor. Has the same type as x. +*y: A Tensor. Has the same type as x . \n *@par Third-party framework compatibility *@li compatible with pytorch AvgPool1D operator. @@ -977,7 +983,7 @@ REG_OP(AvgPool1DD) .ATTR(count_include_pad, Bool, false) .OP_END_FACTORY_REG(AvgPool1DD) /** -*@brief Performs max pooling on the input and outputs both max values and indices. +*@brief Performs max pooling on the input and outputs both max values and indices . \n *@par Inputs: * One input: @@ -990,7 +996,7 @@ REG_OP(AvgPool1DD) *@li pads: A required string. No default value. *@li dtype: A optional int. default value is 3. *@li dilation: A optional list of int8, int16, int32, or int64 values. -*@li ceil_mode: A optional bool. default value is false. +*@li ceil_mode: A optional bool. default value is false . \n *@par Outputs: *y: A Tensor. Has the same type and format as input "x". @@ -1000,7 +1006,7 @@ REG_OP(AvgPool1DD) *@li "strides is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, * strides[2] <= 63, strides[2] >= 1. *@li "dilation" is a list that has length 4. -*@li "ceil_mode" is a bool, default is false. +*@li "ceil_mode" is a bool, default is false . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPoolWithArgmax. 
@@ -1018,13 +1024,13 @@ REG_OP(MaxPoolWithArgmaxV2) .OP_END_FACTORY_REG(MaxPoolWithArgmaxV2) /** -*@brief Performs the backpropagation of MaxPoolWithArgmaxV2. +*@brief Performs the backpropagation of MaxPoolWithArgmaxV2 . \n *@par Inputs: * Three inputs, including: *@li x: An NC1HWC0 tensor of type float16. *@li grad: An NC1HWC0 tensor of type float16. -*@li argmx: An NC1HWC0 tensor of type uint16 or int64. +*@li argmx: An NC1HWC0 tensor of type uint16 or int64 . \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for @@ -1034,16 +1040,16 @@ REG_OP(MaxPoolWithArgmaxV2) *@li pads: A required string. No default value. *@li dtype: A optional int. default value is 3. *@li dilation: A optional list of int8, int16, int32, or int64 values. -*@li ceil_mode: A optional bool. default value is false. +*@li ceil_mode: A optional bool. default value is false . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. *@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 *@li "dilation" is a list that has length 4. -*@li "ceil_mode" is a bool, default is false. +*@li "ceil_mode" is a bool, default is false . \n *@see max_pool_grad_with_argmaxv2 *@par Third-party framework compatibility diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 0dab8606..4f51a82e 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -26,8 +26,8 @@ namespace ge { /** *@brief Updates "var" according to the AdaMax algorithm. * t-1 mean previous period. 
-* m_t <- beta1 * m{t-1} + (1 - beta1) * grad\n -* v_t <- max(beta2 * v{t-1}, abs(grad))\n +* m_t <- beta1 * m{t-1} + (1 - beta1) * grad +* v_t <- max(beta2 * v{t-1}, abs(grad)) * var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon) * *@attention Constraints: @@ -77,8 +77,8 @@ REG_OP(ApplyAdaMax) /** *@brief Updates "var" according to the AdaMax algorithm. * t-1 mean previous period. -* m_t <- beta1 * m{t-1} + (1 - beta1) * grad\n -* v_t <- max(beta2 * v{t-1}, abs(grad))\n +* m_t <- beta1 * m{t-1} + (1 - beta1) * grad +* v_t <- max(beta2 * v{t-1}, abs(grad)) * var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon) * *@attention Constraints: @@ -112,6 +112,8 @@ REG_OP(ApplyAdaMax) *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyAdaMax. * +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdaMax instead. */ REG_OP(ApplyAdaMaxD) .INPUT(var, TensorType::NumberType()) @@ -130,7 +132,7 @@ REG_OP(ApplyAdaMaxD) .OP_END_FACTORY_REG(ApplyAdaMaxD) /** -*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme. +*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme . \n *@par Inputs: * Five inputs, including: @@ -138,14 +140,14 @@ REG_OP(ApplyAdaMaxD) *@li accum: An NCHW, NHWC, or ND Tensor of type float32. *@li lr: An NCHW, NHWC, or ND Tensor of type float32. *@li grad: An NCHW, NHWC, or ND Tensor of type float32. -*@li indices: An NCHW, NHWC, or ND Tensor of type float32. +*@li indices: An NCHW, NHWC, or ND Tensor of type float32 . \n *@par Attributes: *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. -*@li update_slots: An optional bool. Defaults to "True". If "True", the calcution will be different as "False". +*@li update_slots: An optional bool. Defaults to "True". If "True", the calcution will be different as "False" . \n *@par Outputs: -*var: A Tensor. 
Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseApplyAdagrad. @@ -162,26 +164,26 @@ REG_OP(SparseApplyAdagrad) .OP_END_FACTORY_REG(SparseApplyAdagrad) /** -*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme. +*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme . \n *@par Inputs: * Four inputs, including: *@li var: An NCHW, NHWC, or ND Tensor of type float32. *@li accum: An NCHW, NHWC, or ND Tensor of type float32. *@li grad: An NCHW, NHWC, or ND Tensor of type float32. -*@li indices: An NCHW, NHWC, or ND Tensor of type int32. +*@li indices: An NCHW, NHWC, or ND Tensor of type int32 . \n *@par Attributes: *@li lr: Required, used for computation. *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. -*@li update_slots: An optional bool. Defaults to "True". If "True", the calcution will be different as "False". +*@li update_slots: An optional bool. Defaults to "True". If "True", the calcution will be different as "False" . \n *@par Outputs: *@li var: A Tensor. Has the same type and format as input "var". -*@li accum: A Tensor. Has the same type and format as input "var". +*@li accum: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility -* Compatible with the TensorFlow operator SparseApplyAdagrad. +* Compatible with the TensorFlow operator SparseApplyAdagrad. \n * *@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyAdagrad instead. @@ -199,7 +201,7 @@ REG_OP(SparseApplyAdagradD) .OP_END_FACTORY_REG(SparseApplyAdagradD) /** -*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme. +*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme . 
\n *@par Inputs: *Six inputs, including: @@ -208,14 +210,14 @@ REG_OP(SparseApplyAdagradD) *@li lr: An NCHW, NHWC, or ND Tensor of type float32. *@li epsilon: An NCHW, NHWC, or ND Tensor of type float32. *@li grad: An NCHW, NHWC, or ND Tensor of type float32. -*@li indices: An NCHW, NHWC, or ND Tensor of type float32. +*@li indices: An NCHW, NHWC, or ND Tensor of type float32 . \n *@par Attributes: *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. -*@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different. +*@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var". +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SparseApplyAdagradV2. @@ -233,27 +235,27 @@ REG_OP(SparseApplyAdagradV2) .OP_END_FACTORY_REG(SparseApplyAdagradV2) /** -*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme. +*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme . \n *@par Inputs: *Four inputs, including: *@li var: An NCHW, NHWC, or ND Tensor of type float32. *@li accum: An NCHW, NHWC, or ND Tensor of type float32. *@li grad: An NCHW, NHWC, or ND Tensor of type float32. -*@li indices: An NCHW, NHWC, or ND Tensor of type int32. +*@li indices: An NCHW, NHWC, or ND Tensor of type int32 . \n *@par Attributes: *@li lr: Required, used for computation. *@li epsilon: Required, used for computation. *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. -*@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different. +*@li update_slots: An optional bool. Defaults to "True". 
If "False", the computation logic will be different . \n *@par Outputs: *@li var: A Tensor. Has the same type and format as input "var". -*@li accum: A Tensor. Has the same type and format as input "accum". +*@li accum: A Tensor. Has the same type and format as input "accum" . \n *@par Third-party framework compatibility -*Compatible with the TensorFlow operator SparseApplyAdagradV2. +*Compatible with the TensorFlow operator SparseApplyAdagradV2. \n * *@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyAdagradV2 instead. @@ -274,8 +276,8 @@ REG_OP(SparseApplyAdagradV2D) /** *@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you * want to use Nesterov momentum. -* computing process: \n -* accum = accum * momentum + grad\n +* computing process: +* accum = accum * momentum + grad * var -= lr * accum * *@attention Constraints: @@ -290,8 +292,8 @@ REG_OP(SparseApplyAdagradV2D) * *@par Attributes: *@li use_nesterov: An optional bool. Defaults to "False". -* If "True", the tensor passed to compute grad will be -* var - lr * momentum * accum, so in the end, the var you get is actually +* If "True", the tensor passed to compute grad will be +* var - lr * momentum * accum, so in the end, the var you get is actually * var - lr * momentum * accum. * *@li use_locking: An optional bool. Defaults to "False". @@ -321,8 +323,8 @@ REG_OP(ApplyMomentum) /** *@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you * want to use Nesterov momentum. -* computing process: \n -* accum = accum * momentum + grad\n +* computing process: +* accum = accum * momentum + grad * var -= lr * accum * *@attention Constraints: @@ -351,6 +353,8 @@ REG_OP(ApplyMomentum) *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyMomentum. * +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyMomentum instead. 
*/ REG_OP(ApplyMomentumD) @@ -367,10 +371,10 @@ REG_OP(ApplyMomentumD) /** *@brief Updates '*var' according to the momentum scheme. -* accum = accum * momentum - grad * lr \n -* if use_nesterov is True: \n -* var += accum * momentum - grad * lr \n -* else: \n +* accum = accum * momentum - grad * lr +* if use_nesterov is True: +* var += accum * momentum - grad * lr +* else: * var += accum * *@par Inputs: @@ -415,10 +419,10 @@ REG_OP(ApplyKerasMomentum) /** *@brief Updates '*var' according to the momentum scheme. -* accum = accum * momentum - grad * lr \n -* if use_nesterov is True: \n -* var += accum * momentum - grad * lr \n -* else: \n +* accum = accum * momentum - grad * lr +* if use_nesterov is True: +* var += accum * momentum - grad * lr +* else: * var += accum * *@par Inputs: @@ -603,8 +607,8 @@ REG_OP(ApplyAdamWithAmsgrad) /** *@brief Updates "var" according to the AddSign update. * t-1 mean previous period. -* m_t <- beta1 * m_{t-1} + (1 - beta1) * grad\n -* update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad\n +* m_t <- beta1 * m_{t-1} + (1 - beta1) * grad +* update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad * var <- var - lr * update * *@attention Constraints: @@ -648,8 +652,8 @@ REG_OP(ApplyPowerSign) /** *@brief Updates "var" according to the AddSign update. * t-1 mean previous period. -* m_t <- beta1 * m_{t-1} + (1 - beta1) * grad\n -* update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad\n +* m_t <- beta1 * m_{t-1} + (1 - beta1) * grad +* update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad * var <- var - lr * update * *@attention Constraints: @@ -678,6 +682,8 @@ REG_OP(ApplyPowerSign) *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyPowerSign. * +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyPowerSign instead. 
*/ REG_OP(ApplyPowerSignD) .INPUT(var, TensorType::NumberType()) @@ -693,11 +699,11 @@ REG_OP(ApplyPowerSignD) .OP_END_FACTORY_REG(ApplyPowerSignD) /** -*@brief Updates "var" as FOBOS algorithm with fixed learning rate.\n -* prox_v = var - alpha * delta\n +*@brief Updates "var" as FOBOS algorithm with fixed learning rate. +* prox_v = var - alpha * delta * var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} * -*@attention Constraints:\n +*@attention Constraints: * the input tensors must have the same shape. * *@par Inputs: @@ -731,7 +737,7 @@ REG_OP(ApplyProximalGradientDescent) .OP_END_FACTORY_REG(ApplyProximalGradientDescent) /** -*@brief Updates "var" according to the AddSign update. +*@brief Updates "var" according to the AddSign update . \n *@par Inputs: *Seven inputs, including: @@ -750,10 +756,10 @@ REG_OP(ApplyProximalGradientDescent) *use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var" and "m" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n *@par Outputs: -*var: A mutable Tensor. Has the same type as "var". +*var: A mutable Tensor. Has the same type as "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ApplyAddSign. @@ -771,7 +777,7 @@ REG_OP(ApplyAddSign) .OP_END_FACTORY_REG(ApplyAddSign) /** -*@brief Updates "var" according to the AddSign update. +*@brief Updates "var" according to the AddSign update . \n *@par Inputs: *Seven inputs, including: @@ -790,14 +796,17 @@ REG_OP(ApplyAddSign) *use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var" and "m" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n *@par Outputs: *@li var: A mutable Tensor. Has the same type as "var". -*@li m: A mutable Tensor. Has the same type as "m". 
+*@li m: A mutable Tensor. Has the same type as "m" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ApplyAddSign. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAddSign instead. */ REG_OP(ApplyAddSignD) .INPUT(var, TensorType::NumberType()) @@ -820,10 +829,10 @@ REG_OP(ApplyAddSignD) * slightly more expensive in terms of computation and memory. * * t-1 mean previous period. -* mg <- rho * mg{t-1} + (1-rho) * grad\n -* ms <- rho * ms{t-1} + (1-rho) * grad * grad\n -* mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\n -* var <- var - mom\n +* mg <- rho * mg{t-1} + (1-rho) * grad +* ms <- rho * ms{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon) +* var <- var - mom * *@attention Constraints: *@li in dense implementation of this algorithm, mg, ms, and mom will @@ -880,10 +889,10 @@ REG_OP(ApplyCenteredRMSProp) * slightly more expensive in terms of computation and memory. * * t-1 mean previous period. -* mg <- rho * mg{t-1} + (1-rho) * grad\n -* ms <- rho * ms{t-1} + (1-rho) * grad * grad\n -* mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\n -* var <- var - mom\n +* mg <- rho * mg{t-1} + (1-rho) * grad +* ms <- rho * ms{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon) +* var <- var - mom * *@attention Constraints: *@li in dense implementation of this algorithm, mg, ms, and mom will @@ -915,11 +924,13 @@ REG_OP(ApplyCenteredRMSProp) *@li var: A mutable Tensor. Has the same type as "var". *@li mg: A mutable Tensor. Has the same type as "mg". *@li ms: A mutable Tensor. Has the same type as "ms". -*@li mom: A mutable Tensor. Has the same type as "mom". +*@li mom: A mutable Tensor. Has the same type as "mom" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyCenteredRMSPropD. 
* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyCenteredRMSProp instead. */ REG_OP(ApplyCenteredRMSPropD) .INPUT(var, TensorType::NumberType()) @@ -973,7 +984,7 @@ REG_OP(ApplyGradientDescent) /** *@brief Updates "var" according to the adagrad scheme. -* accum += grad * grad\n +* accum += grad * grad * var -= lr * grad * (1 / sqrt(accum)) * *@attention Constraints: @@ -1012,7 +1023,7 @@ REG_OP(ApplyAdagrad) /** *@brief Updates "var" according to the adagrad scheme. -* accum += grad * grad\n +* accum += grad * grad * var -= lr * grad * (1 / sqrt(accum)) * *@attention Constraints: @@ -1039,6 +1050,8 @@ REG_OP(ApplyAdagrad) *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyAdagrad. * +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagrad instead. */ REG_OP(ApplyAdagradD) .INPUT(var, TensorType::NumberType()) @@ -1053,7 +1066,7 @@ REG_OP(ApplyAdagradD) /** * @brief Updates "var" according to the adagradv2 scheme. -* accum += grad * grad \n +* accum += grad * grad * var -= lr * grad * (1 / sqrt(accum) + epsilon) * * @par Inputs: @@ -1098,7 +1111,7 @@ REG_OP(ApplyAdagradV2) /** * @brief Updates "var" according to the adagradv2 scheme. -* accum += grad * grad \n +* accum += grad * grad * var -= lr * grad * (1 / sqrt(accum) + epsilon) * * @par Inputs: @@ -1144,7 +1157,7 @@ REG_OP(ApplyAdagradV2D) .OP_END_FACTORY_REG(ApplyAdagradV2D) /** -*@brief Updates "var" according to the proximal adagrad scheme. +*@brief Updates "var" according to the proximal adagrad scheme . \n *@par Inputs: *Eight inputs, including: @@ -1162,16 +1175,16 @@ REG_OP(ApplyAdagradV2D) * @li l2: A Tensor of the same type as "var". * L2 regulariation. Must be a scalar. * @li global_step: A Tensor of type int32 or int64. -* Training step number. Must be a scalar. +* Training step number. Must be a scalar . \n *@par Attributes: *use_locking: An optional bool. Defaults to "False". 
* If "True", updating of the var and accum tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n *@par Outputs: -*var: A mutable Tensor. Has the same type as "var". +*var: A mutable Tensor. Has the same type as "var" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyAdagradDA. @@ -1190,7 +1203,7 @@ REG_OP(ApplyAdagradDA) .OP_END_FACTORY_REG(ApplyAdagradDA) /** -*@brief Updates "var" according to the proximal adagrad scheme. +*@brief Updates "var" according to the proximal adagrad scheme . \n *@par Inputs: *Eight inputs, including: @@ -1208,21 +1221,24 @@ REG_OP(ApplyAdagradDA) * @li l2: A Tensor of the same type as "var". * L2 regulariation. Must be a scalar. * @li global_step: A Tensor of type int32 or int64. -* Training step number. Must be a scalar. +* Training step number. Must be a scalar . \n *@par Attributes: *use_locking: An optional bool. Defaults to "False". * If "True", updating of the var and accum tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n *@par Outputs: *var: A mutable Tensor. Has the same type as "var". *gradient_accumulator: A mutable Tensor. Has the same type as "var". -*gradient_squared_accumulator: A mutable Tensor. Has the same type as "var". +*gradient_squared_accumulator: A mutable Tensor. Has the same type as "var" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyAdagradDA. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagradDA instead. */ REG_OP(ApplyAdagradDAD) .INPUT(var, TensorType::NumberType()) @@ -1271,22 +1287,22 @@ REG_OP(DataFormatDimMap) /** * @brief Implements stochastic gradient descent (optionally with momentum). 
* Nesterov momentum is based on the formula from -* On the importance of initialization and momentum in deep learning.\n +* On the importance of initialization and momentum in deep learning. * @par Inputs: -* @li parameters: A mutable tensor of type float16 or float32.\n +* @li parameters: A mutable tensor of type float16 or float32. * Specifies the iterable of parameters to optimize or dicts defining parameter * groups. -* @li gradient: A tensor of type float16 or float32.\n +* @li gradient: A tensor of type float16 or float32. * Specifies the gradient of training step. -* @li learning_rate: A tensor of type float16 or float32.\n +* @li learning_rate: A tensor of type float16 or float32. * Specifies the learing_rate of training step. * @li accum: A tensor of type float16 or float32. * Specifies the velocity of training step. * @li momentum: A tensor of type float16 or float32. * Specifies the momentum factor. * @li stat: A tensor of type float16 or float32. -* Specifies the status representing the first step or not. +* Specifies the status representing the first step or not . \n * @par Attributes: * @li dampening: An optional float, specifying the dampening for momentum. @@ -1294,10 +1310,10 @@ REG_OP(DataFormatDimMap) * @li weight_decay: An optional float, specifying the L2 penalty. Defaults to * "0.0". * @li nesterov: An optional bool, specifying whether to enable Nesterov -* momentum. Defaults to "False". +* momentum. Defaults to "False" . \n * @par Outputs: -* parameters: A mutable tensor same as input "parameters". +* parameters: A mutable tensor same as input "parameters" . \n * @see ApplyMomentum() @@ -1319,11 +1335,11 @@ REG_OP(SGD) /** * @brief Updates "var" according to the RMSProp algorithm. 
-* mean_square = decay * mean_square + (1-decay) * gradient ** 2\n -* Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n -* ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n -* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n -* var <- var - mom\n +* mean_square = decay * mean_square + (1-decay) * gradient ** 2 +* Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +* ms <- rho * ms_{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +* var <- var - mom * * @par Inputs: * @li var: A mutable tensor. Must be one of the data types defined in @@ -1347,8 +1363,8 @@ REG_OP(SGD) * var: A mutable tensor. Has the same type as input "var". * * @attention Constraints: -* @li Note that in dense implementation of this algorithm, "ms" and "mom" will -* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom" +* @li Note that in dense implementation of this algorithm, "ms" and "mom" will +* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom" * will not update in iterations during which "grad" is 0. * @li The input tensors "var", "ms", "mom" and "grad" must have the same shape. * @@ -1371,10 +1387,10 @@ REG_OP(ApplyRMSProp) /** * @brief Updates "var" according to the RMSProp algorithm, a const input will be * considered as an attribute. -* mean_square = decay * mean_square + (1-decay) * gradient ** 2\n -* Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n -* ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n -* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n +* mean_square = decay * mean_square + (1-decay) * gradient ** 2 +* Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +* ms <- rho * ms_{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) * var <- var - mom * * @par Inputs: @@ -1389,7 +1405,7 @@ REG_OP(ApplyRMSProp) * * @par Attributes: * @li use_locking: An optional "bool". 
Defaults to "False". If "True", updating -* of the "var", "ms", and "mom" tensors will be protected by a lock; +* of the "var", "ms", and "mom" tensors will be protected by a lock; * otherwise the behavior is undefined, but may exhibit less contention. * @li rho: A required scalar. Must have the same type as "var". * @li momentum: A required scalar. Must have the same type as "var". @@ -1426,7 +1442,7 @@ REG_OP(ApplyRMSPropD) .OP_END_FACTORY_REG(ApplyRMSPropD) /** -*@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate. +*@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate . \n *@par Inputs: *Six inputs, including: @@ -1436,13 +1452,13 @@ REG_OP(ApplyRMSPropD) * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. -* @li grad: A Tensor of the same type as "var", for the gradient. +* @li grad: A Tensor of the same type as "var", for the gradient . \n *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" *tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less *contention. +*use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" *tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less *contention . \n *@par Outputs: -*var: A mutable tensor. Must have the same type as input "var". +*var: A mutable tensor. Must have the same type as input "var" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyProximalAdagrad. @@ -1459,7 +1475,7 @@ REG_OP(ApplyProximalAdagrad) .OP_END_FACTORY_REG(ApplyProximalAdagrad) /** -*@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate. 
+*@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate . \n *@par Inputs: *Six inputs, including: @@ -1469,17 +1485,20 @@ REG_OP(ApplyProximalAdagrad) * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. -* @li grad: A Tensor of the same type as "var", for the gradient. +* @li grad: A Tensor of the same type as "var", for the gradient . \n *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" *tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less *contention. +*use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" *tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less *contention . \n *@par Outputs: * @li var: A mutable Tensor. Has the same type as "var". -* @li accum: A mutable Tensor. Has the same type as "var". +* @li accum: A mutable Tensor. Has the same type as "var" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyProximalAdagradD. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyProximalAdagrad instead. */ REG_OP(ApplyProximalAdagradD) .INPUT(var, TensorType::NumberType()) @@ -1496,33 +1515,33 @@ REG_OP(ApplyProximalAdagradD) /** *@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm. * Compared with op ApplyProximalAdagrad, an additional index tensor is input, -* Only the indices into the first dimensions of "var" and "accum" are updated. +* Only the indices into the first dimensions of "var" and "accum" are updated . 
\n *@par Inputs: -* Seven inputs, including:\n -* @li var: A mutable Tensor.\n +* Seven inputs, including: +* @li var: A mutable Tensor. * TensorType::NumberType(). Should be a Variable Tensor. -* @li accum: A mutable Tensor of the same type as "var".\n -* Should be a Variable Tensor. Should be greater than or equal to zero.\n +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. Should be greater than or equal to zero. * Accum and grad cannot be equal to zero at the same time. -* @li lr: A Tensor of the same type as "var".\n +* @li lr: A Tensor of the same type as "var". * Scaling factor. Must be a scalar. Should be greater than zero. -* @li l1: A Tensor of the same type as "var".\n +* @li l1: A Tensor of the same type as "var". * L1 regulariation. Must be a scalar. Should be greater than or equal to zero. -* @li l2: A Tensor of the same type as "var".\n +* @li l2: A Tensor of the same type as "var". * L2 regulariation. Must be a scalar. Should be greater than or equal to zero. -* @li grad: A Tensor. Has the same type as "var".\n +* @li grad: A Tensor. Has the same type as "var". * The gradient. -* @li indices: A vector of indices into the first dimension of "var" and "accum".\n -* TensorType::IndexNumberType(). Can contain duplicate values. +* @li indices: A vector of indices into the first dimension of "var" and "accum". +* TensorType::IndexNumberType(). Can contain duplicate values . \n *@par Attributes: -*use_locking: An optional bool. Defaults to "False".\n -* If "True", updating of the var and accum tensors will be protected by a lock; \n -* If "False", the behavior is undefined, but may exhibit less contention. +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the var and accum tensors will be protected by a lock; +* If "False", the behavior is undefined, but may exhibit less contention. *@par Outputs: -*var: A mutable Tensor. Has the same type as "var". +*var: A mutable Tensor. 
Has the same type as "var" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SparseApplyProximalAdagrad. @@ -1542,37 +1561,40 @@ REG_OP(SparseApplyProximalAdagrad) /** *@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm.\ n * Compared with op ApplyProximalAdagrad, an additional index tensor is input, -* Only the indices into the first dimensions of "var" and "accum" are updated. +* Only the indices into the first dimensions of "var" and "accum" are updated . \n *@par Inputs: -* Seven inputs, including:\n -* @li var: A mutable Tensor.\n +* Seven inputs, including: +* @li var: A mutable Tensor. * TensorType::NumberType(). Should be a Variable Tensor. -* @li accum: A mutable Tensor of the same type as "var".\n -* Should be a Variable Tensor. Should be greater than or equal to zero.\n +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. Should be greater than or equal to zero. * Accum and grad cannot be equal to zero at the same time. -* @li lr: A Tensor of the same type as "var".\n -* Scaling factor. Must be a scalar. Should be greater than zero. -* @li l1: A Tensor of the same type as "var".\n +* @li lr: A Tensor of the same type as "var". +* Scaling factor. Must be a scalar. Should be greater than zero. +* @li l1: A Tensor of the same type as "var". * L1 regulariation. Must be a scalar. Should be greater than or equal to zero. -* @li l2: A Tensor of the same type as "var".\n +* @li l2: A Tensor of the same type as "var". * L2 regulariation. Must be a scalar. Should be greater than or equal to zero. -* @li grad: A Tensor. Has the same type as "var". \n +* @li grad: A Tensor. Has the same type as "var". * The gradient. -* @li indices: A vector of indices into the first dimension of "var" and "accum".\n -* TensorType::IndexNumberType(). Can contain duplicate values. +* @li indices: A vector of indices into the first dimension of "var" and "accum". 
+* TensorType::IndexNumberType(). Can contain duplicate values . \n *@par Attributes: -*use_locking: An optional bool. Defaults to "False".\n -* If "True", updating of the var and accum tensors will be protected by a lock; \n -* If "False", the behavior is undefined, but may exhibit less contention. +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the var and accum tensors will be protected by a lock; +* If "False", the behavior is undefined, but may exhibit less contention . \n *@par Outputs: *@li var: A mutable Tensor. Has the same type as "var". -*@li accum: A mutable Tensor. Has the same type as "var". +*@li accum: A mutable Tensor. Has the same type as "var" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SparseApplyProximalAdagrad. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyProximalAdagrad instead. */ REG_OP(SparseApplyProximalAdagradD) .INPUT(var, TensorType::NumberType()) @@ -1588,7 +1610,7 @@ REG_OP(SparseApplyProximalAdagradD) .OP_END_FACTORY_REG(SparseApplyProximalAdagradD) /** -*@brief Updates "var" according to the Ftrl-proximal scheme. +*@brief Updates "var" according to the Ftrl-proximal scheme . \n *@par Inputs: *Eight inputs, including: @@ -1602,16 +1624,16 @@ REG_OP(SparseApplyProximalAdagradD) * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. -* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n *@par Attributes: *use_locking: An optional bool. Defaults to "False". 
* If "True", updating of the "var" and "accum" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n *@par Outputs: -*var: A mutable Tensor. Has the same type as "var". +*var: A mutable Tensor. Has the same type as "var" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyFtrl. @@ -1630,7 +1652,7 @@ REG_OP(ApplyFtrl) .OP_END_FACTORY_REG(ApplyFtrl) /** -*@brief Updates "var" according to the Ftrl-proximal scheme. +*@brief Updates "var" according to the Ftrl-proximal scheme . \n *@par Inputs: *Eight inputs, including: @@ -1644,21 +1666,24 @@ REG_OP(ApplyFtrl) * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. -* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n *@par Attributes: *use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var" and "accum" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n *@par Outputs: *@li var: A mutable Tensor. Has the same type as "var". *@li accum: A mutable Tensor. Has the same type as "accum". -*@li linear: A mutable Tensor. Has the same type as "linear". +*@li linear: A mutable Tensor. Has the same type as "linear" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyFtrl. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrl instead. 
*/ REG_OP(ApplyFtrlD) .INPUT(var, TensorType::NumberType()) @@ -1676,7 +1701,7 @@ REG_OP(ApplyFtrlD) .OP_END_FACTORY_REG(ApplyFtrlD) /** -*@brief Update "var" according to the Ftrl-proximal scheme. +*@brief Update "var" according to the Ftrl-proximal scheme . \n *@par Inputs: *Nine inputs, including: @@ -1691,16 +1716,16 @@ REG_OP(ApplyFtrlD) * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. * @li l2_shrinkage: A Tensor of the same type as "var". -* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n *@par Attributes: *use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var" and "accum" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n *@par Outputs: -*var: A mutable Tensor. Has the same type as "var". +*var: A mutable Tensor. Has the same type as "var" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyFtrlV2. @@ -1720,7 +1745,7 @@ REG_OP(ApplyFtrlV2) .OP_END_FACTORY_REG(ApplyFtrlV2) /** -*@brief Update "var" according to the Ftrl-proximal scheme. +*@brief Update "var" according to the Ftrl-proximal scheme . \n *@par Inputs: *Nine inputs, including: @@ -1735,21 +1760,24 @@ REG_OP(ApplyFtrlV2) * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. * @li l2_shrinkage: A Tensor of the same type as "var". -* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . 
\n *@par Attributes: *use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var" and "accum" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n *@par Outputs: *var: A mutable Tensor. Has the same type as "var". *accum: A mutable Tensor. Has the same type as "accum". -*linear: A mutable Tensor. Has the same type as "linear". +*linear: A mutable Tensor. Has the same type as "linear" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyFtrlV2. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrlV2 instead. */ REG_OP(ApplyFtrlV2D) .INPUT(var, TensorType::NumberType()) @@ -1769,9 +1797,9 @@ REG_OP(ApplyFtrlV2D) /** *@brief Updates "var" according to the Adam algorithm. -* lr_t <- text{learning\_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t)\n -* m_t <- beta_1 * m_{t-1} + (1 - beta_1) * g\n -* v_t <- max(beta2 * v{t-1}, abs(g))\n +* lr_t <- text{learning\_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t) +* m_t <- beta_1 * m_{t-1} + (1 - beta_1) * g +* v_t <- max(beta2 * v{t-1}, abs(g)) * variable <- variable - lr_t * m_t / (sqrt{v_t} + epsilon) * *@attention Constraints: @@ -1801,7 +1829,7 @@ REG_OP(ApplyFtrlV2D) If "True", uses the nesterov update. * *@par Outputs: -* var: A mutable Tensor. Has the same type as intput "var". +* var: A mutable Tensor. Has the same type as input "var" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyAdam. @@ -1824,9 +1852,9 @@ REG_OP(ApplyAdam) /** *@brief Updates "var" according to the Adam algorithm. 
-* lr_t <- text{learning\_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t)\n -* m_t <- beta_1 * m_{t-1} + (1 - beta_1) * g\n -* v_t <- max(beta2 * v{t-1}, abs(g))\n +* lr_t <- text{learning\_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t) +* m_t <- beta_1 * m_{t-1} + (1 - beta_1) * g +* v_t <- max(beta2 * v{t-1}, abs(g)) * variable <- variable - lr_t * m_t / (sqrt{v_t} + epsilon) * *@attention Constraints: @@ -1858,10 +1886,13 @@ REG_OP(ApplyAdam) *@par Outputs: *@li var: A mutable tensor. Has the same type as input "var". *@li m: A mutable tensor. Has the same type as input "m". -*@li v: A mutable tensor. Has the same type as input "v". +*@li v: A mutable tensor. Has the same type as input "v" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyAdam. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdam instead. */ REG_OP(ApplyAdamD) .INPUT(var, TensorType::NumberType()) @@ -1882,7 +1913,7 @@ REG_OP(ApplyAdamD) .OP_END_FACTORY_REG(ApplyAdamD) /** -*@brief Updates "var" according to the proximal adadelta scheme. +*@brief Updates "var" according to the proximal adadelta scheme . \n *@par Inputs: *Seven inputs, including: @@ -1895,16 +1926,16 @@ REG_OP(ApplyAdamD) * @li lr: A scalar of the same type as "var", for the scaling factor. * @li rho: A scalar of the same type as "var", for the decay factor. * @li epsilon: A scalar of the same type as "var", for the constant factor. -* @li grad: A Tensor of the same type as "var", for the gradient. +* @li grad: A Tensor of the same type as "var", for the gradient . \n *@par Attributes: *use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var", "accum" and "accum_update" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n *@par Outputs: -*var: A mutable Tensor. Has the same type as "var". +*var: A mutable Tensor. 
Has the same type as "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ApplyAdadelta. @@ -1922,7 +1953,7 @@ REG_OP(ApplyAdadelta) .OP_END_FACTORY_REG(ApplyAdadelta) /** -*@brief Updates "var" according to the proximal adadelta scheme. +*@brief Updates "var" according to the proximal adadelta scheme . \n *@par Inputs: *Seven inputs, including: @@ -1935,21 +1966,24 @@ REG_OP(ApplyAdadelta) * @li lr: A scalar of the same type as "var", for the scaling factor. * @li rho: A scalar of the same type as "var", for the decay factor. * @li epsilon: A scalar of the same type as "var", for the constant factor. -* @li grad: A Tensor of the same type as "var", for the gradient. +* @li grad: A Tensor of the same type as "var", for the gradient . \n *@par Attributes: *use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var", "accum" and "accum_update" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n *@par Outputs: *@li var: A mutable Tensor. Has the same type as "var". *@li accum: A mutable Tensor. Has the same type as "var". -*@li accum_update: A mutable Tensor. Has the same type as "var". +*@li accum_update: A mutable Tensor. Has the same type as "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ApplyAdadelta. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdadelta instead. */ REG_OP(ApplyAdadeltaD) .INPUT(var, TensorType::NumberType()) @@ -1966,11 +2000,11 @@ REG_OP(ApplyAdadeltaD) .OP_END_FACTORY_REG(ApplyAdadeltaD) /** -* @brief Updates "var" according to the ApplyMomentum algorithm. \n -* accum = accum * momentum + x1 * x2 \n -* if use_nesterov is True: \n -* var -= x1 * x2 * lr + accum * momentum * lr \n -* else:\n +* @brief Updates "var" according to the ApplyMomentum algorithm. 
+* accum = accum * momentum + x1 * x2 +* if use_nesterov is True: +* var -= x1 * x2 * lr + accum * momentum * lr +* else: * var -= accum * lr * * @par Inputs: @@ -1986,11 +2020,11 @@ REG_OP(ApplyAdadeltaD) * * @par Attributes: * Two attributes, including: -* @li use_nesterov: An optional bool. Defaults to "False". \n -* If True, the tensor passed to compute grad will be var - lr * momentum * accum, \n +* @li use_nesterov: An optional bool. Defaults to "False". +* If True, the tensor passed to compute grad will be var - lr * momentum * accum, * so in the end, the var you get is actually var - lr * momentum * accum. -* @li use_locking: An optional bool. Defaults to "False". \n -* If "True", updating of the "var", m", and "v" tensors will be protected \n +* @li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", m", and "v" tensors will be protected * by a lock; otherwise the behavior is undefined, but may exhibit less contention. * * @par Outputs: @@ -2012,11 +2046,11 @@ REG_OP(FusedMulApplyMomentum) .OP_END_FACTORY_REG(FusedMulApplyMomentum) /** -* @brief Updates "var" according to the ApplyMomentum algorithm. \n -* accum = accum * momentum + x1 * x2 \n -* if use_nesterov is True: \n -* var -= x1 * x2 * lr + accum * momentum * lr \n -* else: \n +* @brief Updates "var" according to the ApplyMomentum algorithm. +* accum = accum * momentum + x1 * x2 +* if use_nesterov is True: +* var -= x1 * x2 * lr + accum * momentum * lr +* else: * var -= accum * lr * * @par Inputs: @@ -2033,11 +2067,11 @@ REG_OP(FusedMulApplyMomentum) * * @par Attributes: * Two Attributes, including: -* @li use_nesterov: An optional bool. Defaults to "False". \n -* If True, the tensor passed to compute grad will be var - lr * momentum * accum, \n +* @li use_nesterov: An optional bool. Defaults to "False". +* If True, the tensor passed to compute grad will be var - lr * momentum * accum, * so in the end, the var you get is actually var - lr * momentum * accum. 
-* @li use_locking: An optional bool. Defaults to "False". \n -* If "True", updating of the "var", m", and "v" tensors will be protected \n +* @li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", m", and "v" tensors will be protected * by a lock; otherwise the behavior is undefined, but may exhibit less contention. * * @par Outputs: @@ -2062,21 +2096,21 @@ REG_OP(FusedMulApplyMomentumExtern) .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern) /** -*@brief Update "g" according to the LARS algorithm. +*@brief Update "g" according to the LARS algorithm . \n *@par Inputs: *Four inputs, including: * @li w: A Tensor. Must be of type TensorType::DT_FLOAT. * @li g: A Tensor of the same type and shape as "w". * @li weight_decay: A Tensor of the same type as "w", Must be a scalar. -* @li learning_rate: A Tensor of the same type as "w", Must be a scalar. +* @li learning_rate: A Tensor of the same type as "w", Must be a scalar . \n *@par Attributes: *Three Attributes, including: * @li hyperpara: An optional float. Default value is 0.001. * @li epsilon: An optional float. Default value is 1e-5.Avoid denominator is 0. -* @li use_clip: An optional bool. Defaults to "False".\n -* If "True", updating learning rate. +* @li use_clip: An optional bool. Defaults to "False". +* If "True", updating learning rate . \n *@par Outputs: *g_new: Tensor of the same type as "w". @@ -2093,7 +2127,7 @@ REG_OP(LarsV2) .OP_END_FACTORY_REG(LarsV2) /** -*@brief Update "g" according to the LARS algorithm. +*@brief Update "g" according to the LARS algorithm . \n *@par Inputs: *Six inputs, including: @@ -2102,14 +2136,14 @@ REG_OP(LarsV2) * @li w_square_sum: A Tensor of square_sum(w), has the same type as "w", Must be a scalar. * @li g_square_sum: A Tensor of square(g), has the same type as "w", Must be a scalar. * @li weight_decay: A Tensor of the same type as "w", Must be a scalar. -* @li learning_rate: A Tensor of the same type as "w", Must be a scalar. 
+* @li learning_rate: A Tensor of the same type as "w", Must be a scalar . \n *@par Attributes: *Three Attributes, including: * @li hyperpara: An optional float. Default value is 0.001. * @li epsilon: An optional float. Default value is 1e-5.Avoid denominator is 0. -* @li use_clip: An optional bool. Defaults to "False".\n -* If "True", updating learning rate. +* @li use_clip: An optional bool. Defaults to "False". +* If "True", updating learning rate . \n *@par Outputs: *g_new: Tensor of the same type as "w". @@ -2128,7 +2162,7 @@ REG_OP(LarsV2Update) .OP_END_FACTORY_REG(LarsV2Update) /** -* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme. +* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme . \n * @par Inputs: * Nine inputs, including: @@ -2144,16 +2178,16 @@ REG_OP(LarsV2Update) * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. -* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n * @par Attributes: * use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var" and "accum" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n * @par Outputs: -* var: A Tensor. Has the same type and format as input "var". +* var: A Tensor. Has the same type and format as input "var" . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator SparseApplyFtrl. @@ -2173,7 +2207,7 @@ REG_OP(SparseApplyFtrl) .OP_END_FACTORY_REG(SparseApplyFtrl) /** -* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme. 
+* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme . \n * @par Inputs: * Five inputs, including: @@ -2185,7 +2219,7 @@ REG_OP(SparseApplyFtrl) * Should be a Variable Tensor. * @li grad: A Tensor of the same type as "var", for the gradient. * @li indices: A vector of indices into the first dimension of var and accum. -* The value of indices must be unique. Otherwise, the result is unpredictable. +* The value of indices must be unique. Otherwise, the result is unpredictable . \n * @par Attributes: * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. @@ -2195,12 +2229,12 @@ REG_OP(SparseApplyFtrl) * @li use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var" and "accum" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n * @par Outputs: * @li var: A Tensor. Has the same type and format as input "var". * @li accum: A Tensor. Has the same type and format as input "accum". -* @li linear: A Tensor. Has the same type and format as input "linear". +* @li linear: A Tensor. Has the same type and format as input "linear" . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator SparseApplyFtrl. @@ -2226,7 +2260,7 @@ REG_OP(SparseApplyFtrlD) /** * @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme. -* That is for rows we have grad for, "var", "accum" and "linear" are updated. +* That is for rows we have grad for, "var", "accum" and "linear" are updated . \n * @par Inputs: * Ten inputs, including: @@ -2242,16 +2276,16 @@ REG_OP(SparseApplyFtrlD) * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. * @li l2_shrinkage: A Tensor of the same type as "var", L2 shrinkage regulariation. Must be a scalar. 
-* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n * @par Attributes: * use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var" and "accum" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n * @par Outputs: -* var: A Tensor. Has the same type and format as input "var". +* var: A Tensor. Has the same type and format as input "var" . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator SparseApplyFtrlV2. @@ -2273,7 +2307,7 @@ REG_OP(SparseApplyFtrlV2) /** * @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme. -* That is for rows we have grad for, "var", "accum" and "linear" are updated. +* That is for rows we have grad for, "var", "accum" and "linear" are updated . \n * @par Inputs: * Five inputs, including: @@ -2284,7 +2318,7 @@ REG_OP(SparseApplyFtrlV2) * @li linear: A mutable Tensor of the same type as "var". * Should be a Variable Tensor. * @li grad: A Tensor of the same type as "var", for the gradient. -* @li indices: A vector of indices into the first dimension of "var" and "accum". +* @li indices: A vector of indices into the first dimension of "var" and "accum" . \n * @par Attributes: * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. @@ -2295,12 +2329,12 @@ REG_OP(SparseApplyFtrlV2) * @li use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var" and "accum" tensors will be * protected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. +* but may exhibit less contention . \n * @par Outputs: * @li var: A Tensor. Has the same type and format as input "var". * @li accum: A Tensor. Has the same type and format as input "accum". 
-* @li linear: A Tensor. Has the same type and format as input "linear". +* @li linear: A Tensor. Has the same type and format as input "linear" . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator SparseApplyFtrlV2D. @@ -2327,15 +2361,15 @@ REG_OP(SparseApplyFtrlV2D) /** * @brief Updates "var" in specified index according to the RMSProp algorithm. -* mean_square = decay * mean_square + (1-decay) * gradient ** 2\n -* Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n -* ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n -* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n -* var <- var - mom\n +* mean_square = decay * mean_square + (1-decay) * gradient ** 2 +* Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +* ms <- rho * ms_{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +* var <- var - mom * * @par Inputs: * Nine inputs, including: -* @li var: A mutable tensor. Must be one of the data types defined in\n +* @li var: A mutable tensor. Must be one of the data types defined in * TensorType::NumberType(). Should be from a Variable(). * @li ms: A mutable tensor. Must have the same type as "var". Should be from a * Variable(). @@ -2380,11 +2414,11 @@ REG_OP(SparseApplyRMSProp) /** * @brief Updates "var" in specified index according to the RMSProp algorithm. -* a const input will be considered as an attribute.\n -* mean_square = decay * mean_square + (1-decay) * gradient ** 2\n -* Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n -* ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n -* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n +* a const input will be considered as an attribute. 
+* mean_square = decay * mean_square + (1-decay) * gradient ** 2 +* Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +* ms <- rho * ms_{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) * var <- var - mom * * @par Inputs: @@ -2437,14 +2471,14 @@ REG_OP(SparseApplyRMSPropD) /** * @brief Updates "var" in specified index according to the Adadelta algorithm. -* accum <- rho * accum + (1 - rho) * grad.square()\n -* update <- (accum_update + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad\n -* var <- var - update * lr\n -* accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n +* accum <- rho * accum + (1 - rho) * grad.square() +* update <- (accum_update + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad +* var <- var - update * lr +* accum_update <- rho() * accum_update + (1 - rho()) * update.square() * * @par Inputs: * Eight inputs, including: -* @li var: A mutable tensor. Must be one of the data types defined in\n +* @li var: A mutable tensor. Must be one of the data types defined in * TensorType::NumberType(). Should be from a Variable(). * @li accum: A mutable tensor. Must have the same type as "var". Should be from a * Variable(). @@ -2487,11 +2521,11 @@ REG_OP(SparseApplyAdadelta) /** * @brief Updates "var" in specified index according to the Adadelta algorithm. -* a const input will be considered as an attribute.\n -* accum <- rho * accum + (1 - rho) * grad.square()\n -* update <- (accum_update + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad\n -* var <- var - update * lr\n -* accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n +* a const input will be considered as an attribute. 
+* accum <- rho * accum + (1 - rho) * grad.square() +* update <- (accum_update + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad +* var <- var - update * lr +* accum_update <- rho() * accum_update + (1 - rho()) * update.square() * * @par Inputs: * Seven inputs, including: @@ -2542,10 +2576,10 @@ REG_OP(SparseApplyAdadeltaD) /** -*@brief Clean memory of workspace list. +*@brief Clean memory of workspace list . \n *@par Attributes: -* @li automic_add_mem_size: sizes of workspaces. +* @li automic_add_mem_size: sizes of workspaces . \n */ REG_OP(AtomicAddrClean) diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h index 9cde8a0f..503d97b1 100644 --- a/third_party/fwkacllib/inc/ops/no_op.h +++ b/third_party/fwkacllib/inc/ops/no_op.h @@ -27,7 +27,7 @@ namespace ge { /** -*@brief Does nothing. Only useful as a placeholder for control edges. +*@brief Does nothing. Only useful as a placeholder for control edges . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator NoOp. diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index d265d4e5..b50b7cd1 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -25,7 +25,7 @@ namespace ge { /** -*@brief Computes the for the gelu of "x". +*@brief Computes the for the gelu of "x" . \n *@par Inputs: *Two inputs, including: @@ -42,13 +42,13 @@ REG_OP(Gelu) .OP_END_FACTORY_REG(Gelu) /** -*@brief Computes the gradient for the gelu of "x". +*@brief Computes the gradient for the gelu of "x" . \n *@par Inputs: *Three inputs, including: * @li dy: A Tensor. Must be one of the following types: float16, float32 * @li x: A Tensor of the same type as "dy". -* @li y: A Tensor of the same type as "dy". +* @li y: A Tensor of the same type as "dy" . \n *@par Outputs: *z: A Tensor. Has the same type as "dy". 
@@ -63,7 +63,7 @@ REG_OP(GeluGrad) .OP_END_FACTORY_REG(GeluGrad) /** -*@brief Computes the for the fast_gelu of "x". +*@brief Computes the for the fast_gelu of "x" . \n *@par Inputs: *Two inputs, including: @@ -80,12 +80,12 @@ REG_OP(FastGelu) .OP_END_FACTORY_REG(FastGelu) /** -*@brief Computes the gradient for the fast_gelu of "x". +*@brief Computes the gradient for the fast_gelu of "x" . \n *@par Inputs: *Three inputs, including: * @li dy: A Tensor. Must be one of the following types: float16, float32 -* @li x: A Tensor of the same type as "dy". +* @li x: A Tensor of the same type as "dy" . \n *@par Outputs: *z: A Tensor. Has the same type as "dy". @@ -100,13 +100,13 @@ REG_OP(FastGeluGrad) /** -*@brief Computes the gradient for the tanh of "x". +*@brief Computes the gradient for the tanh of "x" . \n *@par Inputs: *Two inputs, including: * @li y: A Tensor. Must be one of the following types: float16, float32, * double, complex64, complex128. -* @li dy: A Tensor of the same type as "y". +* @li dy: A Tensor of the same type as "y" . \n *@par Outputs: *z: A Tensor. Has the same type as "y". @@ -120,14 +120,14 @@ REG_OP(TanhGrad) .OP_END_FACTORY_REG(TanhGrad) /** -*@brief: Computes hyperbolic tangent of "x" element-wise. +*@brief: Computes hyperbolic tangent of "x" element-wise . \n *@par Inputs: *One input: -*x: A Tensor. Must be one of the following types: float16, float32, complex64, complex128, double. +*x: A Tensor. Must be one of the following types: float16, float32, complex64, complex128, double . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Tanh. @@ -141,7 +141,7 @@ REG_OP(Tanh) * @brief Computes rectified linear: "max(x, 0)". * * @par Inputs: -* x: A tensor. Must be one of the following types: float32, float64, int32, uint8,\n +* x: A tensor. 
Must be one of the following types: float32, float64, int32, uint8, * int16, int8, int64, uint16, float16, qint8. * * @par Outputs: @@ -163,13 +163,13 @@ REG_OP(Relu) /** * @brief Computes rectified linear 6. -* activations = min(max(x, 0), 6). +* activations = min(max(x, 0), 6) . \n * @par Inputs: -* x: A Tensor of type RealNumberType. +* x: A Tensor of type RealNumberType . \n * @par Outputs: -* y: A Tensor of type RealNumberType. +* y: A Tensor of type RealNumberType . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator Relu6. @@ -181,16 +181,16 @@ REG_OP(Relu6) /** * @brief Computes rectified linear 6*scale. -* activations = min(max(x, 0), 6*scale). +* activations = min(max(x, 0), 6*scale) . \n * @par Inputs: -* x: A Tensor of type RealNumberType. +* x: A Tensor of type RealNumberType . \n * @par Attributes: -* epsilon: A required scalar. The data type is float32. +* epsilon: A required scalar. The data type is float32 . \n * @par Outputs: -* y: A Tensor of type RealNumberType. +* y: A Tensor of type RealNumberType . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator Relu6. @@ -206,14 +206,14 @@ REG_OP(Relu6D) /** * @brief Computes rectified linear 6 gradients for a Relu6 operation. -* backprops = gradients * (features > 0) * (features < 6). +* backprops = gradients * (features > 0) * (features < 6) . \n * @par Inputs: * @li features: A Tensor of type RealNumberType. -* @li gradients: A Tensor of type RealNumberType. +* @li gradients: A Tensor of type RealNumberType . \n * @par Outputs: -* backprops: A Tensor of type RealNumberType. +* backprops: A Tensor of type RealNumberType . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator Relu6Grad. @@ -225,13 +225,13 @@ REG_OP(Relu6Grad) .OP_END_FACTORY_REG(Relu6Grad) /** -* @brief Compute sigmoid of "x" element-wise. +* @brief Compute sigmoid of "x" element-wise . 
\n * @par Inputs: -* A Tensor of type complex64, complex128, float16, float32 or double. +* A Tensor of type complex64, complex128, float16, float32 or double . \n * @par Outputs: -* A Tensor. Has the same type as "x". +* A Tensor. Has the same type as "x" . \n * @see Relu() @@ -244,11 +244,11 @@ REG_OP(Sigmoid) .OP_END_FACTORY_REG(Sigmoid) /** -* @brief Computes z = (y - y*y)*dy. +* @brief Computes z = (y - y*y)*dy . \n * @par Inputs: * @li y: The input is Tensor, dtype is UnaryDataType. -* @li dy: The input is Tensor, dtype is UnaryDataType. +* @li dy: The input is Tensor, dtype is UnaryDataType . \n * @par Outputs: * z: The shape of output, dtype is UnaryDataType. @@ -260,14 +260,14 @@ REG_OP(SigmoidGrad) .OP_END_FACTORY_REG(SigmoidGrad) /** -*@brief Computes the binomial normal log likelihood (BNLL) output:\n -*if x>0, x+log(1+exp(-x)); otherwise log(1+exp(x)). +*@brief Computes the binomial normal log likelihood (BNLL) output: +*if x>0, x+log(1+exp(-x)); otherwise log(1+exp(x)) . \n *@par Inputs: -*x: A Tensor of type double, float16 or float32. +*x: A Tensor of type double, float16 or float32 . \n *@par Outputs: -*y: A tensor. Has the same type and format as input "x". +*y: A tensor. Has the same type and format as input "x" . \n *@par Third-party framework compatibility * Compatible with the Caffe operator BNLL. @@ -278,11 +278,11 @@ REG_OP(BNLL) .OP_END_FACTORY_REG(BNLL) /** -*@brief Computes softplus: log(exp(x) + 1). +*@brief Computes softplus: log(exp(x) + 1) . \n *@par Inputs: -* One input:\n -*x: A Tensor of type float16 or float32. Up to 8D. +* One input: +*x: A Tensor of type float16 or float32. Up to 8D . \n *@par Outputs: *y: The activations tensor. Has the same type and format as input "x" @@ -296,7 +296,7 @@ REG_OP(Softplus) .OP_END_FACTORY_REG(Softplus) /** -*@brief Computes softplus gradients for a softplus operation. +*@brief Computes softplus gradients for a softplus operation . 
\n *@par Inputs: *Two inputs: @@ -305,7 +305,7 @@ REG_OP(Softplus) *@par Outputs: -*backprops: A Tensor. Has the same type and format as input "gradients". +*backprops: A Tensor. Has the same type and format as input "gradients" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SoftplusGrad. @@ -317,11 +317,11 @@ REG_OP(SoftplusGrad) .OP_END_FACTORY_REG(SoftplusGrad) /** -*@brief Computes softsign: x/(abs(x) + 1). +*@brief Computes softsign: x/(abs(x) + 1) . \n *@par Inputs: -* One input:\n -*x: A Tensor of type float16 or float32. Up to 8D. +* One input: +*x: A Tensor of type float16 or float32. Up to 8D . \n *@par Outputs: *y: The activations tensor. Has the same type and format as "x" @@ -335,15 +335,15 @@ REG_OP(Softsign) .OP_END_FACTORY_REG(Softsign) /** -*@brief Computes scaled exponential linear: scale * alpha * (exp(x) - 1). +*@brief Computes scaled exponential linear: scale * alpha * (exp(x) - 1) . \n *@par Inputs: * One input: *x: A Tensor. Must be one of the following types: float16, float, double - * int32, int8. format:ND, NC1HWC0. + * int32, int8. format:ND, NC1HWC0 . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". format:ND, NC1HWC0. +*y: A Tensor. Has the same type and format as input "x". format:ND, NC1HWC0 . \n *@see Region() @@ -358,7 +358,7 @@ REG_OP(Selu) .OP_END_FACTORY_REG(Selu) /** -*@brief Computes rectified linear gradients for a ReLU operation. +*@brief Computes rectified linear gradients for a ReLU operation . \n *@par Inputs: * Two inputs, including: @@ -368,10 +368,10 @@ REG_OP(Selu) * int32, int8, int16, int64, uint16, float16, uint32, uint64 *@par Outputs: -*backprops: A Tensor. Must have the same type as"gradients". +*backprops: A Tensor. Must have the same type as"gradients" . \n *@attention Constraints: -* The corresponding Relu operator needs to be called before using this operator on the network. 
+* The corresponding Relu operator needs to be called before using this operator on the network . \n *@see Relu @@ -385,18 +385,18 @@ REG_OP(ReluGrad) .OP_END_FACTORY_REG(ReluGrad) /** -*@brief Computes rectified linear gradients for a ReLU operation. +*@brief Computes rectified linear gradients for a ReLU operation . \n *@par Inputs: * Two inputs, including: -*@li gradients: A Tensor. Must be one of the following types: float32, double, int32, int8, int16,\n int8, int64, uint16, float16, uint32, uint64 +*@li gradients: A Tensor. Must be one of the following types: float32, double, int32, int8, int16, int8, int64, uint16, float16, uint32, uint64 *@li mask: A Tensor. Must be the following types: uint8 *@par Outputs: -*backprops: A Tensor. Must have the same type as"gradients". +*backprops: A Tensor. Must have the same type as"gradients" . \n *@attention Constraints: -* The corresponding Relu operator needs to be called before using this operator on the network. +* The corresponding Relu operator needs to be called before using this operator on the network . \n *@see Relu @@ -412,7 +412,7 @@ REG_OP(ReluGradV2) /** *@brief Computes rectified linear: "max(x, 0)". * -*@attention Constraints:\n +*@attention Constraints: * The last dimension must be divisible by 8. * The second output "mask" is "1" (for y >= 0) or "0" ( for y < 0). * @@ -435,15 +435,15 @@ REG_OP(ReluV2) .OP_END_FACTORY_REG(ReluV2) /** -*@brief Performs parametric ReLU. +*@brief Performs parametric ReLU . \n *@par Inputs: -* Two inputs, including: \n +* Two inputs, including: *@li x: A multi-dimensional Tensor of type float16 or float32. -*@li weight: A Scalar or 1D Tensor of type float16 or float32, specifying the weight, the initial value of "a". The number of dimensions must be the same as the number of channels. +*@li weight: A Scalar or 1D Tensor of type float16 or float32, specifying the weight, the initial value of "a". The number of dimensions must be the same as the number of channels . 
\n *@par Outputs: -*y: An activated Tensor. Has the same dimensions with "x". +*y: An activated Tensor. Has the same dimensions with "x" . \n *@par Third-party framework compatibility * Compatible with PyTorch and Caffe operator PReLU. @@ -455,17 +455,17 @@ REG_OP(PRelu) .OP_END_FACTORY_REG(PRelu) /** -*@brief Performs the backpropagation of PRelu for training scenarios. +*@brief Performs the backpropagation of PRelu for training scenarios . \n *@par Inputs: -* Three inputs, including: \n +* Three inputs, including: *@li grads: Input gradient. Multi-dimensional Tensors are supported. The data type can be float16 or float32. *@li features: A multi-dimensional Tensor of type float16 or float32. -*@li weights: A Scalar or 1D Tensor of type float16 or float32, specifying the weight. The number of dimensions must be the same as the number of channels. +*@li weights: A Scalar or 1D Tensor of type float16 or float32, specifying the weight. The number of dimensions must be the same as the number of channels . \n *@par Outputs: *@li dx: Reverse gradient of "features". Has the same dimensions and type as "features". -*@li da: Reverse gradient of "weight". Has the same dimensions and type as "features". +*@li da: Reverse gradient of "weight". Has the same dimensions and type as "features" . \n *@par Third-party framework compatibility * Compatible with PyTorch operator PReluGrad. @@ -480,19 +480,19 @@ REG_OP(PReluGrad) /** *@brief Activation function fused from sigmoid and ReLU, with soft saturation -* on the left and no saturation on the right. +* on the left and no saturation on the right . \n *@par Inputs: -*x: A float16, float32 or double, for the input data type. +*x: A float16, float32 or double, for the input data type . \n *@par Attributes: -*alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0". +*alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . 
\n *@par Outputs: -*y: A float16, float32 or double, for the normalized result. +*y: A float16, float32 or double, for the normalized result . \n *@attention Constraints: -*@li The input is of type float16 or float32. +*@li The input is of type float16 or float32 . \n *@par Multiple batches supported or not *Supported @@ -531,7 +531,7 @@ REG_OP(EluGrad) .OP_END_FACTORY_REG(EluGrad) /** -*@brief Computes the output as x if x > 0 and negative_slope * x if x <= 0. +*@brief Computes the output as x if x > 0 and negative_slope * x if x <= 0 . \n *@par Inputs: * One input: @@ -552,18 +552,18 @@ REG_OP(LeakyRelu) .OP_END_FACTORY_REG(LeakyRelu) /** -*@brief Computes the output as gradients if features > 0 and negative_slope * gradients if features <= 0. +*@brief Computes the output as gradients if features > 0 and negative_slope * gradients if features <= 0 . \n *@par Inputs: * Two inputs, including: * @li gradients: A Tensor. Must be one of the following types: float16, float32, double. -* @li features: A Tensor. Has the same type as "gradients". +* @li features: A Tensor. Has the same type as "gradients" . \n *@par Attributes: -*negative_slope: A float32. Defaults to "0.0". +*negative_slope: A float32. Defaults to "0.0" . \n *@par Outputs: -*backprops: A Tensor. Has the same type as "gradients". +*backprops: A Tensor. Has the same type as "gradients" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator LeakyReluGrad. @@ -576,17 +576,17 @@ REG_OP(LeakyReluGrad) .OP_END_FACTORY_REG(LeakyReluGrad) /** -*@brief Thresholds grad each element of the input Tensor. +*@brief Thresholds grad each element of the input Tensor . \n *@par Inputs: * @li gradients: A Tensor shape and dtype of input gradients. Support float16, int32. -* @li features: A Tensor shape and dtype of input features. Support float16, int32. +* @li features: A Tensor shape and dtype of input features. Support float16, int32 . 
\n *@par Attributes: -*threshold: A float32 scale value to threshold at. +*threshold: A float32 scale value to threshold at . \n *@par Outputs: -*backprops: A Tensor of shape and dtype of output backprops, should be same shape and type as inputs. +*backprops: A Tensor of shape and dtype of output backprops, should be same shape and type as inputs . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -599,17 +599,17 @@ REG_OP(ThresholdGradV2D) .OP_END_FACTORY_REG(ThresholdGradV2D) /** -*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value. +*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value . \n *@par Inputs: -*x: A Tensor dtype of real number. +*x: A Tensor dtype of real number . \n *@par Attributes: *@li threshold: A float32 scale value to threshold at. -*@li value: A float32 scale value to replace with. +*@li value: A float32 scale value to replace with . \n *@par Outputs: -*y: A Tensor of shape and dtype of output, should be same shape and type as input. +*y: A Tensor of shape and dtype of output, should be same shape and type as input . \n *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -622,14 +622,14 @@ REG_OP(ThresholdV2D) .OP_END_FACTORY_REG(ThresholdV2D) /** -*@brief: Computes hyperbolic tangent of "x" element-wise. +*@brief: Computes hyperbolic tangent of "x" element-wise . \n *@par Inputs: *One input: -*x: A Tensor. Must be one of the following types: float16, float32. +*x: A Tensor. Must be one of the following types: float16, float32 . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Mish. 
diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h index 91aff6ba..90628af6 100644 --- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h +++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h @@ -26,7 +26,7 @@ namespace ge { /** -*@brief Computes NPU alloc float status operator function. +*@brief Computes NPU alloc float status operator function . \n *@par Outputs: *data: A Tensor of data value. Must be float32. @@ -36,10 +36,10 @@ REG_OP(NPUAllocFloatStatusOperator) .OP_END_FACTORY_REG(NPUAllocFloatStatusOperator) /** -*@brief Computes NPU clear float status operator function. +*@brief Computes NPU clear float status operator function . \n *@par Inputs: -*addr: A Tensor of data memory address. Must be float32. +*addr: A Tensor of data memory address. Must be float32 . \n *@par Outputs: *data: A Tensor of data value. Must be float32. @@ -50,10 +50,10 @@ REG_OP(NPUClearFloatStatusOperator) .OP_END_FACTORY_REG(NPUClearFloatStatusOperator) /** -*@brief Computes NPU get float status operator function. +*@brief Computes NPU get float status operator function . \n *@par Inputs: -*addr: A Tensor of data memory address. Must be float32. +*addr: A Tensor of data memory address. Must be float32 . \n *@par Outputs: *data: A Tensor of data value. Must be float32. @@ -64,7 +64,7 @@ REG_OP(NPUGetFloatStatusOperator) .OP_END_FACTORY_REG(NPUGetFloatStatusOperator) /** -*@brief Produces a variable with 0 in memory. +*@brief Produces a variable with 0 in memory . \n *@par Outputs: *y: A Tensor of type int32, output eight numbers with a value of zero. @@ -74,10 +74,10 @@ REG_OP(NPUAllocFloatStatus) .OP_END_FACTORY_REG(NPUAllocFloatStatus) /** -*@brief Set the value of address 0x40000 to 0 in each core. +*@brief Set the value of address 0x40000 to 0 in each core . \n *@par Inputs: -*addr: A tensor of type float32. +*addr: A tensor of type float32 . \n *@par Outputs: *data: A Tensor of type float32. 
@@ -88,10 +88,10 @@ REG_OP(NPUClearFloatStatus) .OP_END_FACTORY_REG(NPUClearFloatStatus) /** -*@brief Get the value of address 0x40000. +*@brief Get the value of address 0x40000 . \n *@par Inputs: -*addr: A tensor of type float32. +*addr: A tensor of type float32 . \n *@par Outputs: *data: A Tensor of type float32. diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 6a0492f6..5938941a 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -29,7 +29,7 @@ namespace ge { * This operation creates a tensor of shape "dims" and fills it with "value". * *@par Inputs: -*@li dims: A 1D tensor of types int32 or int64. Represents the shape of the output tensor. +*@li dims: A 1D tensor of types int32 or int64. Represents the shape of the output tensor . \n *@li value: A 0D scalar. Specifies the value to fill the returned tensor. * Must be one of the following types: @@ -66,6 +66,8 @@ REG_OP(Fill) *@par Outputs: * y: A tensor. Has the same type as "value". * +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Fill instead. */ REG_OP(FillD) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, @@ -124,6 +126,8 @@ REG_OP(BroadcastTo) *@par Third-party framework compatibility *Compatible with the TensorFlow operator BroadcastTo. * +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use BroadcastTo instead. */ REG_OP(BroadcastToD) .INPUT(x, TensorType::BasicType()) @@ -132,17 +136,17 @@ REG_OP(BroadcastToD) .OP_END_FACTORY_REG(BroadcastToD) /** -*@brief Pads a tensor. +*@brief Pads a tensor . \n *@par Inputs: *Two inputs, including: * @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, * uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, * complex128, uint32, uint64. -* @li paddings: A Tensor of type int32 or int64. +* @li paddings: A Tensor of type int32 or int64 . 
\n *@par Outputs: -*y: A Tensor of the same type as "x". +*y: A Tensor of the same type as "x" . \n *@par Third-party framework compatibility: * Compatible with TensorFlow operator Pad. @@ -154,23 +158,26 @@ REG_OP(Pad) .OP_END_FACTORY_REG(Pad) /** -*@brief Pads a tensor. +*@brief Pads a tensor . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32. +*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n *@par Attributes: *paddings: An optional "vector>". Defaults to "{}". * For each dimension D of input, paddings[D, 0] indicates how many * values to add before the contents of tensor in that dimension, * and paddings[D, 1] indicates how many values to add after the -* contents of tensor in that dimension. +* contents of tensor in that dimension . \n *@par Outputs: -*y: A Tensor of the same type as "x". +*y: A Tensor of the same type as "x" . \n *@par Third-party framework compatibility: * Compatible with TensorFlow operator Pad. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. */ REG_OP(PadD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) @@ -184,17 +191,20 @@ REG_OP(PadD) *@par Inputs: *Two inputs, including: * @li x: A mutable Tensor. Must be one of the following types: -* float16, float32, int32. +* float16, float32, int32 . \n * @li assist: A mutable Tensor with rank k is at most 1, -* Has the same type as "x". +* Has the same type as "x" . \n *@par Outputs: -*y: A mutable Tensor. Has the same type as "x". +*y: A mutable Tensor. Has the same type as "x" . \n *@see Diag() *@par Third-party framework compatibility * Compatible with the TensorFlow operator Diag. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Diag instead. 
*/ REG_OP(DiagD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) @@ -209,10 +219,10 @@ REG_OP(DiagD) *One input, include: * x: A mutable Tensor with rank k, where k is at most 1. Must be one of the * following types: -* float16, float32, double, int32, int64, complex64, complex128. +* float16, float32, double, int32, int64, complex64, complex128 . \n *@par Outputs: -*y: A mutable Tensor. Has the same type as "x". +*y: A mutable Tensor. Has the same type as "x" . \n *@see DiagD() *@par Third-party framework compatibility @@ -230,10 +240,10 @@ REG_OP(Diag) *@par Inputs: *One input, include: -*x: Tensor which last dimension must be 1. For example: [624000, 1]. +*x: Tensor which last dimension must be 1. For example: [624000, 1] . \n *@par Outputs: -*y: Padding the last dimension of x to padDimSize, [624000, padDimSize]. +*y: Padding the last dimension of x to padDimSize, [624000, padDimSize] . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Diag. @@ -246,7 +256,7 @@ REG_OP(AscendPadding) /** -*@brief EmbeddingRankId, traverse the index calculation server and its position in the server. +*@brief EmbeddingRankId, traverse the index calculation server and its position in the server . \n *@par Inputs: *One input, include: diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index e73a69fe..b3c50654 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -27,20 +27,20 @@ namespace ge { /** -*@brief Converts each string in the input Tensor to the specified numeric type. +*@brief Converts each string in the input Tensor to the specified numeric type . \n *@par Inputs: -*Inputs include: \n -*x: A Tensor. Must be one of the following types: string. +*Inputs include: +*x: A Tensor. Must be one of the following types: string . \n *@par Attributes: -*out_type: The numeric type to interpret each string in string_tensor as. 
+*out_type: The numeric type to interpret each string in string_tensor as . \n *@par Outputs: -*y: A Tensor. Has the same type as x. +*y: A Tensor. Has the same type as x . \n -*@attention Constraints:\n -*-The implementation for StringToNumber on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for StringToNumber on Ascend uses AICPU, with bad performance. \n *@par Third-party framework compatibility *@li compatible with tensorflow StringToNumber operator. diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index 772f9edb..31ba266b 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -27,8 +27,8 @@ namespace ge { /** * @brief Dequantizes the input tensor into a float tensor. * [min_range, max_range] are float32 tensors that specify the range -* for "y". \n -* The "mode" attribute controls exactly which calculations are used to convert\n +* for "y". +* The "mode" attribute controls exactly which calculations are used to convert * the float values to their quantized equivalents. * @par Inputs: * @li x: A Tensor. Must be one of the following types: int8, uint8, @@ -36,18 +36,18 @@ namespace ge { * @li min_range: A Tensor of type float32. * Specifies the minimum scalar value possibly produced for the input. * @li max_range: A Tensor of type float32. -* Specifies the maximum scalar value possibly produced for the input. +* Specifies the maximum scalar value possibly produced for the input . \n * @par Attributes: * mode: An optional string from: "MIN_COMBINED", "MIN_FIRST", and "SCALED". -* Defaults to "MIN_COMBINED". +* Defaults to "MIN_COMBINED" . \n * @par Outputs: -* y: A dictionary of type float32. +* y: A dictionary of type float32 . \n * @attention Constraints: * @li "min_range" and "max_range" have the same shapes. -* @li "x" and "y" have the same shapes. +* @li "x" and "y" have the same shapes . 
\n * @par Third-party framework compatibility * Compatible with the TensorFlow operator Dequantize. @@ -61,20 +61,20 @@ REG_OP(Dequantize) .OP_END_FACTORY_REG(Dequantize) /** -*@brief Quantizes the input. +*@brief Quantizes the input . \n *@par Inputs: -*x: An NC1HWC0 tensor of type float16 or float32, specifying the input. +*x: An NC1HWC0 tensor of type float16 or float32, specifying the input . \n *@par Attributes: *@li scale: A required float32, specifying the scaling ratio. *@li offset: A required float16, specifying the offset. *@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". *@li round_mode: An optional string, specifying the float16 to int8 cast type. -* The value range is [Round, Floor, Ceiling, Truncate]. Defaults to "Round". +* The value range is [Round, Floor, Ceiling, Truncate]. Defaults to "Round" . \n *@par Outputs: -*y: The quantized output tensor of type int8 and with format NC1HWC0. +*y: The quantized output tensor of type int8 and with format NC1HWC0 . \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. @@ -89,19 +89,19 @@ REG_OP(AscendQuant) .OP_END_FACTORY_REG(AscendQuant) /** -*@brief Dequantizes the input. +*@brief Dequantizes the input . \n *@par Inputs: *@li x: An NC1HWC0 tensor of type int32, specifying the input. -*@li deq_scale: An NC1HWC0 tensor of type float16 or uint64, specifying the scaling ratio. +*@li deq_scale: An NC1HWC0 tensor of type float16 or uint64, specifying the scaling ratio . \n *@par Attributes: *@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". *@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". -*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT". 
+*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n *@par Outputs: -*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0. +*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0 . \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. @@ -116,19 +116,19 @@ REG_OP(AscendDequant) .OP_END_FACTORY_REG(AscendDequant) /** -*@brief Anti quantizes the input. +*@brief Anti quantizes the input . \n *@par Inputs: -*x: An NC1HWC0 tensor of type int8, specifying the input. +*x: An NC1HWC0 tensor of type int8, specifying the input . \n *@par Attributes: *@li scale: A required float32 scale. *@li offset: A required float32 offset. *@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT". -*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". +*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False" . \n *@par Outputs: -*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0. +*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0 . \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. @@ -143,18 +143,18 @@ REG_OP(AscendAntiQuant) .OP_END_FACTORY_REG(AscendAntiQuant) /** -*@brief Dequantizes the input of int16. +*@brief Dequantizes the input of int16 . \n *@par Inputs: *@li x0: An NC1HWC0 tensor of type int32, specifying the input. *@li deq_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. -*@li x1: An NC1HWC0 tensor of type int16, specifying the input. +*@li x1: An NC1HWC0 tensor of type int16, specifying the input . 
\n *@par Attributes: -*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". +*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n *@par Outputs: -*y: The dequantized output tensor of type int16 and with format NC1HWC0. +*y: The dequantized output tensor of type int16 and with format NC1HWC0 . \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. @@ -168,17 +168,17 @@ REG_OP(AscendDequantS16) .OP_END_FACTORY_REG(AscendDequantS16) /** -*@brief Requantizes the input. +*@brief Requantizes the input . \n *@par Inputs: *@li x: An NC1HWC0 tensor of type int32, specifying the input. -*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. +*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio . \n *@par Attributes: -*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". +*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n *@par Outputs: -*y: The dequantized output tensor of type int8 and with format NC1HWC0. +*y: The dequantized output tensor of type int8 and with format NC1HWC0 . \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. @@ -191,20 +191,20 @@ REG_OP(AscendRequant) .OP_END_FACTORY_REG(AscendRequant) /** -*@brief Requantizes the input of int16. +*@brief Requantizes the input of int16 . \n *@par Inputs: *@li x: An NC1HWC0 tensor of type int16, specifying the input. *@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. -*@li x1: An NC1HWC0 tensor of type int16. +*@li x1: An NC1HWC0 tensor of type int16 . \n *@par Attributes: *@li dual_output: A optional bool, specifying whether to perform dual ouput, either "True" or "False". 
Defaults to "False". -*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False". +*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n *@par Outputs: *@li y: The dequantized output tensor of type int8 and with format NC1HWC0. -*@li y1: The dequantized output tensor of type int16 and with format NC1HWC0. +*@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h index d0f2b1c5..4c62ec86 100644 --- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h @@ -27,23 +27,23 @@ namespace ge { /** -*@brief Gather ragged slices from `params` axis `0` according to `indices`. +*@brief Gather ragged slices from `params` axis `0` according to `indices` . \n *@par Inputs: -*@li params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the \n -*params` RaggedTensor input. -*@li params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change \n -*at the python level from dense_values to flat_values, so dense_values is the \n +*@li params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the +*params` RaggedTensor input. It's a dynamic input. +*@li params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change +*at the python level from dense_values to flat_values, so dense_values is the *deprecated name. -*@li indices: Indices in the outermost dimension of `params` of the values that should be \n +*@li indices: Indices in the outermost dimension of `params` of the values that should be *gathered. 
-*@li OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain \n -*this number of `row_splits` tensors. This value should equal \n -*`indices.shape.ndims + params.ragged_rank - 1`. +*@li OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain +*this number of `row_splits` tensors. This value should equal +*`indices.shape.ndims + params.ragged_rank - 1` . \n *@par Outputs: -*y:A Returns The `nested_row_splits` tensors that define the row-partitioning for the \n -*returned RaggedTensor.The `flat_values` for the returned RaggedTensor. +*y:A Returns The `nested_row_splits` tensors that define the row-partitioning for the +*returned RaggedTensor.The `flat_values` for the returned RaggedTensor . \n *@par Third-party framework compatibility * Compatible with tensorflow RaggedGather operator. diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index a95884a8..cd6cfdfe 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -25,24 +25,24 @@ namespace ge { /** -*@brief Converts a RaggedTensor into a SparseTensor with the same values. +*@brief Converts a RaggedTensor into a SparseTensor with the same values . \n *@par Inputs: -*Two inputs, including: \n -*@li rt_nested_splits: A list of at least 1 Tensor objects with the same type \n -in: int32, int64. The row_splits for the RaggedTensor. -*@li rt_dense_values: A Tensor. The flat_values for the RaggedTensor \n -Must be one of the following types: bool, int8, int16, uint16, int32, \n -int64, double, float, float16. +*Two inputs, including: +*@li rt_nested_splits: A list of at least 1 Tensor objects with the same type +in: int32, int64. The row_splits for the RaggedTensor. It's a dynamic input. +*@li rt_dense_values: A Tensor. 
The flat_values for the RaggedTensor +Must be one of the following types: bool, int8, int16, uint16, int32, +int64, double, float, float16 . \n *@par Attributes: *@li RAGGED_RANK: the dynamic of input rt_nested_splits with type int. -*@li Tsplits: A required attribute, the type is int64. +*@li Tsplits: A required attribute, the type is int64 . \n *@par Outputs: *@li sparse_indices: A Tensor of type int64. *@li sparse_values: A Tensor. Has the same type as rt_dense_values. -*@li sparse_dense_shape: A Tensor of type int64. +*@li sparse_dense_shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with TensorFlow operator RaggedTensorToSparse. @@ -58,24 +58,24 @@ REG_OP(RaggedTensorToSparse) .OP_END_FACTORY_REG(RaggedTensorToSparse) /** -*@brief Create a dense tensor from a ragged tensor, possibly altering its shape. +*@brief Create a dense tensor from a ragged tensor, possibly altering its shape . \n *@par Inputs: *Six inputs, including: *@li shape:A `Tensor`. Must be one of the following types: `int64`, `int32`. *@li values:A 1D tensor representing the values of the ragged tensor. *@li default_value:A `Tensor`. Must have the same type as `values`. -*@li row_partition_tensors:A list of at least 1 `Tensor` objects with the same \n -type in: `int64`, `int32`. +*@li row_partition_tensors:A list of at least 1 `Tensor` objects with the same +type in: `int64`, `int32` . It's a dynamic input.\n *@par Attributes: *@li num_row_partition_tensors:Numbers of row partition tensors. -*@li row_partition_types: A list of `strings`. \n -The types of the row partition tensors. At present, these can be: \n -* "ROW_SPLITS": the row_splits tensor from the ragged tensor. \n -* "VALUE_ROWIDS": the value_rowids tensor from the ragged tensor. \n -* "FIRST_DIM_SIZE": if value_rowids is used for the first dimension, then it \n -is preceeded by "FIRST_DIM_SIZE". +*@li row_partition_types: A list of `strings`. +The types of the row partition tensors. 
At present, these can be: +* "ROW_SPLITS": the row_splits tensor from the ragged tensor. +* "VALUE_ROWIDS": the value_rowids tensor from the ragged tensor. +* "FIRST_DIM_SIZE": if value_rowids is used for the first dimension, then it +is preceeded by "FIRST_DIM_SIZE" . \n *@par Outputs: *@li result: A `Tensor`. Has the same type as `values`. diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h index 5acdb7f6..ab871b7e 100644 --- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h @@ -27,20 +27,20 @@ namespace ge { /** -*@brief Returns a `RaggedTensor` containing the specified sequences of numbers. +*@brief Returns a `RaggedTensor` containing the specified sequences of numbers . \n *@par Inputs: *@li starts: The starts of each range. *@li limits: The limits of each range. -*@li deltas: The deltas of each range. +*@li deltas: The deltas of each range . \n *@par Outputs: -*y:A Returns The `row_splits` for the returned `RaggedTensor`.The `flat_values` for the returned `RaggedTensor`. +*y:A Returns The `row_splits` for the returned `RaggedTensor`.The `flat_values` for the returned `RaggedTensor` . \n -*@attention Constraints: \n -*The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. \n -*The vector inputs must all have the same size. Scalar inputs are broadcast \n -*to match the size of the vector inputs. +*@attention Constraints: +*The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. +*The vector inputs must all have the same size. Scalar inputs are broadcast +*to match the size of the vector inputs . \n *@par Third-party framework compatibility * Compatible with tensorflow RaggedRange operator. 
diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index 8c95ea64..edec232d 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -28,24 +28,24 @@ namespace ge { /** -*@brief Draws samples from a multinomial distribution. +*@brief Draws samples from a multinomial distribution . \n *@par Inputs: -*Inputs include: \n -* @li logits: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n - int64, bfloat16, uint16, half, uint32, uint64. 2-D Tensor with shape [batch_size, num_classes]. -* @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice. +*Inputs include: +* @li logits: A Tensor. Must be one of the following types: float32, float64,double. +2-D Tensor with shape [batch_size, num_classes]. +* @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice . \n *@par Attributes: *@li output_dtype: An optional type from: int32, int64. Defaults to int64. *@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: -*y_indices: A Tensor of type output_dtype. +*y_indices: A Tensor of type output_dtype . \n -*@attention Constraints:\n -*-The implementation for Multinomial on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for Multinomial on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow Multinomial operator. @@ -60,26 +60,26 @@ REG_OP(Multinomial) .OP_END_FACTORY_REG(Multinomial) /** -*@brief Outputs random values from a normal distribution. +*@brief Outputs random values from a normal distribution . \n *@par Inputs: -*Inputs include: \n -* @li shape: A Tensor. Must be one of the following types: int32, int64. 
\n +*Inputs include: +* @li shape: A Tensor. Must be one of the following types: int32, int64. The shape of the output tensor. Batches are indexed by the 0th dimension. * @li means: A Tensor. Must be one of the following types: half, bfloat16, float32, float64. * @li stdevs: A Tensor. Must have the same type as means. * @li min: A Tensor. Must have the same type as means. The minimum cutoff. May be -infinity. -* @li max: A Tensor. Must have the same type as means. +* @li max: A Tensor. Must have the same type as means . \n *@par Attributes: *@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: -*y: A Tensor. Has the same type as means. +*y: A Tensor. Has the same type as means . \n -*@attention Constraints:\n -*-The implementation for ParameterizedTruncatedNormal on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for ParameterizedTruncatedNormal on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow ParameterizedTruncatedNormal operator. @@ -96,18 +96,18 @@ REG_OP(ParameterizedTruncatedNormal) .OP_END_FACTORY_REG(ParameterizedTruncatedNormal) /** -*@brief Computes the derivative of a Gamma random sample w.r.t. alpha. +*@brief Computes the derivative of a Gamma random sample w.r.t. alpha . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li alpha: A Tensor. Must be one of the following types: float32, float64. -* @li sample: A Tensor. Must have the same type as alpha. +* @li sample: A Tensor. Must have the same type as alpha . \n *@par Outputs: -*y: A Tensor. Has the same type as alpha. +*y: A Tensor. Has the same type as alpha . \n -*@attention Constraints:\n -*-The implementation for RandomGammaGrad on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for RandomGammaGrad on Ascend uses AICPU, with bad performance. 
*@par Third-party framework compatibility *@li compatible with tensorflow RandomGammaGrad operator. @@ -119,22 +119,22 @@ REG_OP(RandomGammaGrad) .OP_END_FACTORY_REG(RandomGammaGrad) /** -*@brief Outputs random values from the Gamma distribution(s) described by alpha. +*@brief Outputs random values from the Gamma distribution(s) described by alpha . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li shape: A Tensor. Must be one of the following types: int32, int64. 1-D integer tensor. -* @li alpha: A Tensor. Must be one of the following types: half, float32, float64. +* @li alpha: A Tensor. Must be one of the following types: half, float32, float64 . \n *@par Attributes: *@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: -*y: A Tensor. Has the same type as alpha. +*y: A Tensor. Has the same type as alpha . \n -*@attention Constraints:\n -*-The implementation for RandomGamma on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for RandomGamma on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow RandomGamma operator. @@ -148,23 +148,23 @@ REG_OP(RandomGamma) .OP_END_FACTORY_REG(RandomGamma) /** -*@brief Outputs random values from the Poisson distribution(s) described by rate. +*@brief Outputs random values from the Poisson distribution(s) described by rate . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li shape: A Tensor. Must be one of the following types: int32, int64. 1-D integer tensor. -* @li rate: A Tensor. Must be one of the following types: half, float32, float64, int32, int64. +* @li rate: A Tensor. Must be one of the following types: half, float32, float64, int32, int64 . \n *@par Attributes: *@li dtype: An optional type from: half, float32, float64, int32, int64. Defaults to int64. *@li seed: An optional int. Defaults to 0. 
-*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: -*y: A Tensor of type dtype. +*y: A Tensor of type dtype . \n -*@attention Constraints:\n -*-The implementation for RandomPoisson on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for RandomPoisson on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow RandomPoisson operator. @@ -181,21 +181,21 @@ REG_OP(RandomPoisson) .OP_END_FACTORY_REG(RandomPoisson) /** -*@brief Randomly shuffles a tensor along its first dimension. +*@brief Randomly shuffles a tensor along its first dimension . \n *@par Inputs: -*Inputs include: \n -*x: A Tensor. The tensor to be shuffled. +*Inputs include: +*x: A Tensor. The tensor to be shuffled . \n *@par Attributes: *@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: -*y: A Tensor. Has the same type as x. +*y: A Tensor. Has the same type as x . \n -*@attention Constraints:\n -*-The implementation for RandomShuffle on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for RandomShuffle on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow RandomShuffle operator. @@ -212,22 +212,22 @@ REG_OP(RandomShuffle) .OP_END_FACTORY_REG(RandomShuffle) /** -*@brief Outputs random values from a normal distribution. +*@brief Outputs random values from a normal distribution . \n *@par Inputs: -*Inputs include: \n -*shape: A Tensor. Must be one of the following types: int32, int64. The shape of the output tensor. +*Inputs include: +*shape: A Tensor. Must be one of the following types: int32, int64. The shape of the output tensor . \n *@par Attributes: *@li dtype: A type from: half, float16, float32, float64. The type of the output. 
*@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: -*y: A Tensor of type dtype. +*y: A Tensor of type dtype . \n -*@attention Constraints:\n -*-The implementation for RandomStandardNormal on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for RandomStandardNormal on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow RandomStandardNormal operator. @@ -241,23 +241,23 @@ REG_OP(RandomStandardNormal) .OP_END_FACTORY_REG(RandomStandardNormal) /** -*@brief Outputs random integers from a uniform distribution. +*@brief Outputs random integers from a uniform distribution . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li shape: A Tensor. Must be one of the following types: int32, int64. The shape of the output tensor. * @li min: A Tensor. Must be one of the following types: int32, int64. 0-D. -* @li max: A Tensor. Must have the same type as minval. 0-D. +* @li max: A Tensor. Must have the same type as minval. 0-D . \n *@par Attributes: *@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: -*y: A Tensor. Has the same type as min. +*y: A Tensor. Has the same type as min . \n -*@attention Constraints:\n -*-The implementation for RandomUniformInt on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for RandomUniformInt on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow RandomUniformInt operator. @@ -272,22 +272,22 @@ REG_OP(RandomUniformInt) .OP_END_FACTORY_REG(RandomUniformInt) /** -*@brief Outputs random values from a uniform distribution. +*@brief Outputs random values from a uniform distribution . \n *@par Inputs: -*Inputs include: \n -*shape: A Tensor. 
Must be one of the following types: int32, int64. The shape of the output tensor. +*Inputs include: +*shape: A Tensor. Must be one of the following types: int32, int64. The shape of the output tensor . \n *@par Attributes: *@li dtype: A type from: half, float16, float32, float64. The type of the output. *@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: -*y: A Tensor of type dtype. +*y: A Tensor of type dtype . \n -*@attention Constraints:\n -*-The implementation for RandomUniform on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for RandomUniform on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow RandomUniform operator. @@ -301,21 +301,21 @@ REG_OP(RandomUniform) .OP_END_FACTORY_REG(RandomUniform) /** -*@brief Outputs random values from a truncated normal distribution. +*@brief Outputs random values from a truncated normal distribution . \n *@par Inputs: -*Inputs include: \n -*shape: A Tensor. Must be one of the following types: int32, int64. +*Inputs include: +*shape: A Tensor. Must be one of the following types: int32, int64 . \n *@par Attributes: *@li seed: An optional int. Defaults to 0. -*@li seed2: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n *@par Outputs: -*size: A Tensor of types: float16, float32, double. +*size: A Tensor of types: float16, float32, double . \n -*@attention Constraints:\n -*-The implementation for TruncatedNormal on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for TruncatedNormal on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow TruncatedNormal operator. 
@@ -328,22 +328,22 @@ REG_OP(TruncatedNormal) .OP_END_FACTORY_REG(TruncatedNormal) /** -*@brief Generate random bit mask for dropout. +*@brief Generate random bit mask for dropout . \n *@par Inputs: -include: \n +include: *@li shape:The shape of the output tensor. -*@li prob:0-D. Number of bit 1. +*@li prob:0-D. Number of bit 1 . \n *@par Attributes: -*@li seed:If either seed or seed2 are set to be non-zero, the random number\n +*@li seed:If either seed or seed2 are set to be non-zero, the random number *generator is seeded by the given seed. Otherwise, it is seeded by a random seed. -*@li seed2:A second seed to avoid seed collision. +*@li seed2:A second seed to avoid seed collision . \n *@par Outputs: -*y:Output (1-D) random number using uint data format. +*y:Output (1-D) random number using uint data format . \n -*@attention Constraints:\n +*@attention Constraints: *The output is aligned with 128 bits *@see DropOutGenMask() @@ -357,23 +357,26 @@ REG_OP(DropOutGenMask) .OP_END_FACTORY_REG(DropOutGenMask) /** -*@brief Generates values in an interval. +*@brief Generates values in an interval . \n *@par Inputs: * Four ND inputs, including: *@li assist: A 1D Tensor of type float32. *@li start: A 1D Tensor of type float32, for the first entry in the range. *@li stop: A 1D Tensor of type float32, for the last entry in the range. -*@li num: A 1D Tensor of type int32 or int64, for the common difference of the entries. +*@li num: A 1D Tensor of type int32 or int64, for the common difference of the entries . \n *@par Outputs: -*output_op: A 1D Tensor of type float32. +*output_op: A 1D Tensor of type float32 . \n *@attention Constraints: -* "input_assist" is a sequence of "input_num" evenly-spaced values beginning at 0 with an common difference of 1. +* "input_assist" is a sequence of "input_num" evenly-spaced values beginning at 0 with an common difference of 1 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator lin_space. 
+* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use LinSpace instead. */ REG_OP(LinSpaceD) .INPUT(assist, TensorType({DT_FLOAT})) @@ -384,19 +387,19 @@ REG_OP(LinSpaceD) .OP_END_FACTORY_REG(LinSpaceD) /** -*@brief Generates values in an interval. +*@brief Generates values in an interval . \n *@par Inputs: * Four ND inputs, including: *@li start: A 1D Tensor of type float32, for the first entry in the range. *@li stop: A 1D Tensor of type float32, for the last entry in the range. -*@li num: A 1D Tensor of type int32 or int64, for the common difference of the entries. +*@li num: A 1D Tensor of type int32 or int64, for the common difference of the entries . \n *@par Outputs: -*output_op: A 1D Tensor of type float32. +*output_op: A 1D Tensor of type float32 . \n *@attention Constraints: -* "input_assist" is a sequence of "input_num" evenly-spaced values beginning at 0 with an common difference of 1. +* "input_assist" is a sequence of "input_num" evenly-spaced values beginning at 0 with an common difference of 1 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator lin_space. @@ -418,21 +421,21 @@ REG_OP(Dropout) .OP_END_FACTORY_REG(Dropout) /** -*@brief Shuffle index of no-zero element. +*@brief Shuffle index of no-zero element . \n *@par Inputs: -include: \n -*x:A tensor <= 5-D. +include: +*x:A tensor <= 5-D . \n *@par Attributes: *@li count:the count of output, if 0, out all no-zero elements. *@li seed:If either seed or seed2 are set to be non-zero, the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. -*@li seed2:A second seed to avoid seed collision. +*@li seed2:A second seed to avoid seed collision . \n *@par Outputs: *@li y:2-D tensor, no-zero element index. -*@li mask:1-D, whether the corresponding index is valid. +*@li mask:1-D, whether the corresponding index is valid . 
\n *@see RandomChoiceWithMask() */ @@ -449,18 +452,18 @@ REG_OP(RandomChoiceWithMask) *@brief Permutes data in the channel dimension of the input *@par Inputs: -*Inputs including: \n +*Inputs including: * @li x: A required Tensor. Must be one of the following types: - float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. + float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n *@par Attributes: -*@li group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1". +*@li group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n *@par Outputs: *y: A required Tensor. Has same type and shape as "x". Must be one of the following types: - float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. + float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n -*@attention Constraints:\n +*@attention Constraints: *@li "group" must be greater than 0 and must evenly divide the channel dimension size. *@li The format of input "x" must be NCHW. *@par Third-party framework compatibility diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index a7f8a178..7a239732 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -25,18 +25,18 @@ namespace ge { /** -*@brief Performs reduced batch normalization. +*@brief Performs reduced batch normalization . \n *@par Inputs: -*x: A 5D Tensor of type float16 or float32, with format NC1HWC0. +*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n *@par Outputs: *@li sum: A 1D Tensor of type float32 for SUM reduced "x". -*@li square_sum: A 1D Tensor of type float32 for SUMSQ reduced "x". +*@li square_sum: A 1D Tensor of type float32 for SUMSQ reduced "x" . 
\n *@attention Constraints: * This operator is a BatchNorm fusion operator for updating the moving -* averages for training. \n +* averages for training. * This operator is used in conjunction with BNTrainingUpdate. */ REG_OP(BNTrainingReduce) @@ -46,10 +46,10 @@ REG_OP(BNTrainingReduce) .OP_END_FACTORY_REG(BNTrainingReduce) /** -*@brief Performs the backpropagation of BatchNorm. +*@brief Performs the backpropagation of BatchNorm . \n *@par Inputs: -* Seven inputs, including: \n +* Seven inputs, including: *@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, for * the gradient. *@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. @@ -61,18 +61,18 @@ REG_OP(BNTrainingReduce) *@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0, * for the mean of "x". *@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0, -* for the variance of "x". +* for the variance of "x" . \n *@par Attributes: *epsilon: An optional float32. Defaults to "0.0001". A small float number -* added to the variance of "x". +* added to the variance of "x" . \n *@par Outputs: *y: A Tensor of type float16 or float32, with format NC1HWC0, for the offset -* of "x". +* of "x" . \n *@attention Constraints: -* The preceding layer of this operator must be BNTrainingUpdateGrad. +* The preceding layer of this operator must be BNTrainingUpdateGrad . \n *@see BNTrainingUpdateGrad */ @@ -89,7 +89,7 @@ REG_OP(BNTrainingReduceGrad) .OP_END_FACTORY_REG(BNTrainingReduceGrad) /** -*@brief Performs reduced batch normalization. +*@brief Performs reduced batch normalization . \n *@par Inputs: * Seven inputs, including: (NC1HWC0 supported) @@ -101,13 +101,13 @@ REG_OP(BNTrainingReduceGrad) *@li scale: A 1D Tensor of type float32, for the scaling factor. *@li offset: A 1D Tensor of type float32, for the scaling offset. *@li mean: A 1D Tensor of type float32, for the updated mean. -*@li variance: A 1D Tensor of type float32, for the updated variance. 
+*@li variance: A 1D Tensor of type float32, for the updated variance . \n *@par Attributes: *@li epsilon: A required float32, specifying the small value added to variance * to avoid dividing by zero. *@li factor: A required float32, specifying the weight for updating the mean -* and variance. +* and variance . \n *@par Outputs: * Five outputs, including: (NC1HWC0 supported) @@ -115,11 +115,11 @@ REG_OP(BNTrainingReduceGrad) *@li mean: A 5D Tensor of type float32, for the updated mean. *@li variance: A 5D Tensor of type float32, for the updated variance. *@li batch_mean: A 1D Tensor of type float32, for the mean of "x". -*@li batch_variance: A 1D Tensor of type float32, for the variance of "x". +*@li batch_variance: A 1D Tensor of type float32, for the variance of "x" . \n *@attention Constraints: *@li This operator is a BatchNorm fusion operator for updating the moving -* averages for training. \n +averages for training. *This operator is used in conjunction with BNTrainingReduce. *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square * root instruction. @@ -142,22 +142,22 @@ REG_OP(BNTrainingUpdate) .OP_END_FACTORY_REG(BNTrainingUpdate) /** -*@brief Performs batch normalization for inference. +*@brief Performs batch normalization for inference . \n -*@par Inputs:\n +*@par Inputs: * Five inputs, including: (NC1HWC0 supported) *@li x: A 5D Tensor of type float16 or float32. *@li scale: A 5D Tensor of type float32, for the scaling factor. *@li offset: A 5D Tensor of type float32, for the scaling offset. *@li mean: A 5D Tensor of type float32, for the mean. -*@li variance: A 5D Tensor of type float32, for the variance. +*@li variance: A 5D Tensor of type float32, for the variance . \n *@par Attributes: *epsilon: An optional float32, specifying the small value added to variance to -* avoid dividing by zero. Defaults to "0.0001". +* avoid dividing by zero. Defaults to "0.0001" . 
\n -*@par Outputs:\n -*y: A 5D Tensor of type float16 or float32 for the normalized "x". +*@par Outputs: +*y: A 5D Tensor of type float16 or float32 for the normalized "x" . \n *@attention Constraints: *For Ascend 310, the result accuracy fails to reach 1‰ due to the square root @@ -175,7 +175,7 @@ REG_OP(BNInfer) /** *@brief Performs reduced batch normalization. For some scene which don't contain -assignmoving average. +assignmoving average . \n *@par Inputs: *Five inputs, including: (NC1HWC0 supported) @@ -183,19 +183,19 @@ assignmoving average. *@li sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. *@li square_sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. *@li scale: A 5D Tensor of type float32, for the scaling factor. -*@li offset: A 5D Tensor of type float32, for the scaling offset. +*@li offset: A 5D Tensor of type float32, for the scaling offset . \n *@par Attributes: -*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. +*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n *@par Outputs: *Three outputs, including: (NC1HWC0 supported) *@li y: A 5D Tensor of type float16 or float32, for normalized "x". *@li batch_mean: A 5D Tensor of type float32, for the mean of "x". -*@li batch_variance: A 5D Tensor of type float32, for the variance of "x". +*@li batch_variance: A 5D Tensor of type float32, for the variance of "x" . \n *@attention Constraints: -*This operator is used in conjunction with BNTrainingReduce. \n +*This operator is used in conjunction with BNTrainingReduce. For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. */ REG_OP(BNTrainingUpdateV2) @@ -212,7 +212,7 @@ REG_OP(BNTrainingUpdateV2) /** *@brief Performs reduced batch normalization v3. For some scene which don't contain -assign moving average. +assign moving average . 
\n *@par Inputs: * Five inputs, including: (NC1HWC0 supported) @@ -220,17 +220,17 @@ assign moving average. *@li sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. *@li square_sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. *@li scale: A 5D Tensor of type float32, for the scaling factor. -*@li offset: A 5D Tensor of type float32, for the scaling offset. +*@li offset: A 5D Tensor of type float32, for the scaling offset . \n *@par Attributes: -*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. +*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n *@par Outputs: *@li y: A 5D Tensor of type float16 or float32, for normalized "x". *@li batch_mean: A 5D Tensor of type float32, for the mean of "x". *@li batch_variance: A 5D Tensor of type float32, for the variance of "x". *@li reserve_1: A 5D Tensor of type float32, for the mean of batch "x". Has the same type as batch_mean. -*@li reserve_2: A 5D Tensor of type float32, for the variance of batch "x". Has the same type as batch_mean. +*@li reserve_2: A 5D Tensor of type float32, for the variance of batch "x". Has the same type as batch_mean . \n *@attention Constraints: *@li This operator is used in conjunction with BNTrainingReduce. @@ -251,27 +251,27 @@ REG_OP(BNTrainingUpdateV3) .OP_END_FACTORY_REG(BNTrainingUpdateV3) /** -*@brief Performs the backpropagation of BatchNorm. +*@brief Performs the backpropagation of BatchNorm . \n *@par Inputs: -* Four inputs, including: \n +* Four inputs, including: *@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, * for the gradient. *@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. *@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0, * for the mean of "x". *@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0, -* for the variance of "x". 
+* for the variance of "x" . \n *@par Attributes: *epsilon: An optional float32. Defaults to "0.0001". A small float number -* added to the variance of "x". +* added to the variance of "x" . \n *@par Outputs: *@li diff_scale: A Tensor of type float32, with format NC1HWC0, * for the offset of "scale". *@li diff_offset: A Tensor of type float32, with format NC1HWC0, -* for the offset of "offset". +* for the offset of "offset" . \n */ REG_OP(BNTrainingUpdateGrad) @@ -285,19 +285,19 @@ REG_OP(BNTrainingUpdateGrad) .OP_END_FACTORY_REG(BNTrainingUpdateGrad) /** -*@brief Performs the backpropagation of BatchNorm for inference. +*@brief Performs the backpropagation of BatchNorm for inference . \n *@par Inputs: -* Three inputs, including: \n +* Three inputs, including: *@li grads: A 5D Tensor of type loat16 or float32, with format NC1HWC0, for the gradient. *@li scale: A 5D Tensor of type float32, with format NC1HWC0. -*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0. It is an output of BatchNorm. +*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0. It is an output of BatchNorm . \n *@par Attributes: -*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". +*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x" . \n *@par Outputs: -*x_backprop: A Tensor of type float16 or float32, with format NC1HWC0, for the offset of "x". +*x_backprop: A Tensor of type float16 or float32, with format NC1HWC0, for the offset of "x" . \n *@attention Constraints: * The preceding layer of this operator must be operator BatchNorm. @@ -311,21 +311,21 @@ REG_OP(BNInferGrad) .OP_END_FACTORY_REG(BNInferGrad) /** -*@brief Computes the sum of elements across dimensions of a tensor. +*@brief Computes the sum of elements across dimensions of a tensor . \n *@par Inputs: -* Two inputs, including: \n +* Two inputs, including: *@li x: A Tensor. 
Must be one of the following types: * float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, uint16, * complex128, float16, uint32, uint64, complex64, complex128. -*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. +*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n *@par Attributes: -*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false". +*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n *@par Outputs: -*y: The reduced tensor. Has the same type and format as input "x". +*y: The reduced tensor. Has the same type and format as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Sum. @@ -338,46 +338,52 @@ REG_OP(ReduceSum) .OP_END_FACTORY_REG(ReduceSum) /** -*@brief Computes the sum of elements across dimensions of a tensor. +*@brief Computes the sum of elements across dimensions of a tensor . \n *@par Inputs: -* One input: \n -*x: A Tensor. Up to 8D. Must be one of the following types: float16, float32, int32, int8, uint8. +* One input: +*x: A Tensor. Up to 8D. Must be one of the following types: float16, float32. \n *@par Attributes: *@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. -*@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false". +*@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n *@par Outputs: -*y: The reduced tensor. Has the same type and format as input "x". +*y: The reduced tensor. Has the same type and format as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Sum. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceSum instead. 
*/ REG_OP(ReduceSumD) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT32})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(axes, ListInt) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceSumD) /** -*@brief Calculates the "logical sum" of elements of a tensor in a dimension. +*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n *@par Inputs: *One input: -*x: The boolean tensor to reduce. +*x: The boolean tensor to reduce . \n *@par Attributes: *@li keep_dims: A bool. If true, retains reduced dimensions with length 1. *@li axis: The dimensions to reduce. If None, reduces all dimensions. -*Must be in the range [- rank (input_sensor), rank (input_sensor)). +*Must be in the range [- rank (input_sensor), rank (input_sensor)) . \n *@par Outputs: -*y: The reduced tensor. +*y: The reduced tensor . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ReduceAll. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAll instead. */ REG_OP(ReduceAllD) .INPUT(x, TensorType({DT_BOOL})) @@ -387,18 +393,18 @@ REG_OP(ReduceAllD) .OP_END_FACTORY_REG(ReduceAllD) /** -*@brief Calculates the "logical sum" of elements of a tensor in a dimension. +*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n *@par Inputs: *Two inputs, including: *@li x: The boolean tensor to reduce. -*@li axis: A mutable Tensor. The dimensions to reduce. If None, reduces all dimensions. Must be in the range [- rank (input_sensor), rank (input_sensor)). +*@li axis: A mutable Tensor. The dimensions to reduce. If None, reduces all dimensions. Must be in the range [- rank (input_sensor), rank (input_sensor)) . \n *@par Attributes: -*keep_dims: A bool. If true, retains reduced dimensions with length 1. +*keep_dims: A bool. 
If true, retains reduced dimensions with length 1 . \n *@par Outputs: -*y: The reduced tensor. +*y: The reduced tensor . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ReduceAll. @@ -411,18 +417,18 @@ REG_OP(ReduceAll) .OP_END_FACTORY_REG(ReduceAll) /** -*@brief Reduce a tensor on a certain axis based on product.. +*@brief Reduce a tensor on a certain axis based on product. . \n *@par Inputs: *Two inputs, including: *@li x: A mutable Tensor. Must be the type of NumberType. -*@li axis: A mutable Tensor. The dimensions to reduce. +*@li axis: A mutable Tensor. The dimensions to reduce . \n *@par Attributes: -*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False". +*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ReduceProd. @@ -435,24 +441,27 @@ REG_OP(ReduceProd) .OP_END_FACTORY_REG(ReduceProd) /** -*@brief Computes the product of elements across dimensions of a tensor. +*@brief Computes the product of elements across dimensions of a tensor . \n *@par Inputs: -* One input: \n -*x: A Tensor. Must be one of the following types: float16, float, int8, uint8. +* One input: +*x: A Tensor. Must be one of the following types: float16, float, int8, uint8 . \n *@par Attributes: *@li axes: A required int8, int16, int32, or int64. Specifies the dimensions to reduce. No default value. -*@li keep_dims: An optional bool. If "True", retains reduced dimensions with length 1. Defaults to "False". +*@li keep_dims: An optional bool. If "True", retains reduced dimensions with length 1. Defaults to "False" . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . 
\n *@attention Constraints: -* "keep_dims" is in the range [-rank(input_tensor), rank(input_tensor)]. +* "keep_dims" is in the range [-rank(input_tensor), rank(input_tensor)] . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ReduceProd. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceProd instead. */ REG_OP(ReduceProdD) .INPUT(x,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16})) @@ -462,21 +471,21 @@ REG_OP(ReduceProdD) .OP_END_FACTORY_REG(ReduceProdD) /** -*@brief Reduces "x" along the dimensions according to "axis". +*@brief Reduces "x" along the dimensions according to "axis" . \n *@par Inputs: *Two inputs, including: * @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8. -* @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType.\n -* - If None (the default), reduces all dimensions.\n -* - Must be in the range [-rank(x), rank(x)). +* @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. +* - If None (the default), reduces all dimensions. +* - Must be in the range [-rank(x), rank(x)) . \n *@par Attributes: -*keep_dims: A bool or NoneType. \n -* - If true, retains reduced dimensions with length 1. \n +*keep_dims: A bool or NoneType. +* - If true, retains reduced dimensions with length 1. * - If false, the rank of the tensor is reduced by 1 for each entry in axis. *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility: * Compatible with the TensorFlow operator ReduceMean. @@ -489,24 +498,27 @@ REG_OP(ReduceMean) .OP_END_FACTORY_REG(ReduceMean) /** -*@brief Reduces "x" along the dimensions according to "axis". +*@brief Reduces "x" along the dimensions according to "axis" . \n *@par Inputs: *One input: -* @li x: A Tensor. 
Must be one of the following types: float16, float32, int8, uint8. +* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n *@par Attributes: -*@li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. \n -* If None (the default), reduces all dimensions. \n -* Must be in the range [-rank(x), rank(x)). \n -*@li keep_dims: A bool or NoneType. \n -* - If true, retains reduced dimensions with length 1. \n +*@li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. +* If None (the default), reduces all dimensions. +* Must be in the range [-rank(x), rank(x)). +*@li keep_dims: A bool or NoneType. +* - If true, retains reduced dimensions with length 1. * - If false, the rank of the tensor is reduced by 1 for each entry in axis. *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility: * Compatible with the TensorFlow operator ReduceMean. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead. */ REG_OP(ReduceMeanD) .INPUT(x, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8})) @@ -516,21 +528,21 @@ REG_OP(ReduceMeanD) .OP_END_FACTORY_REG(ReduceMeanD) /** -*@brief Returns the maximum of elements across dimensions of a Tensor. +*@brief Returns the maximum of elements across dimensions of a Tensor . \n *@par Inputs: -* Two inputs, including: \n +* Two inputs, including: *@li x: A multi-dimensional Tensor of type float16, float32, or int16. -*@li axes: A Scalar of type int32, specifying the axes information of the index with the maximum value. +*@li axes: A Scalar of type int32, specifying the axes information of the index with the maximum value . \n *@par Attributes: -*keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". 
+*keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false" . \n *@par Outputs: *y: A multi-dimensional Tensor, specifying the maximum value of the corresponding axis in the tensor. Has the same type as "x". (If "keep_dims" is set to "false", the output dimensions are reduced by "dimension" compared with that of "x". Otherwise, the output has one fewer dimension than "x".) *@attention Constraints: -* The value range of "axes" is [-dims, dims - 1]. "dims" indicates the dimension length of "x". +* The value range of "axes" is [-dims, dims - 1]. "dims" indicates the dimension length of "x" . \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Max. @@ -543,24 +555,27 @@ REG_OP(ReduceMax) .OP_END_FACTORY_REG(ReduceMax) /** -*@brief Returns the maximum of elements across dimensions of a Tensor. +*@brief Returns the maximum of elements across dimensions of a Tensor . \n *@par Inputs: -*x: A multi-dimensional Tensor of type float16, float32, or int16. +*x: A multi-dimensional Tensor of type float16, float32, or int16 . \n *@par Attributes: -* Two attributes, including: \n +* Two attributes, including: *@li axes: A required listint, specifying the axes information of the index with the maximum value. -*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". +*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false" . \n *@par Outputs: *y: A multi-dimensional Tensor, specifying the maximum value of the corresponding axis in the tensor. Has the same type as "x". (If "keep_dims" is set to "false", the output dimensions are reduced by "dimension" compared with that of "x". Otherwise, the output has one fewer dimension than "x".) *@attention Constraints: -* The value range of "axis" is [-dims, dims - 1]. "dims" indicates the dimension length of "x". +* The value range of "axis" is [-dims, dims - 1]. 
"dims" indicates the dimension length of "x" . \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Max. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMax instead. */ REG_OP(ReduceMaxD) .INPUT(x, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, @@ -572,20 +587,20 @@ REG_OP(ReduceMaxD) .OP_END_FACTORY_REG(ReduceMaxD) /** -*@brief Computes the minimum of elements across dimensions of a tensor. +*@brief Computes the minimum of elements across dimensions of a tensor . \n *@par Inputs: *@li input_tensor: A Tensor. Must be one of the following types: float16, float32, int8, uint8. -*@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None". +*@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None". -*@par Attributes:\n -*keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False". +*@par Attributes: +*keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False". -*@par Outputs:\n -*output_tensor: A Tensor. Must be one of the following types: float16, float32, int8, uint8. +*@par Outputs: +*output_tensor: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n -*@attention Constraints:\n -* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)). +*@attention Constraints: +* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator reduce_min. @@ -598,23 +613,26 @@ REG_OP(ReduceMin) .OP_END_FACTORY_REG(ReduceMin) /** -*@brief Computes the minimum of elements across dimensions of a tensor. +*@brief Computes the minimum of elements across dimensions of a tensor . \n -*@par Inputs:\n -*input_min: A Tensor. 
Must be one of the following types: float16, float32, int8, uint8. +*@par Inputs: +*input_min: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n *@par Attributes: -*@li axes: An optional int32, list, tuple, or NoneType value. Specifies the dimensions to reduce. Defaults to "None". -*@li keep_dims: An optional bool or NoneType value. If "True", reduced dimensions will be retained. Defaults to "None" (equivalent to "False"). +*@li axes: An optional int32, list, tuple, or NoneType value. Specifies the dimensions to reduce. Defaults to "None". +*@li keep_dims: An optional bool or NoneType value. If "True", reduced dimensions will be retained. Defaults to "None" (equivalent to "False"). -*@par Outputs:\n -*output_min: A Tensor. Must be one of the following types: float16, float32, int8, uint8. +*@par Outputs: +*output_min: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n -*@attention Constraints:\n -* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)). +*@attention Constraints: +* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator reduce_min. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. */ REG_OP(ReduceMinD) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) @@ -682,6 +700,8 @@ REG_OP(ReduceAny) *@par Third-party framework compatibility *Compatible with the TensorFlow operator reduce_any. * +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAny instead. */ REG_OP(ReduceAnyD) .INPUT(x, TensorType({DT_BOOL})) @@ -691,25 +711,25 @@ REG_OP(ReduceAnyD) .OP_END_FACTORY_REG(ReduceAnyD) /** -*@brief Compute reduction on dimensions specified by "axis". 
+*@brief Compute reduction on dimensions specified by "axis". *Four reduction operations are provided: *SUM Computes the sum of elements across specified dimensions of a tensor. *ASUM Computes the sum of absolute values of elements across specified dimensions of a tensor. *SUMSQ Computes the sum of squares of elements across specified dimensions of a tensor. -*SUMSQ Computes the mean values of elements across specified dimensions of a tensor. +*SUMSQ Computes the mean values of elements across specified dimensions of a tensor . \n -*@par Inputs: +*@par Inputs: *x: A Tensor of type float16 or float32 *@par Attributes: -*@li operation: An optional int32 from 1(SUM), 2(ASUM), 3(SUMSQ), and 4(MEAN), +*@li operation: An optional int32 from 1(SUM), 2(ASUM), 3(SUMSQ), and 4(MEAN), *specifying the reduction algorithm. Defaults to "1". -*@li axis: An optional int32, specifying the first axis to reduce. Defaults to "0". +*@li axis: An optional int32, specifying the first axis to reduce. Defaults to "0". *The value range is [-N, N-1], where N is the input tensor rank. -*@li coeff: An optional float32, specifying the scale coefficient. Defaults to "1.0". +*@li coeff: An optional float32, specifying the scale coefficient. Defaults to "1.0" . \n -*@par Outputs: -*y: A Tensor. Has the same type as "x". +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n *@attention Constraints: The Reduction operator supports type float16 only on the device chip. *@par Third-party framework compatibility @@ -724,20 +744,20 @@ REG_OP(Reduction) .OP_END_FACTORY_REG(Reduction); /** -*@brief Computes the euclidean norm of elements across dimensions of a tensor. +*@brief Computes the euclidean norm of elements across dimensions of a tensor . \n *@par Inputs: *@li input_tensor: A Tensor. Must be one of the following types: float16, float32, int32. -*@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None". +*@li axes: A Tensor of type int8 or int32. 
Specifies the dimensions to reduce. Defaults to "None" . \n -*@par Attributes:\n -*keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False". +*@par Attributes: +*keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False" . \n -*@par Outputs:\n -*output_tensor: A Tensor. Must be one of the following types: float16, float32, int32. +*@par Outputs: +*output_tensor: A Tensor. Must be one of the following types: float16, float32, int32 . \n -*@attention Constraints:\n -* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)). +*@attention Constraints: +* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator EuclideanNorm. @@ -750,23 +770,26 @@ REG_OP(EuclideanNorm) .OP_END_FACTORY_REG(EuclideanNorm) /** -*@brief Computes the euclidean norm of elements across dimensions of a tensor. +*@brief Computes the euclidean norm of elements across dimensions of a tensor . \n -*@par Inputs:\n -*input_min: A Tensor. Must be one of the following types: float16, float32, int32. +*@par Inputs: +*input_min: A Tensor. Must be one of the following types: float16, float32, int32 . \n *@par Attributes: *@li axes: An optional int32, list, tuple, or NoneType value. Specifies the dimensions to reduce. Defaults to "None". -*@li keep_dims: An optional bool or NoneType value. If "True", reduced dimensions will be retained. Defaults to "None" (equivalent to "False"). +*@li keep_dims: An optional bool or NoneType value. If "True", reduced dimensions will be retained. Defaults to "None" (equivalent to "False") . \n -*@par Outputs:\n -*output_min: A Tensor. Must be one of the following types: float16, float32, int32. +*@par Outputs: +*output_min: A Tensor. 
Must be one of the following types: float16, float32, int32 . \n -*@attention Constraints:\n -* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)). +*@attention Constraints: +* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator EuclideanNorm. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use EuclideanNorm instead. */ REG_OP(EuclideanNormD) .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) @@ -778,24 +801,24 @@ REG_OP(EuclideanNormD) /** -*@brief Performs instance normalization for inference. +*@brief Performs instance normalization for inference . \n -*@par Inputs:\n +*@par Inputs: * Five inputs, including: (NC1HWC0 supported) *@li x: A Tensor of type float16 or float32. *@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. *@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. *@li mean: A [N, C1, 1, 1, C0] ensor of type float32, for the mean. -*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. +*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance . \n *@par Attributes: -*epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. -Defaults to "0.00001". +*epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. +Defaults to "0.00001" . \n -*@par Outputs:\n +*@par Outputs: *y: A Tensor of type float16 or float32 for the normalized "x". *batch_mean: A Tensor of type float32 for the result mean. -*batch_ variance: A Tensor of type float32 for the result variance. +*batch_ variance: A Tensor of type float32 for the result variance . 
\n *@attention Constraints: *For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. @@ -813,17 +836,17 @@ REG_OP(INInferV2) .OP_END_FACTORY_REG(INInferV2) /** -*@brief Performs reduced instance normalization. +*@brief Performs reduced instance normalization . \n -*@par Inputs:\n -*x: A Tensor of type float16 or float32, with format NC1HWC0. +*@par Inputs: +*x: A Tensor of type float16 or float32, with format NC1HWC0 . \n *@par Outputs: *@li sum: A Tensor of type float32 for SUM reduced "x". -*@li square_sum: A Tensor of type float32 for SUMSQ reduced "x". +*@li square_sum: A Tensor of type float32 for SUMSQ reduced "x" . \n -*@attention Constraints:\n -* This operator is a InstanceNorm fusion operator for updating the moving averages for training. \n +*@attention Constraints: +* This operator is a InstanceNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with INTrainingUpdateV2. */ REG_OP(INTrainingReduceV2) @@ -834,9 +857,9 @@ REG_OP(INTrainingReduceV2) /** -*@brief Performs update instance normalization. +*@brief Performs update instance normalization . \n -*@par Inputs:\n +*@par Inputs: * Seven inputs, including: (NC1HWC0supported) *@li x: A Tensor of type float16 or float32. *@li sum: A T [N, C1, 1, 1, C0] ensor of type float32 for the output of operator INTrainingReduceV2. @@ -844,20 +867,20 @@ REG_OP(INTrainingReduceV2) *@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. *@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. *@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated mean. -*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated variance. +*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated variance . \n *@par Attributes: *@li momentum: A required float32, specifying the momentum to update mean and var. 
-*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. +*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n -*@par Outputs:\n +*@par Outputs: * Three outputs, including: (NC1HWC0 supported) *@li y: A Tensor of type float16 or float32, for normalized "x". *@li batch_mean: A Tensor of type float32, for the updated mean. -*@li batch_variance: A Tensor of type float32, for the updated variance. +*@li batch_variance: A Tensor of type float32, for the updated variance . \n *@attention Constraints: -*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. \n +*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with INTrainingReduceV2. *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. */ @@ -878,10 +901,10 @@ REG_OP(INTrainingUpdateV2) /** -*@brief Performs reduced group normalization. +*@brief Performs reduced group normalization . \n -*@par Inputs:\n -*x: A Tensor of type float16 or float32, with format NCHW NHWC. +*@par Inputs: +*x: A Tensor of type float16 or float32, with format NCHW NHWC . \n *@par Outputs: *@li sum: A Tensor of type float32 for SUM reduced "x". @@ -889,10 +912,10 @@ REG_OP(INTrainingUpdateV2) *@par Attributes: -*@li num_groups: Int, specifying the num of groups. required, same to GNTrainingUpdate. +*@li num_groups: Int, specifying the num of groups. required, same to GNTrainingUpdate . \n -*@attention Constraints:\n -* This operator is a GroupNorm fusion operator for updating the moving averages for training. \n +*@attention Constraints: +* This operator is a GroupNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with GNTrainingUpdate. 
*/ REG_OP(GNTrainingReduce) @@ -904,27 +927,27 @@ REG_OP(GNTrainingReduce) /** -*@brief Performs update group normalization. +*@brief Performs update group normalization . \n -*@par Inputs:\n +*@par Inputs: * Eight inputs, including: (NCHW NHWC supported) *@li x: A Tensor of type float16 or float32. -*@li sum: A 5D Tensor of type float32, +*@li sum: A 5D Tensor of type float32, shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC for the output of operator GNTrainingReduce. -*@li square_sum: A 5D Tensor of type float32, +*@li square_sum: A 5D Tensor of type float32, shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC for the output of operator GNTrainingReduce. -*@li scale: A 5D Tensor of type float32, +*@li scale: A 5D Tensor of type float32, shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC is for the scaling gamma. -*@li offset: A 5D Tensor of type float32, +*@li offset: A 5D Tensor of type float32, shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC for the scaling beta. -*@li mean: A 5D Tensor of type float32, +*@li mean: A 5D Tensor of type float32, shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC for the updated mean. -*@li variance: A 5D Tensor of type float32, +*@li variance: A 5D Tensor of type float32, shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC for the updated variance. @@ -933,14 +956,14 @@ for the updated variance. *@li epsilon: A float32, specifying the small value added to variance to avoid dividing by zero. *@li num_groups: Int, specifying the num of groups. required, same to GNTrainingReduce -*@par Outputs:\n +*@par Outputs: * Three outputs, including: (NC1HWC0 supported) *@li y: A Tensor of type float16 or float32, for normalized "x". *@li batch_mean: A Tensor of type float32, for the updated mean. -*@li batch_variance: A Tensor of type float32, for the updated variance. +*@li batch_variance: A Tensor of type float32, for the updated variance . 
\n *@attention Constraints: -*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. \n +*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with GNTrainingUpdate. *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. */ diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index ee19865f..77437aba 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -27,21 +27,21 @@ namespace ge { /** *@brief: Basic LSTM Cell forward calculation. *@par Inputs: -*five inputs: \n +*five inputs: *@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li h:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. -*@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND. +*@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n *@par Attributes: *@li keep_prob:An integer identifying the keep prob in the op. Default to 1. *@li forget_bias:An integer identifying the forget bias in the op. Default to 1. *@li state_is_tuple:An bool identifying if the hidden state and cell state is tuple. Default to true. -*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported . \n *@par Outputs: -*seven outputs: \n +*seven outputs: *@li mask:A 1D Tensor. Must be one of the following types: uint8. *@li ct:A 4D Tensor. 
Must be one of the following types: float16, float32. *@li ht:A 4D Tensor. Must be one of the following types: float16. @@ -72,12 +72,12 @@ REG_OP(BasicLSTMCell) .OP_END_FACTORY_REG(BasicLSTMCell) /** -*@brief: Dynamic LSTM forward calculation. +*@brief: Dynamic LSTM forward calculation . \n *@par Inputs: *@li x:A 4D Tensor. Must be the type float32. The format must be FRACTAL_NZ. *@li w:A 4D Tensor. Must be the type float32. The format must be FRACTAL_Z. -*@li b:A 1D Tensor. Must be the type float32. The format must be ND. +*@li b:A 1D Tensor. Must be the type float32. The format must be ND . \n *@par Outputs: *output_h:A Tensor of output. Must be the type float32. The format must be FRACTAL_Z. @@ -90,10 +90,98 @@ REG_OP(DynamicLSTM) .OP_END_FACTORY_REG(DynamicLSTM) /** -*@brief: DynamicRNN calculation. +*@brief: DynamicRNNGrad calculation. *@par Inputs: *ten inputs: \n *@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y:A 1D Tensor. Must be one of the following types: int32. The format must be FRACTAL_NZ. +*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dc:A 4D Tensor. 
Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li seq_length:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li use_peephole:An bool identifying if use peephole in the op. Default to false. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to false. 
+*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. +*@li forget_bias:An float identifying the forget bias in the op. Default to 0. +*@li is_training:An bool identifying is training in the op. Default to true. + +*@par Outputs: +*eight outputs: \n +*@li dw:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li db:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*/ +REG_OP(DynamicRNNGrad) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dw, 
TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(db, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dc_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_OUTPUT(dwci, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_OUTPUT(dwcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_OUTPUT(dwco, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(cell_type, String, "LSTM") + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 0) + .ATTR(use_peephole, Bool, false) + .ATTR(keep_prob, Float, -1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(forget_bias, Float, 0.0) + .OP_END_FACTORY_REG(DynamicRNNGrad) + +/** +*@brief: DynamicRNN calculation. +*@par Inputs: +*ten inputs: +*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. *@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. *@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. @@ -102,7 +190,7 @@ REG_OP(DynamicLSTM) *@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. *@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. *@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. -*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND. +*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n *@par Attributes: *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. 
@@ -112,13 +200,13 @@ REG_OP(DynamicLSTM) *@li keep_prob:An float identifying the keep prob in the op. Default to 1. *@li cell_clip:An float identifying the cell clip in the op. Default to -1. *@li num_proj:An integer identifying the num projection in the op. Default to 0. -*@li time_major:An bool identifying the time major in the op. Default to false. +*@li time_major:An bool identifying the time major in the op. Default to true. *@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. *@li forget_bias:An float identifying the forget bias in the op. Default to 0. -*@li is_training:An bool identifying is training in the op. Default to true. +*@li is_training:An bool identifying is training in the op. Default to true . \n *@par Outputs: -*eight outputs: \n +*eight outputs: *@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. 
@@ -132,7 +220,7 @@ REG_OP(DynamicRNN) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -154,24 +242,68 @@ REG_OP(DynamicRNN) .ATTR(keep_prob, Float, 1.0) .ATTR(cell_clip, Float, -1.0) .ATTR(num_proj, Int, 0) - .ATTR(time_major, Bool, false) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") .ATTR(forget_bias, Float, 0.0) .ATTR(is_training, Bool, true) .OP_END_FACTORY_REG(DynamicRNN) +/** +*@brief: LSTMInputGrad calculation. +*@par Inputs: +*ten inputs: \n +*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li o:A 4D Tensor. Must be one of the following types: float16, float32. 
The format must be FRACTAL_NZ. +*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + + +*@par Outputs: +*eight outputs: \n +*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*/ +REG_OP(LSTMInputGrad) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dc_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dgate, TensorType({DT_FLOAT16})) + .OP_END_FACTORY_REG(LSTMInputGrad) + + /** *@brief: Basic LSTM Cell backward calculation.Calculate the gradient of input and hidden state. *@par Inputs: -*three inputs: \n +*three inputs: *@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. -*@li dropout_mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND. +*@li dropout_mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n *@par Attributes: -*keep_prob:An integer identifying the keep prob in the op. Default to 1. 
+*keep_prob:An integer identifying the keep prob in the op. Default to 1 . \n *@par Outputs: -*two outputs: \n +*two outputs: *@li dxt:A 4D Tensor. Must be one of the following types: float16, float32. *@li dht:A 4D Tensor. Must be one of the following types: float16, float32. */ @@ -187,13 +319,13 @@ REG_OP(BasicLSTMCellInputGrad) /** *@brief: Basic LSTM Cell backward calculation.Calculate the gradient of weight and bias. *@par Inputs: -*three inputs: \n +*three inputs: *@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li h:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. -*@li dgate:A 4D Tensor. Must be one of the following types: uint8. The format must be FRACTAL_NZ. +*@li dgate:A 4D Tensor. Must be one of the following types: uint8. The format must be FRACTAL_NZ . \n *@par Outputs: -*two outputs: \n +*two outputs: *@li dw:A 4D Tensor. Must be one of the following types: float16. *@li db:A 4D Tensor. Must be one of the following types: float16, float32. */ @@ -208,7 +340,7 @@ REG_OP(BasicLSTMCellWeightGrad) /** *@brief: Basic LSTM Cell backward calculation.Calculate the gradient of gates and cell state. *@par Inputs: -*eight inputs: \n +*eight inputs: *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dht:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. @@ -216,14 +348,14 @@ REG_OP(BasicLSTMCellWeightGrad) *@li jt:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li ft:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li ot:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li tanhct:A 4D Tensor. 
Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ . \n *@par Attributes: *@li forget_bias:An integer identifying the forget bias in the op. Default to 1. -*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported . \n *@par Outputs: -*two outputs: \n +*two outputs: *@li dgate:A 4D Tensor. Must be one of the following types: float16. *@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32. */ @@ -245,7 +377,7 @@ REG_OP(BasicLSTMCellCStateGrad) /** *@brief: RNN operator. *@par Inputs: -*eight inputs: \n +*eight inputs: *@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND. *@li x_static:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. @@ -255,16 +387,18 @@ REG_OP(BasicLSTMCellCStateGrad) *@li w_hh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. *@li w_ho:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. *@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n *@par Attributes: *@li expose_hidden:An bool identifying if expose the hidden state of last time step. Default to false. -*@li num_output:An integer identifying the number of output features. Default to 0. 
+*@li num_output:An integer identifying the number of output features. Default to 0 . \n *@par Outputs: -*two outputs: \n +*two outputs: *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(RNN) .INPUT(x, TensorType({DT_FLOAT16})) @@ -286,7 +420,7 @@ REG_OP(RNN) /** *@brief: BasicRNNCell operator. *@par Inputs: -*eight inputs: \n +*eight inputs: *@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND. *@li w_xh_x_static:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. @@ -295,14 +429,14 @@ REG_OP(RNN) *@li w_hh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. *@li w_ho:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. *@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n *@par Attributes: *@li expose_hidden:An bool identifying if expose the hidden state of last time step. Default to false. -*@li num_output:An integer identifying the number of output features. Default to 0. +*@li num_output:An integer identifying the number of output features. Default to 0 . \n *@par Outputs: -*two outputs: \n +*two outputs: *@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. 
*/ @@ -321,6 +455,124 @@ REG_OP(BasicRNNCell) .ATTR(expose_hidden, Bool, false) .ATTR(num_output, Int, 0) .OP_END_FACTORY_REG(BasicRNNCell) + +/** +*@brief: DynamicGRU calculation. +*@par Inputs: +*seven inputs: \n +*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. +*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li cw:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. +*@li cb:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. +*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. +*@li is_training:An bool identifying is training in the op. Default to true. + +*@par Outputs: +*five outputs: \n +*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li r:A 4D Tensor. 
Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li n:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicGRU) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(w, TensorType({DT_FLOAT16})) + .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(cw, TensorType({DT_FLOAT16})) + .INPUT(cb, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(n, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicGRU) + +/** +*@brief: DynamicGRUV2 calculation. +*@par Inputs: +*seven inputs: \n +*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li weight_input:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. +*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. +*@li bias_input:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li bias_hidden:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. 
+*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. +*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. +*@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. +*@li is_training:An bool identifying is training in the op. Default to true. + +*@par Outputs: +*six outputs: \n +*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(DynamicGRUV2) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(weight_input, TensorType({DT_FLOAT16})) + .INPUT(weight_hidden, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(gate_order, String, "zrh") + .ATTR(reset_after, Bool, true) + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicGRUV2) } // namespace ge #endif // GE_OP_RNN_H diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h index 1484e95e..39583293 100644 --- a/third_party/fwkacllib/inc/ops/rpn_ops.h +++ b/third_party/fwkacllib/inc/ops/rpn_ops.h @@ -26,15 +26,15 @@ namespace ge { /** *@brief Iteratively removes lower scoring boxes which have an IoU greater than * iou_threshold with higher scoring box according to their -* intersection-over-union (IoU). +* intersection-over-union (IoU) . \n *@par Input: * @li box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and -* corresponding confidence scores. +* corresponding confidence scores . \n * @par Attributes: * @li iou_threshold: An optional float. The threshold for deciding whether boxes -* overlap too much with respect to IOU. +* overlap too much with respect to IOU . 
\n * @par Outputs: * @li selected_boxes: 2-D tensor with shape of [N,5], representing filtered @@ -42,7 +42,7 @@ namespace ge { * @li selected_idx: 1-D tensor with shape of [N], representing the index of * input proposal boxes. * @li selected_mask: 1-D tensor with shape of [N], the symbol judging whether -* the output proposal boxes is valid. +* the output proposal boxes is valid . \n * @attention Constraints: * The 2nd-dim of input box_scores must be equal to 8.\n diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h index 7fd853d3..159e7382 100644 --- a/third_party/fwkacllib/inc/ops/save_ops.h +++ b/third_party/fwkacllib/inc/ops/save_ops.h @@ -28,7 +28,7 @@ namespace ge { /** *@brief Mark which tensors need to be saved to the ckpt file. *@par Inputs: -*tensors: A list of input tensor. +*tensors: A list of input tensor.It's a dynamic input. *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index 712fc1fc..dc6852d4 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -27,36 +27,36 @@ namespace ge { /** -*@brief Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for \n -*linear models with L1 + L2 regularization. As global optimization objective is \n -*strongly-convex, the optimizer optimizes the dual objective at each step. The \n -*optimizer applies each update one example at a time. Examples are sampled \n -*uniformly, and the optimizer is learning rate free and enjoys linear convergence \n -*rate. +*@brief Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for +*linear models with L1 + L2 regularization. As global optimization objective is +*strongly-convex, the optimizer optimizes the dual objective at each step. The +*optimizer applies each update one example at a time. 
Examples are sampled +*uniformly, and the optimizer is learning rate free and enjoys linear convergence +*rate . \n *@par Inputs: -*@li sparse_example_indices: a list of vectors which contain example indices. -*@li sparse_feature_indices: a list of vectors which contain feature indices. -*@li sparse_feature_values: a list of vectors which contains feature value associated with each feature group. -*@li dense_features: a list of matrices which contains the dense feature values. +*@li sparse_example_indices: a list of vectors which contain example indices.It's a dynamic input. +*@li sparse_feature_indices: a list of vectors which contain feature indices.It's a dynamic input. +*@li sparse_feature_values: a list of vectors which contains feature value associated with each feature group.It's a dynamic input. +*@li dense_features: a list of matrices which contains the dense feature values.It's a dynamic input. *@li example_weights: a vector which contains the weight associated with each example. *@li example_labels: a vector which contains the label/target associated with each example. -*@li sparse_indices: a list of vectors where each value is the indices which has \n -*corresponding weights in sparse_weights. This field maybe omitted for the dense approach. +*@li sparse_indices: a list of vectors where each value is the indices which has +*corresponding weights in sparse_weights. This field maybe omitted for the dense approach.It's a dynamic input. *@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group. -*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group. +*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group.It's a dynamic input. *@li example_state_data: a list of vectors containing the example state data. *@li loss_type: Type of the primal loss. 
Currently SdcaSolver supports logistic, squared and hinge losses. *@li l1: Symmetric l1 regularization strength. *@li l2: Symmetric l2 regularization strength. *@li num_loss_partitions: Number of partitions of the global loss function. -*@li num_inner_iterations: Number of iterations per mini-batch. +*@li num_inner_iterations: Number of iterations per mini-batch . \n *@par Outputs: -*y: A Returns a list of vectors containing the updated example state \n -*data.a list of vectors where each value is the delta \n -*weights associated with a sparse feature group.a list of vectors where the values are the delta \n -*weights associated with a dense feature group. +*y: A Returns a list of vectors containing the updated example state +*data.a list of vectors where each value is the delta +*weights associated with a sparse feature group.a list of vectors where the values are the delta +*weights associated with a dense feature group . \n *@par Third-party framework compatibility * Compatible with tensorflow SdcaOptimizerV2 operator. diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 1328ae52..d17e8e94 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -24,7 +24,7 @@ namespace ge { /** -*@brief Creates a sequence of numbers. +*@brief Creates a sequence of numbers . \n *@par Inputs: *Three inputs, including: @@ -35,10 +35,10 @@ namespace ge { * defaults to the value of "start" while the first entry of the range * defaults to "0". The supported types are: float32, int32, double, int64. * @li delta: A 0D Tensor (scalar). Number that increments "start". -* Defaults to "1". The supported types are: float32, int32, double, int64. +* Defaults to "1". The supported types are: float32, int32, double, int64 . \n *@par Outputs: -*y: A 1D Tensor. +*y: A 1D Tensor . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Range. 
@@ -51,7 +51,7 @@ REG_OP(Range) .OP_END_FACTORY_REG(Range) /** -*@brief: Creates a sequence of numbers. +*@brief: Creates a sequence of numbers . \n *@par Inputs: *Four inputs, including: @@ -63,10 +63,10 @@ REG_OP(Range) * defaults to the value of "start" while the first entry of the range * defaults to "0". * @li delta: A 0D Tensor (scalar) of type float32 or int32. -* Number that increments "start". Defaults to "1". +* Number that increments "start". Defaults to "1" . \n *@par Outputs: -*y: A 1D Tensor. +*y: A 1D Tensor . \n *@par Quantization supported or not *Not supported @@ -79,6 +79,9 @@ REG_OP(Range) *@see Range() *@since V100R001C33 +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Range instead. */ REG_OP(RangeD) .INPUT(x, TensorType({DT_FLOAT,DT_INT32})) @@ -89,17 +92,18 @@ REG_OP(RangeD) .OP_END_FACTORY_REG(RangeD) /** -*@brief Constructs a tensor by tiling a given tensor. +*@brief Constructs a tensor by tiling a given tensor . \n *@par Inputs: *Two inputs, including: -* @li x: A Tensor. -* Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +* @li x: A Tensor. +* Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, +uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. * @li multiples: A 1D Tensor of type int32 or int64. * The length must be the same as the number of dimensions in "input" *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@see TileD() @@ -113,23 +117,23 @@ REG_OP(Tile) .OP_END_FACTORY_REG(Tile) /** -*@brief Constructs a tensor by tiling a given tensor. +*@brief Constructs a tensor by tiling a given tensor . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float32, float16, int32. +*x: A Tensor. 
Must be one of the following types: float32, float16, int32 . \n *@par Attributes: *multiples: A required Tensor of type int32 or int64. -* Number of replication times. +* Number of replication times . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@see Tile() *@par Third-party framework compatibility *Compatible with the TensorFlow operator Tile. -*@par Restrictions: +*@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use Tile instead. */ REG_OP(TileD) @@ -140,20 +144,20 @@ REG_OP(TileD) /** * @brief Gather slices from "x" into a tensor with shape specified by -* "indices". "indices" is an K-dimensional integer tensor, best thought of as a\n -* (K-1)-dimensional tensor of "indices" into "params", where each element\n -* defines a slice of "params":\n -* output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]]\n -* "indices" defines slices into the first N dimensions of\n -* "params", where\n -* N = indices.shape[-1]\n -* indices = [[0, 0], [1, 1]]\n -* x = [['a', 'b'], ['c', 'd']]\n -* output = ['a', 'd']\n +* "indices". "indices" is an K-dimensional integer tensor, best thought of as a +* (K-1)-dimensional tensor of "indices" into "params", where each element +* defines a slice of "params": +* output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]] +* "indices" defines slices into the first N dimensions of +* "params", where +* N = indices.shape[-1] +* indices = [[0, 0], [1, 1]] +* x = [['a', 'b'], ['c', 'd']] +* output = ['a', 'd'] * @par Inputs: * @li x: A Tensor of type BasicType. -* @li indices: A Tensor of type IndexNumberType. +* @li indices: A Tensor of type IndexNumberType . \n * @par Outputs: * y: A Tensor of type BasicType. 
@@ -162,7 +166,7 @@ REG_OP(TileD) * @attention Constraints: * @li "x" is one of the following types: float16, float32, double, int32, * uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, -* complex128, uint32, uint64. +* complex128, uint32, uint64 . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator GatherNd. @@ -174,7 +178,7 @@ REG_OP(GatherNd) .OP_END_FACTORY_REG(GatherNd) /** -*@brief Gather slices from "x" according to "indices" by corresponding axis. +*@brief Gather slices from "x" according to "indices" by corresponding axis . \n *@par Inputs: *Three inputs, including: @@ -182,16 +186,16 @@ REG_OP(GatherNd) * uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16, * uint16, complex128, float16, uint32, uint64, complex64, complex128. * @li indices: A Tensor of type int32 or int64. -* @li axis: A Tensor of type as int32. +* @li axis: A Tensor of type as int32 . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@attention Constraints: *Value in indices must be in range [0, x.shape[axis]) *@par Third-party framework compatibility -* Compatible with the TensorFlow operator GatherV2. +* Compatible with the TensorFlow operator GatherV2 . \n */ REG_OP(GatherV2) @@ -202,25 +206,28 @@ REG_OP(GatherV2) .OP_END_FACTORY_REG(GatherV2) /** -*@brief Gather slices from "x" according to "indices" by corresponding axis. +*@brief Gather slices from "x" according to "indices" by corresponding axis . \n *@par Inputs: *Two inputs, including: -* @li x: A Tensor. Must be one of the following types: float32, float16, int32, uint32, int8, uint8, \n +* @li x: A Tensor. Must be one of the following types: float32, float16, int32, uint32, int8, uint8, * int16, uint16, int64, uint64. -* @li indices: A Tensor of type int32 or int64. +* @li indices: A Tensor of type int32 or int64 . \n *@par Attributes: -*axis: A int32 specifying the axis to gather from. 
+*axis: A int32 specifying the axis to gather from . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@attention Constraints: *@par Third-party framework compatibility * Compatible with the TensorFlow operator GatherV2. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use GatherV2 instead. */ REG_OP(GatherV2D) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT32, DT_INT8, DT_UINT8, @@ -232,40 +239,40 @@ REG_OP(GatherV2D) .OP_END_FACTORY_REG(GatherV2D) /** -*@brief Extracts a strided slice of a tensor. Roughly speaking, this op \n - extracts a slice of size (end-begin)/stride from the given input tensor. \n - Starting at the location specified by begin the slice continues by \n - adding stride to the index until all dimensions are not less than end. \n +*@brief Extracts a strided slice of a tensor. Roughly speaking, this op + extracts a slice of size (end-begin)/stride from the given input tensor. + Starting at the location specified by begin the slice continues by + adding stride to the index until all dimensions are not less than end. *@par Inputs: *Four inputs, including: -* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n -* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n -* complex128, float16, uint32, uint64, complex64, complex128. \n -* @li begin: A Tensor of type int32 or int64, for the index of the first value to select. +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128. +* @li begin: A Tensor of type int32 or int64, for the index of the first value to select . \n -* @li end: A Tensor of type int32 or int64, for the index of the last value to select. 
+* @li end: A Tensor of type int32 or int64, for the index of the last value to select . \n -* @li strides: A Tensor of type int32 or int64, for the increment. +* @li strides: A Tensor of type int32 or int64, for the increment . \n *@par Attributes: -* @li begin_mask: A Tensor of type int32. \n - A bitmask where a bit "i" being "1" means to ignore the begin \n +* @li begin_mask: A Tensor of type int32. + A bitmask where a bit "i" being "1" means to ignore the begin value and instead use the largest interval possible. -* @li end_mask: A Tensor of type int32. \n +* @li end_mask: A Tensor of type int32. Analogous to "begin_mask". -* @li ellipsis_mask: A Tensor of type int32. \n - A bitmask where bit "i" being "1" means the "i"th position \n +* @li ellipsis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th position is actually an ellipsis. -* @li new_axis_mask: A Tensor of type int32. \n - A bitmask where bit "i" being "1" means the "i"th \n +* @li new_axis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th specification creates a new shape 1 dimension. -* @li shrink_axis_mask: A Tensor of type int32. \n - A bitmask where bit "i" implies that the "i"th \n - specification should shrink the dimensionality. +* @li shrink_axis_mask: A Tensor of type int32. + A bitmask where bit "i" implies that the "i"th + specification should shrink the dimensionality . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@attention Constraints: @@ -286,43 +293,46 @@ REG_OP(StridedSlice) .OP_END_FACTORY_REG(StridedSlice) /** -*@brief Extracts a strided slice of a tensor. Roughly speaking, this op \n - extracts a slice of size "(end-begin)/stride" from the given input tensor. \n - Starting at the location specified by "begin" the slice continues by \n - adding "stride" to the index until all dimensions are not less than "end". +*@brief Extracts a strided slice of a tensor. 
Roughly speaking, this op + extracts a slice of size "(end-begin)/stride" from the given input tensor. + Starting at the location specified by "begin" the slice continues by + adding "stride" to the index until all dimensions are not less than "end" . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n -* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n -* complex128, float16, uint32, uint64, complex64, complex128. +*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128 . \n *@par Attributes: -* @li begin: A Tensor of type int32 or int64. \n +* @li begin: A Tensor of type int32 or int64. The index of the first value to select. -* @li end: A Tensor of type int32 or int64. \n +* @li end: A Tensor of type int32 or int64. The index of the last value to select. -* @li strides: A Tensor of type int32 or int64, for the increment. \n -* @li begin_mask: A Tensor of type int32. \n - A bitmask where a bit "i" being "1" means to ignore the begin \n +* @li strides: A Tensor of type int32 or int64, for the increment. +* @li begin_mask: A Tensor of type int32. + A bitmask where a bit "i" being "1" means to ignore the begin value and instead use the largest interval possible. * @li end_mask: Analogous to "begin_mask". A Tensor of type as int32. -* @li ellipsis_mask: A Tensor of type int32. \n - A bitmask where bit "i" being "1" means the "i"th position \n +* @li ellipsis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th position is actually an ellipsis. -* @li new_axis_mask: A Tensor of type int32. \n - A bitmask where bit "i" being "1" means the "i"th \n +* @li new_axis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th specification creates a new shape 1 dimension. 
-* @li shrink_axis_mask: A Tensor of type int32. \n - A bitmask where bit "i" implies that the "i"th \n - specification should shrink the dimensionality. +* @li shrink_axis_mask: A Tensor of type int32. + A bitmask where bit "i" implies that the "i"th + specification should shrink the dimensionality . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@attention Constraints: *@par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSlice. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead. */ REG_OP(StridedSliceD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, @@ -340,44 +350,47 @@ REG_OP(StridedSliceD) .OP_END_FACTORY_REG(StridedSliceD) /** -*@brief Since StridedSlice cuts out pieces of its "input" which is size "dy", \n - its gradient will have the same shape (which is passed here as "shape"). \n - The gradient will be zero in any element that the slice does not select. +*@brief Since StridedSlice cuts out pieces of its "input" which is size "dy", + its gradient will have the same shape (which is passed here as "shape"). + The gradient will be zero in any element that the slice does not select . \n *@par Inputs: -*dy: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n -* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n -* complex128, float16, uint32, uint64, complex64, complex128. +*dy: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128 . \n *@par Attributes: * @li shape: A Tensor of type int32 or int64. -* @li begin: A Tensor of type int32 or int64. \n +* @li begin: A Tensor of type int32 or int64. The index of the first value to select. 
-* @li end: A Tensor of type int32 or int64. \n +* @li end: A Tensor of type int32 or int64. The index of the last value to select. * @li strides: A Tensor of type int32 or int64, for the increment. -* @li begin_mask: A Tensor of type int32. \n - A bitmask where a bit "i" being "1" means to ignore the begin \n +* @li begin_mask: A Tensor of type int32. + A bitmask where a bit "i" being "1" means to ignore the begin value and instead use the largest interval possible. -* @li end_mask: A Tensor of type int32. \n +* @li end_mask: A Tensor of type int32. Analogous to "begin_mask". -* @li ellipsis_mask: A Tensor of type int32. \n - A bitmask where bit "i" being "1" means the "i"th position \n +* @li ellipsis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th position is actually an ellipsis. -* @li new_axis_mask: A Tensor of type int32. \n - A bitmask where bit "i" being "1" means the "i"th \n +* @li new_axis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th specification creates a new shape 1 dimension. -* @li shrink_axis_mask: A Tensor of type int32. \n - A bitmask where bit "i" implies that the "i"th \n - specification should shrink the dimensionality. +* @li shrink_axis_mask: A Tensor of type int32. + A bitmask where bit "i" implies that the "i"th + specification should shrink the dimensionality . \n *@par Outputs: -*output: A Tensor. Has the same type as "dy". +*output: A Tensor. Has the same type as "dy" . \n *@attention Constraints: *@par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSliceGradD. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSliceGrad instead. */ REG_OP(StridedSliceGradD) .INPUT(dy, TensorType::BasicType()) @@ -396,7 +409,7 @@ REG_OP(StridedSliceGradD) /** *@brief Since StridedSlice cuts out pieces of its "input" which is size "dy", its gradient will have the same shape (which is passed here as "shape"). 
- The gradient will be zero in any element that the slice does not select. + The gradient will be zero in any element that the slice does not select . \n *@par Inputs: *Five inputs, including: @@ -409,7 +422,7 @@ REG_OP(StridedSliceGradD) * @li dy: A Tensor. Must be one of the following types: * float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, -* complex128, float16, uint32, uint64, complex64, complex128. +* complex128, float16, uint32, uint64, complex64, complex128 . \n *@par Attributes: * @li begin_mask: A Tensor of type int32. @@ -425,10 +438,10 @@ REG_OP(StridedSliceGradD) specification creates a new shape 1 dimension. * @li shrink_axis_mask: A Tensor of type int32. A bitmask where bit "i" implies that the "i"th - specification should shrink the dimensionality. + specification should shrink the dimensionality . \n *@par Outputs: -*output: A Tensor has the same type as "dy". +*output: A Tensor has the same type as "dy" . \n *@attention Constraints: @@ -450,17 +463,17 @@ REG_OP(StridedSliceGrad) .OP_END_FACTORY_REG(StridedSliceGrad) /** -*@brief Computes the sum along segments of a tensor. +*@brief Computes the sum along segments of a tensor . \n *@par Inputs: *Three inputs, including: * @li x: A Tensor of type NumberType. * @li segment_ids: A Tensor of type IndexNumberType, whose shape is a prefix * of "x.shape". -* @li num_segments: A Tensor of type IndexNumberType. +* @li num_segments: A Tensor of type IndexNumberType . \n *@par Outputs: -*y: A Tensor of type NumberType. +*y: A Tensor of type NumberType . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator UnsortedSegmentSum. @@ -473,22 +486,25 @@ REG_OP(UnsortedSegmentSum) .OP_END_FACTORY_REG(UnsortedSegmentSum) /** -*@brief Computes the sum along segments of a tensor. +*@brief Computes the sum along segments of a tensor . 
\n *@par Inputs: *Two inputs, including: * @li x: A Tensor of type float16, float32, int32, int8, uint8. * @li segment_ids: A Tensor of type int32, whose shape is a prefix -* of "x.shape". +* of "x.shape" . \n *@par Attributes: -*num_segments: An int32, specifying the number of distinct segment IDs. +*num_segments: An int32, specifying the number of distinct segment IDs . \n *@par Outputs: -*y: A Tensor with same type as "x". +*y: A Tensor with same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator UnsortedSegmentSum. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentSum instead. */ REG_OP(UnsortedSegmentSumD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8})) @@ -498,20 +514,20 @@ REG_OP(UnsortedSegmentSumD) .OP_END_FACTORY_REG(UnsortedSegmentSumD) /** -*@brief Reverses specific dimensions of a tensor. +*@brief Reverses specific dimensions of a tensor . \n *@par Inputs: -* Two inputs, including:\n -*@li x: An ND Tensor (up to 8D). \n +* Two inputs, including: +*@li x: An ND Tensor (up to 8D). *Must be one of the following types: int8, uint8, int16, uint16, int32, int64, bool, float16, float32, double, complex64, complex128, string. -*@li axis: A 1D Tensor.\n +*@li axis: A 1D Tensor. *Must be one of the following types: int32, int64 *@par Outputs: *y: A Tensor. Has the same type and format as "x" *@attention Constraints: -"axis" must be within the rank of "x". +"axis" must be within the rank of "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ReverseV2. @@ -527,22 +543,22 @@ REG_OP(ReverseV2) .OP_END_FACTORY_REG(ReverseV2) /** -*@brief Reverses specific dimensions of a tensor. +*@brief Reverses specific dimensions of a tensor . \n *@par Inputs: * One input: *@li x: An ND Tensor (up to 8D). 
* Must be one of the following types: int8, uint8, int16, uint16, int32, -* int64, bool, float16, float, double, complex64, complex128, string. +* int64, bool, float16, float, double, complex64, complex128, string . \n *@par Attributes: -*axis: The indices of the dimensions to reverse. Support type: listInt. +*axis: The indices of the dimensions to reverse. Support type: listInt . \n *@par Outputs: *y: A Tensor. Has the same type and format as "x" *@attention Constraints: -"axis" must be within the rank of "x". +"axis" must be within the rank of "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ReverseV2. @@ -560,7 +576,7 @@ REG_OP(ReverseV2D) .OP_END_FACTORY_REG(ReverseV2D) /** -*@brief: Selects elements from "x1" or "x2", depending on "condition". +*@brief: Selects elements from "x1" or "x2", depending on "condition" . \n *@par Inputs: * Three inputs, including: @@ -585,16 +601,16 @@ REG_OP(Select) .OP_END_FACTORY_REG(Select) /** -*@brief: SelectV2s elements from "then" or "else", depending on "condition". +*@brief: SelectV2s elements from "then" or "else", depending on "condition" . \n *@par Inputs: * Three inputs, including: * @li condition: A Tensor of type bool. * @li then: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. -* @li else: A Tensor of the same type as "then". +* @li else: A Tensor of the same type as "then" . \n *@par Outputs: -*result: A Tensor. Has the same type as "then". +*result: A Tensor. Has the same type as "then" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SelectV2. @@ -620,7 +636,7 @@ REG_OP(SelectV2) must be positive intege *@par Outputs: -*y:A Tensor with same type as "x". +*y:A Tensor with same type as "x" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SegmentMax. 
@@ -651,6 +667,9 @@ REG_OP(SegmentMax) *@par Third-party framework compatibility *Compatible with the TensorFlow operator SegmentMax. + +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use SegmentMax instead. */ REG_OP(SegmentMaxD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) @@ -660,7 +679,7 @@ REG_OP(SegmentMaxD) /** *@brief Returns a one-hot tensor. The locations represented by index in "x" take value "on_value", -* while all other locations take value "off_value". +* while all other locations take value "off_value" . \n *@par Inputs: *Four inputs, including: @@ -669,13 +688,13 @@ REG_OP(SegmentMaxD) * @li on_value: A scalar. The value to fill in output when indices[j] = i, * Must be one of the following types: float16, float32, int32, int8, uint8. * @li off_value: A scalar. The value to fill in output when indices[j] != i, -* Has the same type as "on_value". +* Has the same type as "on_value" . \n *@par Attributes: -*axis: An int. The axis to fill. Defaults to "-1". +*axis: An int. The axis to fill. Defaults to "-1" . \n *@par Outputs: -*y: A Tensor. Has the same type as "on_value". +*y: A Tensor. Has the same type as "on_value" . \n *@par Third-party framework compatibility: * Compatible with the TensorFlow operator OneHot. @@ -691,7 +710,7 @@ REG_OP(OneHot) /** *@brief Returns a one-hot tensor. The locations represented by index in "x" take value "on_value", -* while all other locations take value "off_value". +* while all other locations take value "off_value" . \n *@par Inputs: *Three inputs, including: @@ -699,17 +718,20 @@ REG_OP(OneHot) *@li on_value: A scalar. The value to fill in output when indices[j] = i, * Must be one of the following types: float16, float32, int32, int8, uint8. *@li off_value: A scalar. The value to fill in output when indices[j] != i, -* Has the same type as "on_value". +* Has the same type as "on_value" . \n *@par Attributes: *@li depth: A scalar of type int32. The depth of the one hot dimension. 
-*@li axis: An int. The axis to fill. Defaults to "-1". +*@li axis: An int. The axis to fill. Defaults to "-1" . \n *@par Outputs: -*y: A Tensor. Has the same type as "on_value". +*y: A Tensor. Has the same type as "on_value" . \n *@par Third-party framework compatibility: * Compatible with the TensorFlow operator OneHot. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use OneHot instead. */ REG_OP(OneHotD) .INPUT(x, TensorType({DT_UINT8, DT_INT32})) @@ -724,18 +746,18 @@ REG_OP(OneHotD) /** *@brief Extracts a slice from a tensor. -* This operation extracts a slice of size "size" from a tensor "x" -* starting at the location specified by "begin". +* This operation extracts a slice of size "size" from a tensor "x" +* starting at the location specified by "begin" . \n *@par Inputs: *@li x: A Tensor. Must be one of the following types: * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. *@li offsets: A Tensor of type int32 or int64. The starting location for the slice. -*@li size: A Tensor of type int32 or int64. The tensor shape. +*@li size: A Tensor of type int32 or int64. The tensor shape . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". The slice extracted from the tensor. +*y: A Tensor. Has the same type as "x". The slice extracted from the tensor . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Slice. @@ -749,17 +771,17 @@ REG_OP(Slice) /** *@brief Extracts a slice from a tensor. -* This operation extracts a slice of size "size" from a tensor "x" -* starting at the location specified by "begin". +* This operation extracts a slice of size "size" from a tensor "x" +* starting at the location specified by "begin" . \n *@par Inputs: *@li x: A Tensor. 
Must be one of the following types: * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, -* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n *@par Attributes: *@li offsets: The starting location for the slice. -*@li size: The tensor shape. +*@li size: The tensor shape . \n *@par Outputs: *y: A Tensor. Has the same type as "x". The slice extracted from the tensor. @@ -775,7 +797,7 @@ REG_OP(SliceD) /** * @brief Finds values and indices of the "k" largest elements for the last -* dimension. +* dimension . \n * @par Inputs: * Two inputs, including: @@ -785,20 +807,20 @@ REG_OP(SliceD) * @li assist_seq: A 1D tensor of type float16. * with size of 2N, which "N" is the last dimension. * The first N numbers is indices, and the next N numbers is deviation of casting -* float16 to int32. +* int32 to float16. \n * @par Attributes: -* @li k: A required int that is at least 0, specifying the number of top elements \n +* @li k: A required int that is at least 0, specifying the number of top elements * to look for along the last dimension (along each row for matrices). -* @li sorted: An optional bool. Defaults to true.\n +* @li sorted: An optional bool. Defaults to true. * If true, the resulting "k" elements will be sorted by the values in descending * order. -* @li dim: An optional int. Defaults to -1. For reserved use.\n -* @li largest: An optional bool. Defaults to true. For reserved use.\n +* @li dim: An optional int. Defaults to -1. For reserved use. +* @li largest: An optional bool. Defaults to true. For reserved use. \n * @par Outputs: * @li values: A Tensor, specifying the sorted data. Has the same type as "input". -* @li indices: A Tensor of type int32, specifying the indices of sorted data. +* @li indices: A Tensor of type int32, specifying the indices of sorted data . 
\n * @attention Constraints: * @li k =< 5120 @@ -822,26 +844,26 @@ REG_OP(TopKD) /** * @brief Finds values and indices of the "k" largest elements for the last -* dimension. +* dimension . \n * @par Inputs: * Two inputs, including: * @li x: A 1D or higher tensor of type BasicType, with the last dimension * at least "k". -* @li k: A 0D Tensor of type int32.\n +* @li k: A 0D Tensor of type int32. * Number of top elements to look for along the last dimension (along each row -* for matrices). +* for matrices) . \n * @par Attributes: -* @li sorted: An optional bool. Defaults to true.\n +* @li sorted: An optional bool. Defaults to true. * If true, the resulting "k" elements will be sorted by the values in descending * order. -* @li T: Indicator of indices type. +* @li T: Indicator of indices type . \n * @par Outputs: * @li values: A Tensor, specifying the sorted data. Has the same type as * "input". -* @li indices: A Tensor of type int32, specifying the indices of sorted data. +* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n * @see TopK() * @par Third-party framework compatibility @@ -855,17 +877,17 @@ REG_OP(TopK) .ATTR(sorted, Bool, true) .OP_END_FACTORY_REG(TopK) /** -*@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices". +*@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices" . \n *@par Inputs: -*Inputs including: \n +*Inputs including: * @li indices: A required index tensor. Must be one of the following types: float32, float16, int32, int8, uint8. * @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8. * @li shape: A required list of int32, specifying the output shape. 
*@par Outputs:
-*y:A output Tensor with same datatype as "updates".
+*y: An output Tensor with same datatype as "updates" . \n
-*@attention Constraints:\n
+*@attention Constraints:
*@li "y" has the same shape as "shape".
*@li "y" has the same type as "x".
*@par Third-party framework compatibility
@@ -880,7 +902,7 @@ REG_OP(ScatterNd)
/**
*@brief Creates a new tensor by applying sparse "updates" to individual values
* or slices within a tensor (initially zero for numeric, empty for string) of
- * the given "shape" according to "indices".
+ * the given "shape" according to "indices" . \n
*@par Inputs:
*Inputs including:
@@ -891,7 +913,7 @@ REG_OP(ScatterNd)
*@par Attributes:
* @li shape: A required list of int32, specifying the output shape.
*@par Outputs:
-*y: A Tensor. Has the same type as "x". format:ND.
+*y: A Tensor. Has the same type as "x". format:ND . \n
*@attention Constraints:
*@li "y" has the same shape as "shape".
@@ -907,19 +929,22 @@ REG_OP(ScatterNdD)
.OP_END_FACTORY_REG(ScatterNdD)
/**
-* @brief Says whether the targets are in the top "k" predictions.
+* @brief Says whether the targets are in the top "k" predictions . \n
* @par Inputs:
* Three inputs, including:
* @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor.
-* @li x2: A 1D Tensor of type int32. A batch_size tensor of class ids.
+* @li x2: A 1D Tensor of type int32. A batch_size tensor of class ids . \n
* @par Attributes:
* @li k: A required IndexNumberType, specifying the number of top elements to
-* look at for computing precision.
+* look at for computing precision . \n
* @par Outputs:
-* y: A Tensor of type bool.
+* y: A Tensor of type bool . \n
+
+* @attention Constraints:
+* @li x2 must be a non-negative tensor.
* @see InTopK()
@@ -937,17 +962,20 @@ REG_OP(InTopKD)
.OP_END_FACTORY_REG(InTopKD)
/**
-* @brief Says whether the targets are in the top "k" predictions.
+* @brief Says whether the targets are in the top "k" predictions . 
\n
* @par Inputs:
* Two inputs, including:
* @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor.
* @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids.
* @li k: A 1D Tensor of the same type as "x2".
-* Specifies the number of top elements to look at for computing precision.
+* Specifies the number of top elements to look at for computing precision . \n
* @par Outputs:
-* y: A Tensor of type bool.
+* y: A Tensor of type bool . \n
+
+* @attention Constraints:
+* @li x2 must be a non-negative tensor.
* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator InTopKV2.
@@ -960,10 +988,10 @@ REG_OP(InTopK)
.OP_END_FACTORY_REG(InTopK)
/**
-* @brief Assigns "value" to the sliced l-value reference of "var".\n
-* The values of "value" are assigned to the positions in the variable. "var"\n
-* that are selected by the slice parameters. The slice parameters "begin, "end",\n
-* "strides", etc. work exactly as in "StridedSlice".
+* @brief Assigns "value" to the sliced l-value reference of "var".
+* The values of "value" are assigned to the positions in the variable. "var"
+* that are selected by the slice parameters. The slice parameters "begin, "end",
+* "strides", etc. work exactly as in "StridedSlice" . \n
* @par Inputs:
* Five inputs, including:
@@ -974,21 +1002,21 @@ REG_OP(InTopK)
* Specifies the index of the last value to select.
* @li strides: A mutable ND Tensor of type IndexNumberType.
* Specifies the stride to select.
-* @li input_value: A mutable ND Tensor of type BasicType.
+* @li input_value: A mutable ND Tensor of type BasicType . \n
* @par Attributes:
* @li begin_mask: An optional int. Defaults to "0".
* @li end_mask: An optional int. Defaults to "0".
* @li ellipsis_mask: An optional int. Defaults to "0".
* @li new_axis_mask: An optional int. Defaults to "0".
-* @li shrink_axis_mask: An optional int. Defaults to "0".
+* @li shrink_axis_mask: An optional int. Defaults to "0" . 
\n * @par Outputs: -* var: A mutable Tensor. Has the same type as "var". +* var: A mutable Tensor. Has the same type as "var" . \n * @attention Constraints: * This operator currently does not support broadcasting. Therefore, the shape -* of "value" must be exactly the shape produced by the slice of "var". +* of "value" must be exactly the shape produced by the slice of "var" . \n * @see StridedSlice() @@ -1010,15 +1038,15 @@ REG_OP(StridedSliceAssign) .OP_END_FACTORY_REG(StridedSliceAssign) /** -* @brief Assigns "value" to the sliced l-value reference of "var".\n -* The values of "value" are assigned to the positions in the variable. "var"\n -* that are selected by the slice parameters. The slice parameters "begin, "end",\n -* "strides", etc. work exactly as in "StridedSlice". +* @brief Assigns "value" to the sliced l-value reference of "var". +* The values of "value" are assigned to the positions in the variable. "var" +* that are selected by the slice parameters. The slice parameters "begin, "end", +* "strides", etc. work exactly as in "StridedSlice" . \n * @par Inputs: * Two inputs, including: * @li var: A mutable ND Tensor of the following types:int32, int16, float16, float32. -* @li input_value: A mutable ND "Tensor" of the following types:int32, int16, float16, float32. +* @li input_value: A mutable ND "Tensor" of the following types:int32, int16, float16, float32 . \n * @par Attributes: * @li begin: A required list of ints. @@ -1030,14 +1058,14 @@ REG_OP(StridedSliceAssign) * @li end_mask: An optional int. Defaults to "0". * @li ellipsis_mask: An optional int. Defaults to "0". * @li new_axis_mask: An optional int. Defaults to "0". -* @li shrink_axis_mask: An optional int. Defaults to "0". +* @li shrink_axis_mask: An optional int. Defaults to "0" . \n * @par Outputs: -* var: A mutable Tensor. Has the same type as input "var". +* var: A mutable Tensor. Has the same type as input "var" . 
\n * @attention Constraints: * This operator currently does not support broadcasting. Therefore, the shape of -* "value" shape must be exactly the shape produced by the slice of "var". +* "value" shape must be exactly the shape produced by the slice of "var" . \n * @see StridedSlice() * @@ -1060,27 +1088,27 @@ REG_OP(StridedSliceAssignD) /** *@brief Gather slices from "params" according to "indices"."indices" must be - an integer tensor of any dimension(usually 0-D or 1-D). \n - Produces an output tensor with shape "indices.shape + params.shape[1:]". + an integer tensor of any dimension(usually 0-D or 1-D). + Produces an output tensor with shape "indices.shape + params.shape[1:]" . \n *@par Inputs: *Two inputs, including: -* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n -* int64, qint8, quint8, qint32, qint16, quint16, uint16, \n +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* int64, qint8, quint8, qint32, qint16, quint16, uint16, * float16, uint32, uint64, complex64, complex128. -* @li indices: A Tensor of type int32 or int64. +* @li indices: A Tensor of type int32 or int64 . \n *@par Attributes: -*validate_indices: A bool specifying whether to verify the argument of "indice". +*validate_indices: A bool specifying whether to verify the argument of "indice" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@attention Constraints: -* "indices" is in the range [0, x.shape[0]). +* "indices" is in the range [0, x.shape[0]) . \n *@par Third-party framework compatibility -* Compatible with the TensorFlow operator Gather. +* Compatible with the TensorFlow operator Gather . \n */ REG_OP(Gather) @@ -1091,16 +1119,16 @@ REG_OP(Gather) .OP_END_FACTORY_REG(Gather) /** -*@brief Computes the cumulative product of the tensor "x" along "axis". +*@brief Computes the cumulative product of the tensor "x" along "axis" . 
\n *@par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 *@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". * *@par Attributes: -*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input +*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input * is identical to the first element of the output. If "True", performs exclusive cumprod. *@li reverse: A bool. Defaults to "False". * @@ -1118,16 +1146,16 @@ REG_OP(Cumprod) .OP_END_FACTORY_REG(Cumprod) /** -*@brief Computes the cumulative product of the tensor "x" along "axis". +*@brief Computes the cumulative product of the tensor "x" along "axis" . \n *@par Inputs: * One input: -*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 * *@par Attributes: *@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". -*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input +*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input * is identical to the first element of the output. If "True", performs exclusive cumprod. *@li reverse: A bool. Defaults to "False". * @@ -1145,11 +1173,11 @@ REG_OP(CumprodD) .OP_END_FACTORY_REG(CumprodD) /** -*@brief Computes the cumulative sum of the tensor "x" along "axis". +*@brief Computes the cumulative sum of the tensor "x" along "axis" . 
\n *@par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. *@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". * @@ -1176,12 +1204,12 @@ REG_OP(Cumsum) * *@par Inputs: * One input: -*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, * complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. * *@par Attributes: *@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". -*@li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is +*@li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is * identical to the first element of the output. If "True", performs exclusive cumsum. *@li reverse: A bool. Defaults to "False". * @@ -1203,16 +1231,16 @@ REG_OP(CumsumD) *Computes x[i, :] = v; return x. *@par Inputs: *Three inputs, including: -* @li x: A Tensor. \n +* @li x: A Tensor. * TensorType::NumberType(). -* @li indices: A vector of type int32. \n +* @li indices: A vector of type int32. * Indices into the left-most dimension of "x". -* @li v: A Tensor of the same type as "x". \n -* Same dimension sizes as x except the first dimension, \n -* which must be the same as the size of "indices". +* @li v: A Tensor of the same type as "x". +* Same dimension sizes as x except the first dimension, +* which must be the same as the size of "indices" . \n *@par Outputs: -*y: A Tensor of the same type as "x". \n +*y: A Tensor of the same type as "x". * An alias of "x". 
The content of "y" is undefined if there are duplicates in indices. *@par Third-party framework compatibility *Compatible with the TensorFlow operator InplaceUpdate. @@ -1230,18 +1258,21 @@ REG_OP(InplaceUpdate) *@par Inputs: *Two inputs, including: * @li x: A Tensor of type int32, float16, floay32. -* @li v: A Tensor of the same type as "x". \n -* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices". +* @li v: A Tensor of the same type as "x". +* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n *@par Attributes: -*indices: A required list of ints. Indices into the left-most dimension of "x". +*indices: A required list of ints. Indices into the left-most dimension of "x" . \n *@par Outputs: -*y: A Tensor of the same type as "x". \n -* An alias of "x". The content of "y" is undefined if there are duplicates in indices. +*y: A Tensor of the same type as "x". +* An alias of "x". The content of "y" is undefined if there are duplicates in indices . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator InplaceUpdate. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceUpdate instead. */ REG_OP(InplaceUpdateD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -1255,16 +1286,16 @@ REG_OP(InplaceUpdateD) *Computes y = x; y[i, :] += v. *@par Inputs: *Three inputs, including: -* @li x: A Tensor. \n +* @li x: A Tensor. * TensorType::NumberType(). -* @li indices: A vector of type int32. \n +* @li indices: A vector of type int32. * Indices into the left-most dimension of "x". -* @li v: A Tensor of the same type as "x". \n -* Same dimension sizes as x except the first dimension, \n -* which must be the same as the size of "indices". +* @li v: A Tensor of the same type as "x". +* Same dimension sizes as x except the first dimension, +* which must be the same as the size of "indices" . 
\n *@par Outputs: -*y: A Tensor of the same type as "x". \n +*y: A Tensor of the same type as "x". * An alias of "x". The content of "y" is undefined if there are duplicates in indices. *@par Third-party framework compatibility *Compatible with the TensorFlow operator InplaceAdd. @@ -1282,18 +1313,21 @@ REG_OP(InplaceAdd) *@par Inputs: *Two inputs, including: * @li x: A Tensor of type is int32, float16, float32. -* @li v: A Tensor of the same type as "x". \n -* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices". +* @li v: A Tensor of the same type as "x". +* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n *@par Attributes: -*indices: A required list of ints. Indices into the left-most dimension of "x". +*indices: A required list of ints. Indices into the left-most dimension of "x" . \n *@par Outputs: -*y: A Tensor of the same type as "x". \n -* An alias of "x". The content of "y" is undefined if there are duplicates in indices. +*y: A Tensor of the same type as "x". +* An alias of "x". The content of "y" is undefined if there are duplicates in indices . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator InplaceAdd. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceAdd instead. */ REG_OP(InplaceAddD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -1309,12 +1343,12 @@ REG_OP(InplaceAddD) **Three inputs, including: * @li x: A Tensor. TensorType::NumberType(). * @li indices: A vector of type int32. Indices into the left-most dimension of x. -* @li v: A Tensor of the same type as "x". \n -* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices". +* @li v: A Tensor of the same type as "x". +* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n *@par Outputs: -*y: A Tensor. 
Has the same type as "x".\n -* An alias of "x". The content of "y" is undefined if there are duplicates in indices. +*y: A Tensor. Has the same type as "x". +* An alias of "x". The content of "y" is undefined if there are duplicates in indices . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator InplaceSub. @@ -1328,23 +1362,26 @@ REG_OP(InplaceSub) /** *@brief Subtracts "v" into specified rows of "x". -*Computes y = x; y[i, :] -= v. +*Computes y = x; y[i, :] -= v . \n *@par Inputs: **Two inputs, including: * @li x: A Tensor of type is int32, float16, float32. -* @li v: A Tensor of the same type as "x". \n -* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices". +* @li v: A Tensor of the same type as "x". +* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n *@par Attributes: -*indices: A required list of ints. Indices into the left-most dimension of "x". +*indices: A required list of ints. Indices into the left-most dimension of "x" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x".\n -* An alias of x. The content of y is undefined if there are duplicates in indices. +*y: A Tensor. Has the same type as "x". +* An alias of x. The content of y is undefined if there are duplicates in indices . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator InplaceSub. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceSub instead. */ REG_OP(InplaceSubD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -1355,21 +1392,21 @@ REG_OP(InplaceSubD) /** * @brief Applies sparse addition to input "x" using individual values or slices -* from "updates" according to "indices". The updates are non-aliasing: "x" is\n -* only modified in-place if no other operations will use it. Otherwise, a copy\n +* from "updates" according to "indices". 
The updates are non-aliasing: "x" is +* only modified in-place if no other operations will use it. Otherwise, a copy * of "x" is made. This operation has a gradient with respect to both "x" and -* "updates". +* "updates" . \n * @par Inputs: * Three inputs, including: * @li x: A Tensor of type NumberType. A batch_size x classes tensor. * @li indices: A Tensor of type IndexNumberType. Specifies the indices into "x". * @li updates: A Tensor. Must have the same type as "x". -* Specifies the updated values to add to "x". +* Specifies the updated values to add to "x" . \n * @par Outputs: * y: A Tensor with the same shape as "x", containing values of "x" updated with -* "updates". +* "updates" . \n * @see ScatterNd(),ScatterNdAdd() @@ -1384,17 +1421,20 @@ REG_OP(ScatterNonAliasingAdd) .OP_END_FACTORY_REG(ScatterNonAliasingAdd) /** -* @brief Computes the minimum along segments of a tensor. +* @brief Computes the minimum along segments of a tensor . \n * @par Inputs: * Three inputs, including: * @li x: A Tensor of type RealNumberType. * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix * of "x.shape". -* @li num_segments: A Tensor of type IndexNumberType. +* @li num_segments: A Tensor of type IndexNumberType . \n * @par Outputs: -* y: A Tensor of type RealNumberType. +* y: A Tensor of type RealNumberType . \n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. * @see UnsortedSegmentSum(), UnsortedSegmentProd(), @@ -1409,19 +1449,22 @@ REG_OP(UnsortedSegmentMin) .OP_END_FACTORY_REG(UnsortedSegmentMin) /** -* @brief Computes the minimum along segments of a tensor. +* @brief Computes the minimum along segments of a tensor . \n * @par Inputs: * Two inputs, including: * @li x: A Tensor of the following types:int32, int16, float16, float32. * @li segment_ids: A 1D Tensor of type int32, whose shape is a prefix -* of "x.shape". +* of "x.shape" . 
\n * @par Attributes: -* num_segments: A required int32, specifying the number of distinct segment IDs. +* num_segments: A required int32, specifying the number of distinct segment IDs . \n * @par Outputs: -* y: A Tensor.Must have the same type as input "x". +* y: A Tensor.Must have the same type as input "x" . \n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. * @see UnsortedSegmentProdD(), UnsortedSegmentSumD(), * @@ -1436,17 +1479,20 @@ REG_OP(UnsortedSegmentMinD) .OP_END_FACTORY_REG(UnsortedSegmentMinD) /** -* @brief Computes the maximum along segments of a tensor. +* @brief Computes the maximum along segments of a tensor . \n * @par Inputs: * Three inputs, including: * @li x: A Tensor of type RealNumberType. * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix * of "x.shape". -* @li num_segments: A Tensor of type IndexNumberType. +* @li num_segments: A Tensor of type IndexNumberType . \n * @par Outputs: -* y: A Tensor of type RealNumberType. +* y: A Tensor of type RealNumberType . \n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. * @see UnsortedSegmentSum(), UnsortedSegmentProd(), @@ -1461,19 +1507,22 @@ REG_OP(UnsortedSegmentMax) .OP_END_FACTORY_REG(UnsortedSegmentMax) /** -* @brief Computes the maximum along segments of a tensor. +* @brief Computes the maximum along segments of a tensor . \n * @par Inputs: * Two inputs, including: * @li x: A Tensor of the following types:int32, int16, float16, float32. * @li segment_ids: A 1D Tensor of type int32, whose shape is a prefix -* of "x.shape". +* of "x.shape" . \n * @par Attributes: -* num_segments: A required int32, specifying the number of distinct segment IDs. +* num_segments: A required int32, specifying the number of distinct segment IDs . \n * @par Outputs: -* y: A Tensor.Must have the same type as input "x". +* y: A Tensor.Must have the same type as input "x" . 
\n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. * @see UnsortedSegmentProdD(), * @@ -1487,17 +1536,20 @@ REG_OP(UnsortedSegmentMaxD) .REQUIRED_ATTR(num_segments, Int) .OP_END_FACTORY_REG(UnsortedSegmentMaxD) /** -* @brief Computes the product along segments of a tensor. +* @brief Computes the product along segments of a tensor . \n * @par Inputs: * Three inputs, including: * @li x: A Tensor of type NumberType. * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix * of "x.shape". -* @li num_segments: A Tensor of type IndexNumberType. +* @li num_segments: A Tensor of type IndexNumberType . \n * @par Outputs: -* y: A Tensor of type NumberType. +* y: A Tensor of type NumberType . \n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. * @see UnsortedSegmentSum(), UnsortedSegmentMin(), @@ -1512,19 +1564,22 @@ REG_OP(UnsortedSegmentProd) .OP_END_FACTORY_REG(UnsortedSegmentProd) /** -* @brief Computes the product along segments of a tensor. +* @brief Computes the product along segments of a tensor . \n * @par Inputs: * Two inputs, including: * @li x: A Tensor of the following types:int32, int16, float16, float32. * @li segment_ids: A 1D Tensor of type int32, whose shape is a prefix -* of "x.shape". +* of "x.shape" . \n * @par Attributes: -* num_segments: An int32, specifying the number of distinct segment IDs. +* num_segments: An int32, specifying the number of distinct segment IDs . \n * @par Outputs: -* y: A Tensor.Must have the same type as input "x". +* y: A Tensor.Must have the same type as input "x" . \n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. * @see UnsortedSegmentMinD() * @@ -1539,12 +1594,12 @@ REG_OP(UnsortedSegmentProdD) .OP_END_FACTORY_REG(UnsortedSegmentProdD) /** -*@brief Performs object detection. +*@brief Performs object detection . 
\n *@par Inputs: *@li cls_prob: An NCHW tensor of type float16 or float32, specifying the probability of the proposal is the background class. *@li bbox_delta: An NCHW tensor of type float16 or float32, specifying the coordinates of the proposals bounding boxes. -*@li im_info: An ND tensor of type float16 or float32, specifying the Image information. +*@li im_info: An ND tensor of type float16 or float32, specifying the Image information . \n *@par Attributes: *@li feat_stride: A optional float32, specifying the stride of the sliding window. Must be greater than "0".Defaults to "16". @@ -1555,7 +1610,7 @@ REG_OP(UnsortedSegmentProdD) *@li pre_nms_topn: A required int, specifying top K boxes before NMS. For float16 input, pre_nms_topn <= 6000. For float32 input, pre_nms_topn <= 3000. Defaults to "3000". *@li post_nms_topn: A required int, specifying the number of boxes to be output after NMS. The value is a multiple of 16. For float16 input, post_nms_topn <= 6000. For float32 input, post_nms_topn <= 3000 (the maximum multiple of 16 is 2992 within the range). Defaults to "304". *@li iou_threshold: A required float32, specifying the NMS threshold. The value range is (0,1]. Defaults to "0.7". -*@li output_actual_rois_num: An optional bool. Defaults to "false". +*@li output_actual_rois_num: An optional bool. Defaults to "false" . \n *@par Outputs: *@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16 or float32, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". @@ -1581,13 +1636,13 @@ REG_OP(UnsortedSegmentProdD) .OP_END_FACTORY_REG(Proposal) /** -*@brief Performs object detection. Different from Proposal, this is an internal API called after FE fusion and has an additional "rpn_bbox" attribute. The suffix "D" in the API name will be removed from the generated model. 
+*@brief Performs object detection. Different from Proposal, this is an internal API called after FE fusion and has an additional "rpn_bbox" attribute. The suffix "D" in the API name will be removed from the generated model . \n *@par Inputs: *@li cls_prob: An NCHW tensor of type float16, specifying the probability of the proposal is the background class. *@li bbox_delta: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes. *@li im_info: An ND tensor of type float16 or float32, specifying the Image information. -*@li rpn_bbox: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes. +*@li rpn_bbox: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes . \n *@par Attributes: *@li feat_stride: A required float32, specifying the stride of the sliding window. Must be greater than "0".Defaults to "16". @@ -1598,7 +1653,7 @@ REG_OP(UnsortedSegmentProdD) *@li pre_nms_topn: A required int, specifying top K boxes before NMS. For float16 input, pre_nms_topn <= 6000. For float32 input, pre_nms_topn <= 3000. Defaults to "3000". *@li post_nms_topn: A required int, specifying the number of boxes to be output after NMS. The value is a multiple of 16. For float16 input, post_nms_topn <= 6000. For float32 input, post_nms_topn <= 3000 (the maximum multiple of 16 is 2992 within the range). Defaults to "304". *@li iou_threshold: A required float32, specifying the NMS threshold. The value range is (0,1]. Defaults to 0.7. -*@li output_actual_rois_num: An optional bool. Defaults to "false". +*@li output_actual_rois_num: An optional bool. Defaults to "false" . \n *@par Outputs: *@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16 or float32, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". 
@@ -1632,14 +1687,14 @@ REG_OP(ProposalD) * If reverse=false: (N, H, W, C)->(N, H/stride, W/stride, C*(stride*stride)) *@par Inputs: -*x: An (N, H, W, C) tensor. Type is float16, float32, int8, uint8, int16, uint16, int32, uint32, int64 or uint64.. +*x: An (N, H, W, C) tensor. Type is float16, float32, int8, uint8, int16, uint16, int32, uint32, int64 or uint64. . \n *@par Attributes: *@li stride: An optional int32, specifying the plane or channel scaling factor. Defaults to "2". -*@li reverse: An optional bool, specifying the conversion mode. If "true", depth to space conversion is performed. If "false", space to depth conversion is performed. Defaults to "false". +*@li reverse: An optional bool, specifying the conversion mode. If "true", depth to space conversion is performed. If "false", space to depth conversion is performed. Defaults to "false" . \n *@par Outputs: -*y: An (N, H, W, C) tensor. Has same type as "x". +*y: An (N, H, W, C) tensor. Has same type as "x" . \n *@attention Constraints: *@li If reverse=true: C/(stride*stride) yields an integer result. If reverse=false: W/stride and H/stride yield integer results. @@ -1654,24 +1709,24 @@ REG_OP(PassThrough) .OP_END_FACTORY_REG(PassThrough) /** -*@brief Crops the input tensor x to the shape of size. For example: \n -*(1) x: bottom to be cropped, with shape (20, 50, 512, 512);\n -*(2) size: reference input for cropping, with shape (20, 10, 256, 256);\n -*(3) axis = 1;\n -*(4) offset = (25, 128, 128);\n -*(5) y = x[:, 25:25 + size.shape[1], 128:128 + size.shape[2], 128:128 + size.shape[3]]. +*@brief Crops the input tensor x to the shape of size. For example: +*(1) x: bottom to be cropped, with shape (20, 50, 512, 512); +*(2) size: reference input for cropping, with shape (20, 10, 256, 256); +*(3) axis = 1; +*(4) offset = (25, 128, 128); +*(5) y = x[:, 25:25 + size.shape[1], 128:128 + size.shape[2], 128:128 + size.shape[3]] . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li x: A required Tensor. 
Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32,int64, uint64. * @li size: A required Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. *@par Attributes: *@li axis: A required int32, specifying the first dimension to crop. Defaults to "2". *@li offset: A required array, specifying the shift for all/each dimension to align the cropped bottom with the reference bottom. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. *@par Outputs: -*y: A required Tensor. Has the same type and shape as "size". +*y: A required Tensor. Has the same type and shape as "size" . \n -*@attention Constraints:\n +*@attention Constraints: *@li "y" must have the same type and shape as "size". "x" must have the same type as "size". *@li "axis" must be less than the rank of "x". *@li The "offset" for each dimension must not exceed the maximum value of the corresponding dimension of "x". @@ -1688,24 +1743,24 @@ REG_OP(Crop) .OP_END_FACTORY_REG(Crop) /** -*@brief Extends the input with copies of data along a specified dimension. For example: \n -*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2);\n -*(2) axis = 1;\n -*(3) tiles = 2;\n -*(4) Then, y = [[[1, 2], [3, 4], [5, 6], [1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]], with shape (2, 6, 2). +*@brief Extends the input with copies of data along a specified dimension. For example: +*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); +*(2) axis = 1; +*(3) tiles = 2; +*(4) Then, y = [[[1, 2], [3, 4], [5, 6], [1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]], with shape (2, 6, 2) . \n *@par Inputs: * One input: -*input_x: A Tensor with any format. 
Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*input_x: A Tensor with any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n *@par Attributes: *@li axis: An optional int32, specifying the axis to tile. Defaults to 1. -*@li tiles: A required int32, specifying the number of copies (tiles) to output. +*@li tiles: A required int32, specifying the number of copies (tiles) to output . \n *@par Outputs: -*output_y: A Tensor of any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*output_y: A Tensor of any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n -*@attention Constraints:\n +*@attention Constraints: *@li "axis" must be within the rank of the input tensor. *@li "tiles" must be greater than 1. *@par Third-party framework compatibility @@ -1721,14 +1776,14 @@ REG_OP(TileWithAxis) .OP_END_FACTORY_REG(TileWithAxis) /** -*@brief Read data with offset and stride. +*@brief Read data with offset and stride . \n *@par Inputs: -*One input:\n -*x: A Tensor. Must be one of the following types: float16, int8. +*One input: +*x: A Tensor. Must be one of the following types: float16, int8 . \n *@par Attributes: -*@li stride_list: An optional 5D list of type int32. Defaults to "[1,1,1,1,1]". +*@li stride_list: An optional 5D list of type int32. Defaults to "[1,1,1,1,1]" . \n *@par Outputs: *y: A Tensor of the same type as "x". @@ -1740,10 +1795,10 @@ REG_OP(ReadSelect) .OP_END_FACTORY_REG(ReadSelect) /** -*@brief: Write data with offset. +*@brief: Write data with offset . \n -*@par Inputs:\n -*x: A Tensor. Must be one of the following types: int32, float32, float16, int8. +*@par Inputs: +*x: A Tensor. Must be one of the following types: int32, float32, float16, int8 . \n *@par Outputs: *y: A Tensor. 
Has the same type as "x". @@ -1754,17 +1809,17 @@ REG_OP(WriteSelect) .OP_END_FACTORY_REG(WriteSelect) /** -*@brief Read data by stride. +*@brief Read data by stride . \n *@par Inputs: -*One input:\n -*x: A Tensor. Must be one of the following types: float16, int8. +*One input: +*x: A Tensor. Must be one of the following types: float16, int8 . \n *@par Attributes: -*@li axis: A required int32, specifying the index of axis to read by stride. +*@li axis: A required int32, specifying the index of axis to read by stride . \n *@par Attributes: -*@li stride: A required int32, specifying the value of reading stride. +*@li stride: A required int32, specifying the value of reading stride . \n *@par Outputs: *y: A Tensor of the same type as "x". @@ -1777,16 +1832,16 @@ REG_OP(StridedRead) .OP_END_FACTORY_REG(StridedRead) /** -*@brief: Write data by stride. +*@brief: Write data by stride . \n -*@par Inputs:\n -*x: A Tensor. Must be one of the following types: float16, int8. +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, int8 . \n *@par Attributes: -*@li axis: A required int32, specifying the index of axis to write by stride. +*@li axis: A required int32, specifying the index of axis to write by stride . \n *@par Attributes: -*@li stride: A required int32, specifying the value of writing stride. +*@li stride: A required int32, specifying the value of writing stride . \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -1799,7 +1854,7 @@ REG_OP(StridedWrite) .OP_END_FACTORY_REG(StridedWrite) /** -*@brief Computes the cumulative log sum exp of the tensor "x" along "axis". +*@brief Computes the cumulative log sum exp of the tensor "x" along "axis" . \n *@par Inputs: * Two inputs, including: @@ -1839,6 +1894,9 @@ REG_OP(CumulativeLogsumexp) *y: A Tensor. Has the same type as "x". *@par Third-party framework compatibility * Compatible with the TensorFlow operator Cumsum. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. 
Please use CumulativeLogsumexp instead. */ REG_OP(CumulativeLogsumexpD) .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h index f4d5c4ba..18df6edf 100644 --- a/third_party/fwkacllib/inc/ops/set_ops.h +++ b/third_party/fwkacllib/inc/ops/set_ops.h @@ -27,24 +27,24 @@ namespace ge { /** -*@brief Applies set operation along last dimension of 2 Tensor inputs. +*@brief Applies set operation along last dimension of 2 Tensor inputs . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, string. -* @li x2: A Tensor. Must have the same type as x1. +* @li x2: A Tensor. Must have the same type as x1 . \n *@par Attributes: *@li set_operation: A string. -*@li validate_indices: An optional bool. Defaults to True. +*@li validate_indices: An optional bool. Defaults to True . \n *@par Outputs: *@li y_indices: A Tensor of type int64. *@li y_values: A Tensor. Has the same type as x1. -*@li y_shape: A Tensor of type int64. +*@li y_shape: A Tensor of type int64 . \n -*@attention Constraints:\n -*-The implementation for DenseToDenseSetOperation on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for DenseToDenseSetOperation on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow DenseToDenseSetOperation operator. @@ -63,26 +63,26 @@ REG_OP(DenseToDenseSetOperation) .OP_END_FACTORY_REG(DenseToDenseSetOperation) /** -*@brief Applies set operation along last dimension of Tensor and SparseTensor. +*@brief Applies set operation along last dimension of Tensor and SparseTensor . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, string. * @li x2_indices: A Tensor of type int64. 
2D Tensor, indices of a SparseTensor. * @li x2_values: A Tensor. Must have the same type as set1. 1D Tensor, values of a SparseTensor. -* @li x2_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor. +* @li x2_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor . \n *@par Attributes: *@li set_operation: A string. -*@li validate_indices: An optional bool. Defaults to True. +*@li validate_indices: An optional bool. Defaults to True . \n *@par Outputs: *@li y_indices: A Tensor of type int64. *@li y_values: A Tensor. Has the same type as x1. -*@li y_shape: A Tensor of type int64. +*@li y_shape: A Tensor of type int64 . \n -*@attention Constraints:\n -*-The implementation for DenseToSparseSetOperation on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for DenseToSparseSetOperation on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow DenseToSparseSetOperation operator. @@ -103,29 +103,29 @@ REG_OP(DenseToSparseSetOperation) .OP_END_FACTORY_REG(DenseToSparseSetOperation) /** -*@brief Applies set operation along last dimension of 2 SparseTensor inputs. +*@brief Applies set operation along last dimension of 2 SparseTensor inputs . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li x1_indices: A Tensor of type int64. 2D Tensor, indices of a SparseTensor. -* @li x1_values: A Tensor. Must be one of the following types: int8, int16, \n +* @li x1_values: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, string. 1D Tensor, values of a SparseTensor. * @li x1_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor. * @li x2_indices: A Tensor of type int64. 2D Tensor, indices of a SparseTensor. * @li x2_values: A Tensor. Must have the same type as set1_values. 1D Tensor, values of a SparseTensor. -* @li x2_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor. 
+* @li x2_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor . \n *@par Attributes: *@li set_operation: A string. -*@li validate_indices: An optional bool. Defaults to True. +*@li validate_indices: An optional bool. Defaults to True . \n *@par Outputs: *@li y_indices: A Tensor of type int64. *@li y_values: A Tensor. Has the same type as x1_values. -*@li y_shape: A Tensor of type int64. +*@li y_shape: A Tensor of type int64 . \n -*@attention Constraints:\n -*-The implementation for SparseToSparseSetOperation on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for SparseToSparseSetOperation on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow SparseToSparseSetOperation operator. @@ -148,22 +148,22 @@ REG_OP(SparseToSparseSetOperation) .OP_END_FACTORY_REG(SparseToSparseSetOperation) /** -*@brief Number of unique elements along last dimension of input set. +*@brief Number of unique elements along last dimension of input set . \n *@par Inputs: -*Inputs include: \n +*Inputs include: * @li set_indices: A Tensor of type int64. 2D Tensor, indices of a SparseTensor. * @li set_values: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16. -* @li set_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor. +* @li set_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor . \n *@par Attributes: -*validate_indices: An optional bool. Defaults to True. +*validate_indices: An optional bool. Defaults to True . \n *@par Outputs: -*size: A Tensor of type int32. +*size: A Tensor of type int32 . \n -*@attention Constraints:\n -*-The implementation for SetSize on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for SetSize on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow SetSize operator. 
diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index eb3629a4..3eecbeab 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -26,16 +26,16 @@ namespace ge { /** -*@brief Applies softmax to a batched ND SparseTensor. +*@brief Applies softmax to a batched ND SparseTensor . \n *@par Inputs: *The input must be a batched ND SparseTensor. * @li indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. * @li values: A vector Tensor of type float or double. 1D. The values of the SparseTensor. -* @li shape: A vector Tensor of type int64. 1D. The shape of the SparseTensor. +* @li shape: A vector Tensor of type int64. 1D. The shape of the SparseTensor . \n *@par Outputs: -*y: A vector Tensor. 1D. Has the same type as "values". +*y: A vector Tensor. 1D. Has the same type as "values" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator SparseSoftmax. @@ -48,17 +48,17 @@ REG_OP(SparseSoftmax) .OP_END_FACTORY_REG(SparseSoftmax) /** -*@brief Adds up a SparseTensor and a dense Tensor, producing a dense Tensor. +*@brief Adds up a SparseTensor and a dense Tensor, producing a dense Tensor . \n *@par Inputs: *Inputs "x1_*" must be SparseTensors and "x2" must be a dense Tensor. * @li x1_indices: A matrix Tensor of type int32 or int64. 2D. The indices of the SparseTensor. * @li x1_values: The values of the SparseTensor. A vector Tensor. 1D. * @li x1_shape: A vector Tensor of type int32 or int64. 1D. The shape of the SparseTensor. -* @li x2: A matrix Tensor. Has the same type and same shape as the SparseTensors. +* @li x2: A matrix Tensor. Has the same type and same shape as the SparseTensors . \n *@par Outputs: -*y: A matrix Tensor. Has the same type and same shape as "x2". +*y: A matrix Tensor. Has the same type and same shape as "x2" . 
\n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseTensorDenseAdd. @@ -76,16 +76,16 @@ REG_OP(SparseTensorDenseAdd) .OP_END_FACTORY_REG(SparseTensorDenseAdd) /** -*@brief Reorders a SparseTensor into the canonical, row-major ordering. +*@brief Reorders a SparseTensor into the canonical, row-major ordering . \n *@par Inputs: * @li indices: A matrix Tensor of type int32 or int64. 2D. The indices of the SparseTensor. * @li values: Values of the SparseTensor. A vector Tensor. 1D. -* @li shape: A vector Tensor of type int32 or int64. 1D. The shape of the SparseTensor. +* @li shape: A vector Tensor of type int32 or int64. 1D. The shape of the SparseTensor . \n *@par Outputs: *@li y_indices: The indices of the SparseTensor. Has the same type as "indices". -*@li y_values: The values of the SparseTensorr. Has the same type as "values". +*@li y_values: The values of the SparseTensor. Has the same type as "values" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseReorder. @@ -103,16 +103,16 @@ REG_OP(SparseReorder) .OP_END_FACTORY_REG(SparseReorder) /** -*@brief Reshapes a SparseTensor to represent values in a new dense shape. +*@brief Reshapes a SparseTensor to represent values in a new dense shape . \n *@par Inputs: * @li indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. * @li shape: A vector Tensor of type int64. 1D. The shape of the SparseTensor. -* @li new_shape: A 1D Tensor of type int64. The requested new dense shape. +* @li new_shape: A 1D Tensor of type int64. The requested new dense shape . \n *@par Outputs: *@li y_indices: A Tensor of type int64. The indices of the new dense shape. -*@li y_shape: A Tensor of type int64. The shape of the new dense shape. +*@li y_shape: A Tensor of type int64. The shape of the new dense shape . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseReshape. 
@@ -128,15 +128,15 @@ REG_OP(SparseReshape) /** *@brief Adds up a SparseTensor and a dense Tensor. *@par Inputs: -*(1) Broadcasts the dense side to have the same shape as the sparse side, if eligible;\n +*(1) Broadcasts the dense side to have the same shape as the sparse side, if eligible; *(2) Then, only the dense values pointed to by the indices of the SparseTensor participate in the cwise addition. * @li x1_indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. * @li x1_values: The values of the SparseTensor. A vector Tensor. 1D. * @li x1_shape: A 1D Tensor of type int64. The requested new dense shape. -* @li x2: A dense Tensor of the same type as "x1_values". +* @li x2: A dense Tensor of the same type as "x1_values" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x1_values". +*y: A Tensor. Has the same type as "x1_values" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseDenseCwiseAdd. @@ -156,16 +156,16 @@ REG_OP(SparseDenseCwiseAdd) .OP_END_FACTORY_REG(SparseDenseCwiseAdd) /** -*@brief Divides a SparseTensor by a dense Tensor. +*@brief Divides a SparseTensor by a dense Tensor . \n *@par Inputs: * @li x1_indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. * @li x1_values: The values of the SparseTensor. A vector Tensor. 1D. * @li x1_shape: A 1D Tensor of type int64. The requested new dense shape. -* @li x2: A dense Tensor of the same type as "x1_values". +* @li x2: A dense Tensor of the same type as "x1_values" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x1_values". +*y: A Tensor. Has the same type as "x1_values" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseDenseCwiseDiv. @@ -185,16 +185,16 @@ REG_OP(SparseDenseCwiseDiv) .OP_END_FACTORY_REG(SparseDenseCwiseDiv) /** -*@brief Multiplies a SparseTensor by a dense Tensor. +*@brief Multiplies a SparseTensor by a dense Tensor . 
\n *@par Inputs: * @li x1_indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. * @li x1_values: The values of the SparseTensor. A vector Tensor. 1D. * @li x1_shape: A 1D Tensor of type int64. The requested new dense shape. -* @li x2: A dense Tensor of the same type as "x1_values". +* @li x2: A dense Tensor of the same type as "x1_values" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x1_values". +*y: A Tensor. Has the same type as "x1_values" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseDenseCwiseMul. @@ -214,20 +214,20 @@ REG_OP(SparseDenseCwiseMul) .OP_END_FACTORY_REG(SparseDenseCwiseMul) /** -*@brief Adds a SparseTensor to a SparseTensorsMap. +*@brief Adds a SparseTensor to a SparseTensorsMap . \n *@par Inputs: * The input tensor must be a SparseTensor. * @li x1_indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. * @li x1_values: The values of the SparseTensor. A vector Tensor. 1D. -* @li x1_shape: A 1D Tensor of type int64. The requested new dense shape. +* @li x1_shape: A 1D Tensor of type int64. The requested new dense shape . \n *@par Attributes: *@li container: An optional string. Defaults to " ". -*@li shared_name: An optional string. Defaults to " ". +*@li shared_name: An optional string. Defaults to " " . \n *@par Outputs: -*handle: A Tensor of type int64. +*handle: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator AddSparseToTensorsMap. @@ -244,16 +244,16 @@ REG_OP(AddSparseToTensorsMap) .OP_END_FACTORY_REG(AddSparseToTensorsMap) /** -*@brief The gradient operator for the SparseSlice op. +*@brief The gradient operator for the SparseSlice op . \n *@par Inputs: * @li backprop_val_grad: A Tensor. * @li indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. * @li start: A 1D Tensor of type int64. The start of the slice. 
-* @li new_indices: A matrix Tensor of type int64. 2D. The indices of the sliced SparseTensor. +* @li new_indices: A matrix Tensor of type int64. 2D. The indices of the sliced SparseTensor . \n *@par Outputs: -*y_grad: A Tensor of type int64. +*y_grad: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseSliceGrad. @@ -271,19 +271,19 @@ REG_OP(SparseSliceGrad) .OP_END_FACTORY_REG(SparseSliceGrad) /** -*@brief Slices a SparseTensor based on the "start" and "size". +*@brief Slices a SparseTensor based on the "start" and "size" . \n *@par Inputs: * @li indices: A 2D Tensor of type int64. The indices of the SparseTensor. * @li values: A 1D Tensor. The values of the SparseTensor. * @li shape: A 2D Tensor of type int64. The shape of the SparseTensor. * @li start: A 1D Tensor of type int64. The start of the slice. -* @li size: A 1D Tensor of type int64. The size of the slice. +* @li size: A 1D Tensor of type int64. The size of the slice . \n *@par Outputs: *y_indices: A Tensor of type int64. *y_values: A Tensor. Has the same type as "values". -*y_values: A Tensor of type int64. +*y_shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseSlice. @@ -304,17 +304,17 @@ REG_OP(SparseSlice) .OP_END_FACTORY_REG(SparseSlice) /** -*@brief The gradient operator for the SparseAdd op. +*@brief The gradient operator for the SparseAdd op . \n *@par Inputs: * @li backprop_val_grad: A 1D Tensor with shape [nnz(sum)]. The gradient with respect to the non-empty values of the sum. * @li x1_indices: A 2D Tensor of type int64. The indices of the SparseTensor A, with size [nnz(A), ndims]. * @li x2_indices: A 2D Tensor of type int64. The indices of the SparseTensor B, with size [nnz(B), ndims]. -* @li sum_indices: A 2D Tensor of type int64. The indices of the sum SparseTensor, with size [nnz(sum), ndims]. +* @li sum_indices: A 2D Tensor of type int64. 
The indices of the sum SparseTensor, with size [nnz(sum), ndims] . \n *@par Outputs: *x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". -*x2_val_grad: A Tensor. Has the same type as "backprop_val_grad". +*x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseAddGrad. @@ -332,15 +332,15 @@ REG_OP(SparseAddGrad) .OP_END_FACTORY_REG(SparseAddGrad) /** -*@brief The gradient of SparseFillEmptyRows. +*@brief The gradient of SparseFillEmptyRows . \n *@par Inputs: * @li reverse_index_map: A 1D Tensor of type int64. The reverse index map from SparseFillEmptyRows. -* @li grad_values: A 1D Tensor. The gradients from backprop. +* @li grad_values: A 1D Tensor. The gradients from backprop . \n *@par Outputs: *@li y_value: A Tensor. Has the same type as "grad_values". -*@li y_default_value: A Tensor. Has the same type as "grad_values". +*@li y_default_value: A Tensor. Has the same type as "grad_values" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseFillEmptyRowsGrad. @@ -359,23 +359,23 @@ REG_OP(SparseFillEmptyRowsGrad) .OP_END_FACTORY_REG(SparseFillEmptyRowsGrad) /** -*@brief Multiplies SparseTensor A (of rank 2) by dense matrix B. +*@brief Multiplies SparseTensor A (of rank 2) by dense matrix B . \n *@par Inputs: * @li x1_indices: A 2D Tensor of type int32 or int64. * @li The indices of the matrix "SparseTensor", with size [nnz, 2]. * @li x1_values: A 1D Tensor. The values of the SparseTensor, with size [nnz]. * @li x1_shape: A 1D Tensor of type int64. The shape of the SparseTensor, with size [2]. -* @li x2: A dense matrix Tensor of the same type as "x1_values". 2D. +* @li x2: A dense matrix Tensor of the same type as "x1_values". 2D . \n *@par Outputs: -*y: A "Tensor". Has the same type as "x1_values". +*y: A "Tensor". Has the same type as "x1_values" . \n *@par Attributes: *@li adjoint_a: An optional bool. 
Defaults to "False".Use the adjoint of A in the matrix multiply. *@li If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A). *@li adjoint_b: An optional bool. Defaults to "False".Use the adjoint of B in the matrix multiply. -*@li If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B). +*@li If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseTensorDenseMatMul. @@ -394,17 +394,17 @@ REG_OP(SparseTensorDenseMatMul) .OP_END_FACTORY_REG(SparseTensorDenseMatMul) /** -*@brief Converts a sparse representation into a dense tensor. +*@brief Converts a sparse representation into a dense tensor . \n *@par Inputs: * @li indices: A 0D, 1D, or 2D Tensor of type int32 or int64. * @li output_shape: A 1D Tensor of the same type as "sparse_indices". The shape of the dense output tensor. * @li values: A 1D Tensor. Values corresponding to each row of "sparse_indices", * @li or a scalar value to be used for all sparse indices. -* @li default_value: A Tensor of the same type as "sparse_values". +* @li default_value: A Tensor of the same type as "sparse_values" . \n *@par Outputs: -*y: A Tensor. Has the same type as "values". +*y: A Tensor. Has the same type as "values" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseToDense. @@ -423,16 +423,17 @@ REG_OP(SparseToDense) .OP_END_FACTORY_REG(SparseToDense) /** -*@brief Concatenates a list of `SparseTensor` along the specified dimension.\n -*Concatenation is with respect to the dense versions of these sparse tensors. +*@brief Concatenates a list of `SparseTensor` along the specified dimension. +*Concatenation is with respect to the dense versions of these sparse tensors . \n *@par Inputs: *3 or 5 inputs,contains: -* @li indices:A list of at least 2 `Tensor` objects with type `int64`.2-D. \n -*Indices of each input `SparseTensor`. 
+* @li indices:A list of at least 2 `Tensor` objects with type `int64`.2-D. +*Indices of each input `SparseTensor`.It's a dynamic input. * @li values:A list with the same length as `indices` of `Tensor` objects with the same type. -* @li shapes:A list with the same length as `indices` of `Tensor` objects with type `int64`.1-D. \n -* Shapes of each `SparseTensor`. +It's a dynamic input. +* @li shapes:A list with the same length as `indices` of `Tensor` objects with type `int64`.1-D. +* Shapes of each `SparseTensor`. It's a dynamic input. \n *@par Attributes: *@li concat_dim: An `int` Dimension to concatenate along @@ -441,7 +442,7 @@ REG_OP(SparseToDense) *@par Outputs: * @li y_indices:A `Tensor` of type `int64`. * @li y_values:A `Tensor`. Has the same type as `values`. -* @li y_shape:A `Tensor` of type `int64`. +* @li y_shape:A `Tensor` of type `int64` . \n *@par Third-party framework compatibility * Compatible SparseConcat operator in Tensorflow @@ -464,27 +465,27 @@ REG_OP(SparseConcat) .OP_END_FACTORY_REG(SparseConcat) /** -*@brief Adds two `SparseTensor` objects to produce another `SparseTensor`. +*@brief Adds two `SparseTensor` objects to produce another `SparseTensor` . \n *@par Inputs: *7 inputs, contains: -* @li x1_indices:A `Tensor` of type `int64`.2-D. \n +* @li x1_indices:A `Tensor` of type `int64`.2-D. * The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. * @li x1_values:A `Tensor`. Must be one of the following types:float,int8,int16,int32,int64, float64. -* @li x1_shape:A `Tensor` of type `int64`.1-D. The `shape` of the first `SparseTensor`, \n +* @li x1_shape:A `Tensor` of type `int64`.1-D. The `shape` of the first `SparseTensor`, * size `[ndims]` Vector. -* @li x2_indices:A `Tensor` of type `int64`.2-D.The `indices` of the second `SparseTensor`, \n +* @li x2_indices:A `Tensor` of type `int64`.2-D.The `indices` of the second `SparseTensor`, * size `[nnz, ndims]` Matrix. -* @li x2_values:A `Tensor`. 
Must have the same type as `a_values`.1-D. \n +* @li x2_values:A `Tensor`. Must have the same type as `a_values`.1-D. * The `values` of the second `SparseTensor`, size `[nnz]` Vector. -* @li x2_shape:A `Tensor` of type `int64`.1-D. \n +* @li x2_shape:A `Tensor` of type `int64`.1-D. * The `shape` of the second `SparseTensor`, size `[ndims]` Vector. -* @li thresh:A `Tensor` 0-D.The magnitude threshold that determines if an output value/index pair takes space. +* @li thresh:A `Tensor` 0-D.The magnitude threshold that determines if an output value/index pair takes space . \n *@par Outputs: * @li sum_indices:A `Tensor` of type `int64`. * @li sum_values:A `Tensor`. Has the same type as `x1_values`. -* @li sum_shape:A `Tensor` of type `int64`. +* @li sum_shape:A `Tensor` of type `int64` . \n *@par Third-party framework compatibility * Compatible SparseAdd operator in Tensorflow @@ -507,22 +508,22 @@ REG_OP(SparseAdd) .OP_END_FACTORY_REG(SparseAdd) /** -*@brief Fills empty rows in the input 2-D `SparseTensor` with a default value. +*@brief Fills empty rows in the input 2-D `SparseTensor` with a default value . \n *@par Inputs: *4 inputs,contains: * @li indices: A `Tensor` of type `int64`.2-D. the indices of the sparse tensor. * @li values: A `Tensor`. 1-D. the values of the sparse tensor. * @li dense_shape: A `Tensor` of type `int64`.1-D. the shape of the sparse tensor. -* @li default_value: `Tensor`. Must have the same type as `values`.\n -*0-D. default value to insert into location `[row, 0, ..., 0]` \n -*for rows missing from the input sparse tensor. +* @li default_value: `Tensor`. Must have the same type as `values`. +*0-D. default value to insert into location `[row, 0, ..., 0]` +*for rows missing from the input sparse tensor . \n *@par Outputs: * @li y_indices:A `Tensor` of type `int64`. * @li y_values:A `Tensor`. Has the same type as `values`. * @li empty_row_indicator:A `Tensor` of type `bool`. -* @li reverse_index_map:A `Tensor` of type `int64`. 
+* @li reverse_index_map:A `Tensor` of type `int64` . \n *@par Third-party framework compatibility * Compatible SparseFillEmptyRows operator in Tensorflow @@ -547,23 +548,23 @@ REG_OP(SparseFillEmptyRows) .OP_END_FACTORY_REG(SparseFillEmptyRows) /** -*@brief Returns the element-wise max of two SparseTensors. +*@brief Returns the element-wise max of two SparseTensors . \n *@par Inputs: *6 inputs,contains: -* @li x1_indices:A `Tensor` of type `int64`.2-D. \n -*`N x R` matrix with the indices of non-empty values in a SparseTensor, \n +* @li x1_indices:A `Tensor` of type `int64`.2-D. +*`N x R` matrix with the indices of non-empty values in a SparseTensor, * in the canonical lexicographic ordering. * @li x1_values:A `Tensor`. 1-D. the values of the sparse tensor. * @li x1_shape:A `Tensor` of type `int64`.1-D. the shape of the sparse tensor. * @li x2_indices:A `Tensor` of type `int64`.2-D. the indices of the sparse tensor. * @li x2_values:A `Tensor`. 1-D. Must have the same type as `x1_values`. -* @li x2_shape:A `Tensor` of type `int64`.1-D. \n -*counterpart to `a_shape` for the other operand; the two shapes must be equal. +* @li x2_shape:A `Tensor` of type `int64`.1-D. +*counterpart to `a_shape` for the other operand; the two shapes must be equal . \n *@par Outputs: * @li y_indices:A `Tensor` of type `int64`. -* @li y_values:A `Tensor`. Has the same type as `x1_values`. +* @li y_values:A `Tensor`. Has the same type as `x1_values` . \n *@par Third-party framework compatibility * Compatible SparseSparseMaximum operator in Tensorflow @@ -583,23 +584,23 @@ REG_OP(SparseSparseMaximum) .OP_END_FACTORY_REG(SparseSparseMaximum) /** -*@brief Returns the element-wise min of two SparseTensors. +*@brief Returns the element-wise min of two SparseTensors . \n *@par Inputs: *6 inputs,contains: -* @li x1_indices:A `Tensor` of type `int64`.2-D. \n -*`N x R` matrix with the indices of non-empty values in a SparseTensor, \n +* @li x1_indices:A `Tensor` of type `int64`.2-D. 
+*`N x R` matrix with the indices of non-empty values in a SparseTensor, * in the canonical lexicographic ordering. * @li x1_values:A `Tensor`. 1-D. the values of the sparse tensor. * @li x1_shape:A `Tensor` of type `int64`.1-D. the shape of the sparse tensor. * @li x2_indices:A `Tensor` of type `int64`.2-D. the indices of the sparse tensor. * @li x2_values:A `Tensor`. 1-D. Must have the same type as `x1_values`. -* @li x2_shape:A `Tensor` of type `int64`.1-D. \n -*counterpart to `a_shape` for the other operand; the two shapes must be equal. +* @li x2_shape:A `Tensor` of type `int64`.1-D. +*counterpart to `a_shape` for the other operand; the two shapes must be equal . \n *@par Outputs: * @li y_indices:A `Tensor` of type `int64`. -* @li y_values:A `Tensor`. Has the same type as `x1_values`. +* @li y_values:A `Tensor`. Has the same type as `x1_values` . \n *@par Third-party framework compatibility * Compatible SparseSparseMinimum operator in Tensorflow @@ -622,25 +623,25 @@ REG_OP(SparseSparseMinimum) .OP_END_FACTORY_REG(SparseSparseMinimum) /** -*@brief Computes the max of elements across dimensions of a SparseTensor. +*@brief Computes the max of elements across dimensions of a SparseTensor . \n *@par Inputs: *4 or 5 inputs,contains: -* @li x_indices:A `Tensor` of type `int64`.2-D. \n -*`N x R` matrix with the indices of non-empty values in a \n +* @li x_indices:A `Tensor` of type `int64`.2-D. +*`N x R` matrix with the indices of non-empty values in a *SparseTensor, possibly not in canonical ordering. -* @li x_values:A `Tensor`. 1-D. the values of the sparse tensor. \n +* @li x_values:A `Tensor`. 1-D. the values of the sparse tensor. *`N` non-empty values corresponding to `input_indices`. * @li x_shape:A `Tensor` of type `int64`.1-D. Shape of the input SparseTensor. -* @li reduction_axes:A `Tensor` of type `int32`.1-D.\n -*Length-`K` vector containing the reduction axes. +* @li reduction_axes:A `Tensor` of type `int32`.1-D. 
+*Length-`K` vector containing the reduction axes . \n *@par Attributes: -* keep_dims:An optional `bool`. Defaults to `False`.\n -*If true, retain reduced dimensions with length 1. +* keep_dims:An optional `bool`. Defaults to `False`. +*If true, retain reduced dimensions with length 1 . \n *@par Outputs: -* y:A `Tensor`. Has the same type as `input_values`. +* y:A `Tensor`. Has the same type as `input_values` . \n *@par Third-party framework compatibility * Compatible SparseReduceMax operator in Tensorflow @@ -657,27 +658,27 @@ REG_OP(SparseReduceMax) .OP_END_FACTORY_REG(SparseReduceMax) /** -*@brief Computes the max of elements across dimensions of a SparseTensor. +*@brief Computes the max of elements across dimensions of a SparseTensor . \n *@par Inputs: *4 or 5 inputs,contains: -* @li x_indices:A `Tensor` of type `int64`.2-D. \n -*`N x R` matrix with the indices of non-empty values in a \n +* @li x_indices:A `Tensor` of type `int64`.2-D. +*`N x R` matrix with the indices of non-empty values in a *SparseTensor, possibly not in canonical ordering. -* @li x_values:A `Tensor`. 1-D. the values of the sparse tensor. \n +* @li x_values:A `Tensor`. 1-D. the values of the sparse tensor. *`N` non-empty values corresponding to `input_indices`. * @li x_shape:A `Tensor` of type `int64`.1-D. Shape of the input SparseTensor. -* @li reduction_axes:A `Tensor` of type `int32`.1-D.\n -*Length-`K` vector containing the reduction axes. +* @li reduction_axes:A `Tensor` of type `int32`.1-D. +*Length-`K` vector containing the reduction axes . \n *@par Attributes: -* keep_dims:An optional `bool`. Defaults to `False`.\n -*If true, retain reduced dimensions with length 1. +* keep_dims:An optional `bool`. Defaults to `False`. +*If true, retain reduced dimensions with length 1 . \n *@par Outputs: * @li y_indices:A `Tensor` of type `int64`. * @li y_values:A `Tensor`. Has the same type as `input_values`. -* @li y_shape:A `Tensor` of type `int64`. +* @li y_shape:A `Tensor` of type `int64` . 
\n *@par Third-party framework compatibility * Compatible SparseReduceMaxSparse operator in Tensorflow @@ -696,27 +697,27 @@ REG_OP(SparseReduceMaxSparse) .OP_END_FACTORY_REG(SparseReduceMaxSparse) /** -*@brief Computes the sum of elements across dimensions of a SparseTensor. +*@brief Computes the sum of elements across dimensions of a SparseTensor . \n *@par Inputs: *4 or 5 inputs, including: * @li x_indices: A 2D Tensor of type int64. -*"N x R" matrix with the indices of non-empty values in a \n +*"N x R" matrix with the indices of non-empty values in a *SparseTensor, possibly not in canonical ordering. * @li x_values: A 1D Tensor. The values of the SparseTensor. *"N" non-empty values corresponding to "input_indices". * @li x_shape: A 1D Tensor of type int64. Shape of the input SparseTensor. -* @li reduction_axes: A 1D Tensor of type int32. \n -*A length-"K" vector containing the reduction axes. +* @li reduction_axes: A 1D Tensor of type int32. +*A length-"K" vector containing the reduction axes . \n *@par Attributes: -* keep_dims: An optional bool. Defaults to "False". \n -*If true, retains reduced dimensions with length 1. +* keep_dims: An optional bool. Defaults to "False". +*If true, retains reduced dimensions with length 1 . \n *@par Outputs: * @li y_indices: A Tensor of type int64. * @li y_values: A Tensor. Has the same type as "input_values". -* @li y_shape: A Tensor of type int64. +* @li y_shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseReduceSum. @@ -735,27 +736,27 @@ REG_OP(SparseReduceSum) .OP_END_FACTORY_REG(SparseReduceSum) /** -*@brief Computes the sum of elements across dimensions of a SparseTensor. +*@brief Computes the sum of elements across dimensions of a SparseTensor . \n *@par Inputs: *4 or 5 inputs, including: * @li x_indices: A 2D Tensor of type int64. 
-*"N x R" matrix with the indices of non-empty values in a \n +*"N x R" matrix with the indices of non-empty values in a *SparseTensor, possibly not in canonical ordering. * @li x_values: A 1D Tensor. The values of the SparseTensor. *"N" non-empty values corresponding to "input_indices". * @li x_shape: A 1D Tensor of type int64. Shape of the input SparseTensor. -* @li reduction_axes: A 1D Tensor of type int32. \n -* A length-"K" vector containing the reduction axes. +* @li reduction_axes: A 1D Tensor of type int32. +* A length-"K" vector containing the reduction axes . \n *@par Attributes: -* keep_dims: An optional bool. Defaults to "False".\n -*If true, retains reduced dimensions with length 1. +* keep_dims: An optional bool. Defaults to "False". +*If true, retains reduced dimensions with length 1 . \n *@par Outputs: * @li y_indices: A Tensor of type int64. * @li y_values: A Tensor. Has the same type as "input_values". -* @li y_shape: A Tensor of type int64. +* @li y_shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseReduceSumSparse. @@ -776,24 +777,24 @@ REG_OP(SparseReduceSumSparse) .OP_END_FACTORY_REG(SparseReduceSumSparse) /** -*@brief Splits a SparseTensor into "num_split" tensors along one dimension. +*@brief Splits a SparseTensor into "num_split" tensors along one dimension . \n *@par Inputs: *4 or 5 inputs, including: -* @li split_dim: A 0D Tensor of type int64.\n +* @li split_dim: A 0D Tensor of type int64. *The dimension along which to split. Must be in the range "[0, rank(shape))". -* @li indices: A 2D Tensor of type int64.\n +* @li indices: A 2D Tensor of type int64. * The indices of the SparseTensor. * @li values: A 1D Tensor. The values of the SparseTensor. -* @li shape: A 1D Tensor of type int64. Shape of the SparseTensor. +* @li shape: A 1D Tensor of type int64. Shape of the SparseTensor . \n *@par Attributes: -* num_split: An int that is >= 1. The number of ways to split. 
+* num_split: An int that is >= 1. The number of ways to split . \n *@par Outputs: * @li y_indices: A list of "num_split" Tensor objects of type int64. * @li y_values: A list of "num_split" Tensor objects with the same type as "values". -* @li y_shape: A list of "num_split" Tensor objects of type int64. +* @li y_shape: A list of "num_split" Tensor objects of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseSplit. @@ -814,33 +815,33 @@ REG_OP(SparseSplit) .OP_END_FACTORY_REG(SparseSplit) /** -*@brief Generates sparse cross from a list of sparse and dense tensors. +*@brief Generates sparse cross from a list of sparse and dense tensors . \n *@par Inputs: *8 or 10 inputs, including: * @li indices: A list of 2D Tensor objects of type int64. -* Indices of each input SparseTensor. +* Indices of each input SparseTensor.It's a dynamic input. * @li values: A list of 1D Tensor objects of type int64 or string. -* Values of each SparseTensor. +* Values of each SparseTensor.It's a dynamic input. * @li shapes: A list with the same length as "indices" of 1D Tensor objects of type int64. -* Shapes of each SparseTensor. +* Shapes of each SparseTensor.It's a dynamic input. * @li dense_inputs: A list of 2D Tensor objects of type int64 or string. -* Columns represented by dense Tensor. +* Columns represented by dense Tensor .It's a dynamic input. \n *@par Attributes: * @li N: number of sparse. * @li hashed_output: A bool. If true, returns the hash of the cross instead of the string. -* @li num_buckets: An int that is >= 0. It is used if "hashed_output" is true. \n +* @li num_buckets: An int that is >= 0. It is used if "hashed_output" is true. *output = hashed_value%num_buckets if num_buckets > 0 else "hashed_value". -* @li hash_key: An int. Specify the hash_key that will be used by the "FingerprintCat64"\n +* @li hash_key: An int. 
Specify the hash_key that will be used by the "FingerprintCat64" *function to combine the crosses fingerprints. * @li out_type: An int64 or string. -* @li internal_type: An int64 or string. +* @li internal_type: An int64 or string . \n *@par Outputs: * @li output_indices: A Tensor of type int64. * @li output_values: A Tensor of type "out_type". -* @li output_shape: A Tensor of type int64. +* @li output_shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SparseCross. @@ -862,23 +863,23 @@ REG_OP(SparseCross) .OP_END_FACTORY_REG(SparseCross) /** -*@brief Generates sparse cross from a list of sparse and dense tensors. +*@brief Generates sparse cross from a list of sparse and dense tensors . \n *@par Inputs: *3 or 5 inputs, including: -* @li indices: A 2D Tensor of type int64. \n +* @li indices: A 2D Tensor of type int64. * The "indices" of the minibatch SparseTensor. * @li values: A 1D Tensor. The "values" of the minibatch SparseTensor. -* @li shape: A 1D Tensor of type int64. The "shape" of the minibatch SparseTensor. +* @li shape: A 1D Tensor of type int64. The "shape" of the minibatch SparseTensor . \n *@par Attributes: -* @li container: An optional string. Defaults to "". \n +* @li container: An optional string. Defaults to "". *The container name for the "SparseTensorsMap" created by this op. -* @li shared_name: An optional string. Defaults to "". \n -*The shared name for the "SparseTensorsMap" created by this op. +* @li shared_name: An optional string. Defaults to "". +*The shared name for the "SparseTensorsMap" created by this op . \n *@par Outputs: -* handles: A Tensor of type int64. +* handles: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator AddManySparseToTensorsMap. 
@@ -895,24 +896,24 @@ REG_OP(AddManySparseToTensorsMap) .OP_END_FACTORY_REG(AddManySparseToTensorsMap) /** -*@brief Reads SparseTensors from a "SparseTensorsMap" and concatenate them. +*@brief Reads SparseTensors from a "SparseTensorsMap" and concatenate them . \n *@par Inputs: *2 or 4 inputs, including: -* handles: A 1D Tensor of type int64. \n -* The "N" serialized SparseTensor objects. +* handles: A 1D Tensor of type int64. +* The "N" serialized SparseTensor objects . \n *@par Attributes: * @li dtype: A tf.DType. The "dtype" of the SparseTensor objects stored in the "SparseTensorsMap". -* @li container: An optional string. Defaults to "". \n +* @li container: An optional string. Defaults to "". *The container name for the "SparseTensorsMap" read by this op. -* @li shared_name: An optional string. Defaults to "". \n -*The shared name for the "SparseTensorsMap" read by this op. +* @li shared_name: An optional string. Defaults to "". +*The shared name for the "SparseTensorsMap" read by this op . \n *@par Outputs: * @li indices: A Tensor of type int64. * @li values: A Tensor of type "dtype". -* @li shape: A Tensor of type int64. +* @li shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator TakeManySparseFromTensorsMap. @@ -929,19 +930,19 @@ REG_OP(TakeManySparseFromTensorsMap) .OP_END_FACTORY_REG(TakeManySparseFromTensorsMap) /** -*@brief Serializes a SparseTensor into a [3] Tensor object. +*@brief Serializes a SparseTensor into a [3] Tensor object . \n *@par Inputs: *3 or 4 inputs, including: * @li indices: A 2D Tensor of type int64. The indices of the SparseTensor. * @li values: A 1D Tensor. The values of the SparseTensor. -* @li shape: A 1D Tensor of type int64. The shape of the SparseTensor. +* @li shape: A 1D Tensor of type int64. The shape of the SparseTensor . \n *@par Attributes: -* out_type: An optional type. Defaults to "string". +* out_type: An optional type. Defaults to "string" . 
\n *@par Outputs: -* serialized_sparse: A Tensor of type "out_type". +* serialized_sparse: A Tensor of type "out_type" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SerializeSparse. @@ -957,19 +958,19 @@ REG_OP(SerializeSparse) .OP_END_FACTORY_REG(SerializeSparse) /** -*@brief Serializes an "N"-minibatch SparseTensor into an [N, 3] Tensor object. +*@brief Serializes an "N"-minibatch SparseTensor into an [N, 3] Tensor object . \n *@par Inputs: *3 or 4 inputs, including: * @li indices: A 2D Tensor of type int64. The "indices" of the minibatch SparseTensor. * @li values: A 1D Tensor. The "values" of the minibatch SparseTensor. -* @li shape: A 1D Tensor of type int64. The "shape" of the minibatch SparseTensor. +* @li shape: A 1D Tensor of type int64. The "shape" of the minibatch SparseTensor . \n *@par Attributes: -* out_type: An optional type. Defaults to "string". +* out_type: An optional type. Defaults to "string" . \n *@par Outputs: -* serialized_sparse: A Tensor of type "out_type". +* serialized_sparse: A Tensor of type "out_type" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SerializeManySparse. @@ -985,20 +986,20 @@ REG_OP(SerializeManySparse) .OP_END_FACTORY_REG(SerializeManySparse) /** -*@brief Deserializes SparseTensor objects. +*@brief Deserializes SparseTensor objects . \n *@par Inputs: *Two inputs, including: -* serialized_sparse: A Tensor. The serialized SparseTensor objects. \n -*The last dimension must have 3 columns. +* serialized_sparse: A Tensor. The serialized SparseTensor objects. +*The last dimension must have 3 columns . \n *@par Attributes: -* dtype: An optional type. The type of the serialized SparseTensor objects. +* dtype: An optional type. The type of the serialized SparseTensor objects . \n *@par Outputs: * @li indices: A Tensor of type int64. * @li values: A Tensor of type "dtype". -* @li shape: A Tensor of type int64. 
+* @li shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator DeserializeSparse. @@ -1014,20 +1015,20 @@ REG_OP(DeserializeSparse) .OP_END_FACTORY_REG(DeserializeSparse) /** -*@brief Deserializes and concatenates SparseTensors from a serialized minibatch. +*@brief Deserializes and concatenates SparseTensors from a serialized minibatch . \n *@par Inputs: *Two inputs, including: -* serialized_sparse: A 2D Tensor of type string. \n -*The "N" serialized SparseTensor objects. Must have 3 columns. +* serialized_sparse: A 2D Tensor of type string. +*The "N" serialized SparseTensor objects. Must have 3 columns . \n *@par Attributes: -* dtype: An optional type. The type of the serialized SparseTensor objects. +* dtype: An optional type. The type of the serialized SparseTensor objects . \n *@par Outputs: * @li indices: A Tensor of type int64. * @li values: A Tensor of type "dtype". -* @li shape: A Tensor of type int64. +* @li shape: A Tensor of type int64 . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator DeserializeManySparse. diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index 4c412a1f..460dada4 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -27,16 +27,16 @@ namespace ge { /** -*@brief Real-valued fast Fourier transform. +*@brief Real-valued fast Fourier transform . \n *@par Inputs: *@li input: A float32 tensor. -*@li fft_length: An int32 tensor of shape [1]. The FFT length. +*@li fft_length: An int32 tensor of shape [1]. The FFT length . \n *@par Outputs: -*@li y: A complex64 tensor of the same rank as `input`. The inner-most \n -dimension of `input` is replaced with the `fft_length / 2 + 1` unique \n -frequency components of its 1D Fourier transform. +*@li y: A complex64 tensor of the same rank as `input`. 
The inner-most +dimension of `input` is replaced with the `fft_length / 2 + 1` unique +frequency components of its 1D Fourier transform . \n *@par Third-party framework compatibility * Compatible with TensorFlow RFFT operator. diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index de7300d2..b0bd14c0 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -24,24 +24,24 @@ namespace ge { /** -*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors. +*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors . \n *@par Inputs: * Two inputs, including: *@li x: An ND Tensor. -*Must be one of the types:float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. -*@li split_dim: Must be the following type:int32. Specifies the dimension along which to split. +*Must be one of the types:float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +*@li split_dim: Must be the following type:int32. Specifies the dimension along which to split . \n *@par Attributes: -*num_split: A required int32. Specifies the number of output tensors. No default value. +*num_split: A required int32. Specifies the number of output tensors. No default value . \n *@par Outputs: -*y: Dynamic output.A list of output tensors. Has the same type and format as "x". +*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n *@attention Constraints: *@li "num_split" is greater than or equals to 1. *@li "num_split" is divisible by the size of dimension "split_dim". -*@li "split_dim" is in the range [-len(x.shape), (x.shape)-1]. +*@li "split_dim" is in the range [-len(x.shape), (x.shape)-1] . 
\n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Split. @@ -54,27 +54,30 @@ REG_OP(Split) .OP_END_FACTORY_REG(Split) /** -*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors. +*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors . \n *@par Inputs: * One input: -*: An ND Tensor. \n +*: An ND Tensor. *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 *@par Attributes: *@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. -*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value. +*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n *@par Outputs: -*y:Dynamic output. A list of output tensors. Has the same type and format as "x". +*y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n *@attention Constraints: *@li "num_split" is greater than or equals to 1. *@li "num_split" is divisible by the size of dimension "split_dim". -*@li "split_dim" is in the range [-len(x.shape), (x.shape)-1]. +*@li "split_dim" is in the range [-len(x.shape), (x.shape)-1] . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Split. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Split instead. */ REG_OP(SplitD) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, @@ -86,25 +89,25 @@ REG_OP(SplitD) .OP_END_FACTORY_REG(SplitD) /** -*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors according to "size_splits". +*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors according to "size_splits" . \n *@par Inputs: * Three inputs, including: -*@li x: An ND Tensor. 
\n -*Must be one of the following types: +*@li x: An ND Tensor. +*Must be one of the following types: *@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. -*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split. +*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n *@par Attributes: -*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value. +*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n *@par Outputs: -*y: Dynamic output.A list of output tensors. Has the same type and format as "x". +*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n *@attention Constraints: *@li Each element in "size_splits" is greater than or equal to 1. *@li "size_splits" and "num_split" have the same length. -*@li The elements in "size_splits" sum to the size of dimension "split_dim". +*@li The elements in "size_splits" sum to the size of dimension "split_dim" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SplitV. @@ -118,20 +121,20 @@ REG_OP(SplitV) .OP_END_FACTORY_REG(SplitV) /** -*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors according to "size_splits". +*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors according to "size_splits" . \n *@par Inputs: * One input: -* x: An ND Tensor. \n +* x: An ND Tensor. *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 *@par Attributes: *@li size_splits: A required list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. *@li split_dim: A required int8, int16, int32, or int64. 
Specifies the dimension along which to split. No default value. -*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value. +*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n *@par Outputs: -*y: Dynamic output.A list of output tensors. Has the same type and format as "x". +*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n *@attention Constraints: *@li Each element in "size_splits" is greater than or equal to 1. @@ -141,6 +144,9 @@ Under the caffe framework, the conversion of slice_point through the cut point t Under the caffe framework,size_splits or axis transformat to split_dim.Only one can effect. *@par Third-party framework compatibility * Compatible with the TensorFlow operator SplitV. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SplitV instead. */ REG_OP(SplitVD) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, @@ -156,17 +162,17 @@ REG_OP(SplitVD) *@brief Concatenates a list of N tensors along the first dimension. *@par Inputs: * Two inputs, including: -* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32, \n -* int64, uint8, uint16, uint32, uint64, float16, float32. \n -* Tensors to be concatenated. \n -* All must have size 1 in the first dimension and same shape. -* @li shape: A Tensor of the same type as "x". \n +* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32, +* int64, uint8, uint16, uint32, uint64, float16, float32. +* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. +* It's a dynamic input. +* @li shape: A Tensor of the same type as "x". * The final shape of the result. Should be equal to the shapes of any input -* but with the number of input values in the first dimension. 
+* but with the number of input values in the first dimension . \n *@par Attributes: * @li shape: A required list of ints. -* @li N: The numble of dynamic_input "values". +* @li N: The number of dynamic_input "values" . \n *@par Outputs: *output_data: The concatenated tensor with same type as "values". @@ -181,7 +187,7 @@ REG_OP(ParallelConcat) .OP_END_FACTORY_REG(ParallelConcat) /** -*@brief Concatenates tensors along one dimension. +*@brief Concatenates tensors along one dimension . \n *@par Inputs: * One input: @@ -190,14 +196,14 @@ REG_OP(ParallelConcat) *@par Attributes: *concat_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to concatenate. No default value. -*N: An attribute int8, int16, int32, or int64. Specifies the number of elements in "x". Defaults to "1". +*N: An attribute int8, int16, int32, or int64. Specifies the number of elements in "x". Defaults to "1". *@par Outputs: -*y: A Tensor. Has the same type and format as "x". +*y: A Tensor. Has the same type and format as "x" . \n *@attention Constraints: *@li "x" is a list of at least 2 "tensor" objects of the same type. -*@li "concat_dim" is in the range [-len(x.shape), len(x.shape)]. +*@li "concat_dim" is in the range [-len(x.shape), len(x.shape)] . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ConcatV2. @@ -212,22 +218,22 @@ REG_OP(ConcatV2D) .OP_END_FACTORY_REG(ConcatV2D) /** -*@brief Concatenates tensors along one dimension. +*@brief Concatenates tensors along one dimension . \n *@par Inputs: * Two inputs, including: *@li Dynamic input "x" is An NC1HWC0 or ND Tensor. *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 -*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate. +*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate . \n *@par Attributes: -*N: An optional int8, int16, int32, or int64.
Specifies the number of elements in "x". No default value. +*N: An optional int8, int16, int32, or int64. Specifies the number of elements in "x". No default value . \n *@par Outputs: -*y: A Tensor. Has the same type and format as "x". +*y: A Tensor. Has the same type and format as "x" . \n *@attention Constraints: -* "x" is a list of at least 2 "tensor" objects of the same type. +* "x" is a list of at least 2 "tensor" objects of the same type . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ConcatV2. @@ -240,7 +246,7 @@ REG_OP(ConcatV2) .OP_END_FACTORY_REG(ConcatV2) /** -*@brief Concatenates tensors along one dimension. +*@brief Concatenates tensors along one dimension . \n *@par Inputs: * One input: @@ -249,14 +255,14 @@ REG_OP(ConcatV2) *@par Attributes: *@li concat_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to concatenate. No default value. -*@li N: An optional int8, int16, int32, or int64. Specifies the number of elements in "x". No default value. +*@li N: An optional int8, int16, int32, or int64. Specifies the number of elements in "x". No default value . \n *@par Outputs: -*y: A Tensor. Has the same type and format as "x". +*y: A Tensor. Has the same type and format as "x" . \n *@attention Constraints: *@li "x" is a list of at least 2 "tensor" objects of the same type. -*@li "concat_dim" is in the range [-len(x.shape), len(x.shape)]. +*@li "concat_dim" is in the range [-len(x.shape), len(x.shape)] . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Concat. @@ -271,7 +277,7 @@ REG_OP(ConcatD) .OP_END_FACTORY_REG(ConcatD) /** -*@brief Concatenates tensors along one dimension. +*@brief Concatenates tensors along one dimension . 
\n *@par Inputs: * Two inputs, including: @@ -279,17 +285,17 @@ REG_OP(ConcatD) *Must be one of the following types: float16, float32, double, int32, * uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, * complex128, uint32, uint64, qint16, quint16. -*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate. +*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate . \n *@par Attributes: -*N: An optional int8, int16, int32, or int64. Specifies the number of elements in "x". +*N: An optional int8, int16, int32, or int64. Specifies the number of elements in "x" . \n *@par Outputs: -*y: A Tensor. Has the same type and format as "x". +*y: A Tensor. Has the same type and format as "x" . \n *@attention Constraints: *@li "x" is a list of at least 2 "tensor" objects of the same type. -*@li "concat_dim" is in the range [-len(x.shape), len(x.shape)]. +*@li "concat_dim" is in the range [-len(x.shape), len(x.shape)] . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator Concat. @@ -304,21 +310,22 @@ REG_OP(Concat) /** *@brief Packs the list of tensors in values into a tensor with rank one higher than each tensor in * values, by packing them along the axis dimension. Given a list of length N of tensors of -* shape (A, B, C); if axis == 0 then the output tensor will have the shape (N, A, B, C). +* shape (A, B, C); if axis == 0 then the output tensor will have the shape (N, A, B, C) . \n *@par Inputs: * x: A list of N Tensors. Must be one of the following types: int8, int16, int32, -* int64, uint8, uint16, uint32, uint64, float16, float32, bool. +* int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n *@par Attributes: *@li axis: A optional int, defaultvalue is 0. * Dimension along which to pack. The range is [-(R+1), R+1). -*@li N: A required int. Number of tensors. +*@li N: A required int. Number of tensors . 
\n *@par Outputs: *y: A Tensor. Has the same type as "x". *@par Third-party framework compatibility *Compatible with the TensorFlow operator Pack. +It's a dynamic output. */ REG_OP(Pack) .DYNAMIC_INPUT(x, TensorType::BasicType()) @@ -328,18 +335,18 @@ REG_OP(Pack) .OP_END_FACTORY_REG(Pack) /** -*@brief Computes offsets of concat inputs within its output. +*@brief Computes offsets of concat inputs within its output . \n *@par Inputs: *Two inputs, including: * @li concat_dim: A Tensor of type int32. -* @li x: A list of 1D Tensor objects of type int32. +* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n *@par Attributes: -*N: A required int. +*N: A required int . \n *@par Outputs: -*y: A Tensor list with same type as "x". +*y: A Tensor list with same type as "x" . It's a dynamic output. \n *@par Third-party framework compatibility *@ Compatible with the TensorFlow operator ConcatOffset. @@ -352,19 +359,19 @@ REG_OP(ConcatOffset) .OP_END_FACTORY_REG(ConcatOffset) /** -*@brief Computes offsets of concat inputs within its output. +*@brief Computes offsets of concat inputs within its output . \n *@par Inputs: *Two inputs, including: * @li concat_dim: A Tensor of type int32. -* @li x: A list of 1D Tensor objects of type int32. +* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n *@par Attributes: *@li Concat_dim: A required int. Must be within the rank of input "x". -*@li N: A required int. +*@li N: A required int . \n *@par Outputs: -*y: A Tensor list with same type as "x". +*y: A Tensor list with same type as "x" . It's a dynamic output. \n *@par Third-party framework compatibility *@ Compatible with the TensorFlow operator ConcatOffset. 
diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h index 2261cd3e..ca85067b 100644 --- a/third_party/fwkacllib/inc/ops/state_ops.h +++ b/third_party/fwkacllib/inc/ops/state_ops.h @@ -26,20 +26,20 @@ namespace ge { /** -*@brief Creates a variable tensor. +*@brief Creates a variable tensor . \n *@par Inputs: -*x: A tensor, used to assign a value to the variable tensor internally. \n -The caller does not need to pass the value of the variable tensor. +*x: A tensor, used to assign a value to the variable tensor internally. +The caller does not need to pass the value of the variable tensor . \n *@par Attributes: *@li index: An integer. Index of the input tensor. *@li value: A tensor, used to pass and record the value of the variable tensor. *@li container: A string. The container of the variable tensor. -*@li shared_name: A string. The shared name of the variable tensor. +*@li shared_name: A string. The shared name of the variable tensor . \n *@par Outputs: -*y: The created variable tensor. +*y: The created variable tensor . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Variable. @@ -56,16 +56,16 @@ REG_OP(Variable) .OP_END_FACTORY_REG(Variable) /** -*@brief Returns a temporary variable tensor. After the use of TemporaryVariable, \n -pass the reference to the variable tensor to the matching DestroyTemporaryVariable op for destruction. +*@brief Returns a temporary variable tensor. After the use of TemporaryVariable, +pass the reference to the variable tensor to the matching DestroyTemporaryVariable op for destruction . \n *@par Attributes: *@li shape: A required list of int32 or int64. The shape of the variable tensor. *@li dtype: Required. The type of elements in the variable tensor. -*@li var_name: An optional string. The name of the variable to be created. +*@li var_name: An optional string. The name of the variable to be created . 
\n *@par Outputs: -*y: The created variable tensor. +*y: The created variable tensor . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator TemporaryVariable. @@ -78,18 +78,18 @@ REG_OP(TemporaryVariable) .OP_END_FACTORY_REG(TemporaryVariable) /** -*@brief Destroys the temporary variable and returns its final value. \n -All other uses of the temporary variable must have been executed before this op. +*@brief Destroys the temporary variable and returns its final value. +All other uses of the temporary variable must have been executed before this op . \n *@par Inputs: -*x: A reference to the temporary variable tensor. +*x: A reference to the temporary variable tensor . \n *@par Attributes: -*var_name: A required string. Name of the temporary variable. \n -Must be the same as the "var_name" attribute of the reference to the temporary variable tensor. +*var_name: A required string. Name of the temporary variable. +Must be the same as the "var_name" attribute of the reference to the temporary variable tensor . \n *@par Outputs: -*y: Final value of the reference to the temporary variable tensor. +*y: Final value of the reference to the temporary variable tensor . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator DestroyTemporaryVariable. @@ -101,13 +101,13 @@ REG_OP(DestroyTemporaryVariable) .OP_END_FACTORY_REG(DestroyTemporaryVariable) /** -*@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized. +*@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized . \n *@par Inputs: -*x: A tensor. +*x: A tensor . \n *@par Outputs: -*y: A tensor, indicating whether "x" has been initialized. +*y: A tensor, indicating whether "x" has been initialized . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator IsVariableInitialized. 
@@ -119,13 +119,13 @@ REG_OP(IsVariableInitialized) .OP_END_FACTORY_REG(IsVariableInitialized) /** -*@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized. +*@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized . \n *@par Inputs: -*x: A tensor. +*x: A tensor . \n *@par Outputs: -*y: A tensor, indicating whether "x" has been initialized, and the data type is boolean. +*y: A tensor, indicating whether "x" has been initialized, and the data type is boolean . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator VarIsInitializedOp. @@ -137,21 +137,21 @@ REG_OP(VarIsInitializedOp) .OP_END_FACTORY_REG(VarIsInitializedOp) /** -*@brief Increments 'ref' until it reaches 'limit'. +*@brief Increments 'ref' until it reaches 'limit' . \n *@par Inputs: -*Inputs include: \n -*ref: A mutable Tensor. Must be one of the following types: int32, int64. +*Inputs include: +*ref: A mutable Tensor. Must be one of the following types: int32, int64 . \n *@par Attributes: -*limit: An int. If incrementing ref would bring it above limit, instead \n - generates an 'OutOfRange' error. +*limit: An int. If incrementing ref would bring it above limit, instead + generates an 'OutOfRange' error . \n *@par Outputs: -*y: A Tensor. Has the same type as ref. +*y: A Tensor. Has the same type as ref . \n -*@attention Constraints:\n -*-The implementation for CountUpTo on Ascend uses AICPU, with bad performance.\n +*@attention Constraints: +*The implementation for CountUpTo on Ascend uses AICPU, with bad performance. *@par Third-party framework compatibility *@li compatible with tensorflow CountUpTo operator. 
diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h index 0bcb87cd..779e7cea 100644 --- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h @@ -27,15 +27,15 @@ namespace ge { /** -*@brief Non-deterministically generates some integers. +*@brief Non-deterministically generates some integers . \n *@par Inputs: -*This op may use some OS-provided source of non-determinism (e.g. an RNG), \n +*This op may use some OS-provided source of non-determinism (e.g. an RNG), *so each execution will give different results. Inputs included: -*@li shape: The shape of the output tensor. +*@li shape: The shape of the output tensor . \n *@par Outputs: -*y:A Returns Non-deterministic integer values with specified shape. +*y:A Returns Non-deterministic integer values with specified shape . \n *@par Third-party framework compatibility *Compatible with tensorflow NonDeterministicInts operator. @@ -48,18 +48,18 @@ REG_OP(NonDeterministicInts) .OP_END_FACTORY_REG(NonDeterministicInts) /** -*@brief Advance the counter of a counter-based RNG. The state of the RNG after \n -*`rng_skip(n)` will be the same as that after `stateful_uniform([n])` \n -*(or any other distribution). The actual increment added to the \n -*counter is an unspecified implementation detail. +*@brief Advance the counter of a counter-based RNG. The state of the RNG after +*`rng_skip(n)` will be the same as that after `stateful_uniform([n])` +*(or any other distribution). The actual increment added to the +*counter is an unspecified implementation detail . \n *@par Inputs: *@li resource: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. -*@li delta: The amount of advancement. +*@li delta: The amount of advancement . \n *@par Outputs: -*y:A Returns the created operation. +*y:A Returns the created operation . 
\n *@par Third-party framework compatibility * Compatible with tensorflow RngSkip operator. @@ -72,23 +72,23 @@ REG_OP(RngSkip) .OP_END_FACTORY_REG(RngSkip) /** -*@brief Outputs random integers from a uniform distribution. \n -The generated values are uniform integers in the range `[minval, maxval)`. \n -The lower bound `minval` is included in the range, while the upper bound \n -`maxval` is excluded. \n -The random integers are slightly biased unless `maxval - minval` is an exact \n -power of two. The bias is small for values of `maxval - minval` significantly \n -smaller than the range of the output (either `2^32` or `2^64`). +*@brief Outputs random integers from a uniform distribution. +The generated values are uniform integers in the range `[minval, maxval)`. +The lower bound `minval` is included in the range, while the upper bound +`maxval` is excluded. +The random integers are slightly biased unless `maxval - minval` is an exact +power of two. The bias is small for values of `maxval - minval` significantly +smaller than the range of the output (either `2^32` or `2^64`) . \n *@par Inputs: *@li resource: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor. *@li minval: Minimum value (inclusive, scalar). -*@li maxval: Maximum value (exclusive, scalar). +*@li maxval: Maximum value (exclusive, scalar) . \n *@par Outputs: -*y:A Returns Random values with specified shape. +*y:A Returns Random values with specified shape . \n *@par Third-party framework compatibility * Compatible with tensorflow StatefulRandomBinomial operator. @@ -105,16 +105,16 @@ REG_OP(StatefulRandomBinomial) .OP_END_FACTORY_REG(StatefulRandomBinomial) /** -*@brief Outputs random values from a normal distribution. \n -*The generated values will have mean 0 and standard deviation 1. +*@brief Outputs random values from a normal distribution. 
+*The generated values will have mean 0 and standard deviation 1 . \n *@par Inputs: *@li resource: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. -*@li shape: The shape of the output tensor. +*@li shape: The shape of the output tensor . \n *@par Outputs: -*y:A Returns A tensor of the specified shape filled with random normal values. +*y:A Returns A tensor of the specified shape filled with random normal values . \n *@par Third-party framework compatibility * Compatible with tensorflow StatefulStandardNormalV2 operator. @@ -128,18 +128,18 @@ REG_OP(StatefulStandardNormalV2) .OP_END_FACTORY_REG(StatefulStandardNormalV2) /** -*@brief Outputs random values from a truncated normal distribution. \n -*The generated values follow a normal distribution with mean 0 and standard \n -*deviation 1, except that values whose magnitude is more than 2 standard \n -*deviations from the mean are dropped and re-picked. +*@brief Outputs random values from a truncated normal distribution. +*The generated values follow a normal distribution with mean 0 and standard +*deviation 1, except that values whose magnitude is more than 2 standard +*deviations from the mean are dropped and re-picked . \n *@par Inputs: *@li resource: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. -*@li shape: The shape of the output tensor. +*@li shape: The shape of the output tensor . \n *@par Outputs: -*y:A Returns Random values with specified shape. +*y:A Returns Random values with specified shape . \n *@par Third-party framework compatibility * Compatible with tensorflow StatefulTruncatedNormal operator. @@ -153,17 +153,17 @@ REG_OP(StatefulTruncatedNormal) .OP_END_FACTORY_REG(StatefulTruncatedNormal) /** -*@brief Outputs random values from a uniform distribution. \n -The generated values follow a uniform distribution in the range `[0, 1)`. 
The \n -lower bound 0 is included in the range, while the upper bound 1 is excluded. \n +*@brief Outputs random values from a uniform distribution. +The generated values follow a uniform distribution in the range `[0, 1)`. The +lower bound 0 is included in the range, while the upper bound 1 is excluded. *@par Inputs: *@li resource: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. -*@li shape: The shape of the output tensor. +*@li shape: The shape of the output tensor . \n *@par Outputs: -*y:A Returns Random values with specified shape. +*y:A Returns Random values with specified shape . \n *@par Third-party framework compatibility * Compatible with tensorflow StatefulUniform operator. @@ -177,16 +177,16 @@ REG_OP(StatefulUniform) .OP_END_FACTORY_REG(StatefulUniform) /** -*@brief Outputs random integers from a uniform distribution. \n -The generated values are uniform integers covering the whole range of `dtype`. +*@brief Outputs random integers from a uniform distribution. +The generated values are uniform integers covering the whole range of `dtype` . \n *@par Inputs: *@li resource: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. -*@li shape: The shape of the output tensor. +*@li shape: The shape of the output tensor . \n *@par Outputs: -*y:A Returns Random values with specified shape. +*y:A Returns Random values with specified shape . \n *@par Third-party framework compatibility * Compatible with tensorflow StatefulUniformFullInt operator. @@ -200,23 +200,23 @@ REG_OP(StatefulUniformFullInt) .OP_END_FACTORY_REG(StatefulUniformFullInt) /** -*@brief Outputs random integers from a uniform distribution. \n -The generated values are uniform integers in the range `[minval, maxval)`. \n -The lower bound `minval` is included in the range, while the upper bound \n -`maxval` is excluded. 
\n -The random integers are slightly biased unless `maxval - minval` is an exact \n -power of two. The bias is small for values of `maxval - minval` significantly \n -smaller than the range of the output (either `2^32` or `2^64`). +*@brief Outputs random integers from a uniform distribution. +The generated values are uniform integers in the range `[minval, maxval)`. +The lower bound `minval` is included in the range, while the upper bound +`maxval` is excluded. +The random integers are slightly biased unless `maxval - minval` is an exact +power of two. The bias is small for values of `maxval - minval` significantly +smaller than the range of the output (either `2^32` or `2^64`) . \n *@par Inputs: *@li resource: The handle of the resource variable that stores the state of the RNG. *@li algorithm: The RNG algorithm. *@li shape: The shape of the output tensor. *@li minval: Minimum value (inclusive, scalar). -*@li maxval: Maximum value (exclusive, scalar). +*@li maxval: Maximum value (exclusive, scalar) . \n *@par Outputs: -*y:A Returns Random values with specified shape. +*y:A Returns Random values with specified shape . \n *@par Third-party framework compatibility * Compatible with tensorflow StatefulUniformInt operator. diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h index ddfda47d..d91bc38a 100644 --- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h @@ -27,20 +27,20 @@ namespace ge { /** -*@brief Draws samples from a multinomial distribution. +*@brief Draws samples from a multinomial distribution . \n *@par Inputs: -include: \n -*@li logits:2-D Tensor with shape [batch_size, num_classes]. Each slice [i, :]\n +include: +*@li logits:2-D Tensor with shape [batch_size, num_classes]. Each slice [i, :] *represents the unnormalized log probabilities for all classes. *@li num_samples:0-D. 
Number of independent samples to draw for each row slice. -*@li seed:The seed to generate random. +*@li seed:The seed to generate random . \n *@par Attributes: -*output_dtype:Output data type. +*output_dtype:Output data type . \n *@par Outputs: -*y:Output random number. +*y:Output random number . \n *@see StatelessMultinomial() @@ -56,16 +56,16 @@ REG_OP(StatelessMultinomial) .OP_END_FACTORY_REG(StatelessMultinomial) /** -*@brief Outputs deterministic pseudorandom random integers from a uniform distribution. +*@brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n *@par Inputs: *@li shape: The shape of the output tensor. *@li seed: 2 seeds (shape [2]). *@li minval: Minimum value (inclusive, scalar). -*@li maxval: Maximum value (exclusive, scalar). +*@li maxval: Maximum value (exclusive, scalar) . \n *@par Outputs: -*y: Returns Random values with specified shape. +*y: Returns Random values with specified shape . \n *@par Third-party framework compatibility * Compatible with TensorFlow StatelessRandomUniformInt operator. diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index 8b4b7250..90ee700d 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -27,22 +27,22 @@ namespace ge { /** -*@brief Split elements of input based on delimiter into a SparseTensor. +*@brief Split elements of input based on delimiter into a SparseTensor . \n *@par Inputs: -include: \n +include: *@li input:1-D. Strings to split. -*@li delimiter:0-D. Delimiter characters (bytes), or empty string. +*@li delimiter:0-D. Delimiter characters (bytes), or empty string . \n *@par Attributes: -* skip_empty:A bool. If True, skip the empty strings from the result. +* skip_empty:A bool. If True, skip the empty strings from the result . \n *@par Outputs: *@li indices:A dense matrix of int64 representing the indices of the sparse tensor. 
*@li values:A vector of strings corresponding to the splited values. -*@li shape:A length-2 vector of int64 representing the shape of the sparse tensor,\n -*where the first value is N and the second value is the maximum number of tokens\n -*in a single input entry. +*@li shape:A length-2 vector of int64 representing the shape of the sparse tensor, +*where the first value is N and the second value is the maximum number of tokens +*in a single input entry . \n *@see StringSplit() @@ -62,22 +62,22 @@ REG_OP(StringSplit) .OP_END_FACTORY_REG(StringSplit) /** -*@brief Split elements of source based on sep into a SparseTensor. +*@brief Split elements of source based on sep into a SparseTensor . \n *@par Inputs: -include: \n +include: *@li input:1-D. Strings to split. -*@li sep:0-D string Tensor, the delimiter character. +*@li sep:0-D string Tensor, the delimiter character . \n *@par Attributes: -* maxsplit:An int. If maxsplit > 0, limit of the split of the result. +* maxsplit:An int. If maxsplit > 0, limit of the split of the result . \n *@par Outputs: *@li indices:A dense matrix of int64 representing the indices of the sparse tensor. *@li values:A vector of strings corresponding to the splited values. -*@li shape:A length-2 vector of int64 representing the shape of the sparse tensor,\n -*where the first value is N and the second value is the maximum number of tokens\n -*in a single input entry. +*@li shape:A length-2 vector of int64 representing the shape of the sparse tensor, +*where the first value is N and the second value is the maximum number of tokens +*in a single input entry . \n *@see StringSplitV2() @@ -97,22 +97,22 @@ REG_OP(StringSplitV2) .OP_END_FACTORY_REG(StringSplitV2) /** -*@brief Determine the script codes of a given tensor of Unicode integer code points. +*@brief Determine the script codes of a given tensor of Unicode integer code points . \n *@par Inputs: -include: \n -*x:A Tensor of int32 Unicode code points. 
+include: +*x:A Tensor of int32 Unicode code points . \n *@par Outputs: -*y:A Tensor of int32 script codes corresponding to each input code point. +*y:A Tensor of int32 script codes corresponding to each input code point . \n -*@attention Constraints:\n -*This operation converts Unicode code points to script codes corresponding to\n -*each code point.\nScript codes correspond to International Components for\n -*Unicode (ICU) UScriptCode values.\n -*See http://icu-project.org/apiref/icu4c/uscript_8h.html.\n -*Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints.\n -*Output shape will match input shape. +*@attention Constraints: +*This operation converts Unicode code points to script codes corresponding to +*each code point. Script codes correspond to International Components for +*Unicode (ICU) UScriptCode values. +*See http://icu-project.org/apiref/icu4c/uscript_8h.html. +*Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. +*Output shape will match input shape . \n *@see UnicodeScript() @@ -128,25 +128,25 @@ REG_OP(UnicodeScript) .OP_END_FACTORY_REG(UnicodeScript) /** -*@brief Return substrings from Tensor of strings. +*@brief Return substrings from Tensor of strings . \n *@par Inputs: -include: \n +include: *@li input:Tensor of strings. *@li pos:Scalar defining the position of first character in each substring. -*@li len:Scalar defining the number of characters to include in each substring. +*@li len:Scalar defining the number of characters to include in each substring . \n *@par Outputs: -*output:Tensor of substrings. +*output:Tensor of substrings . \n -*@attention Constraints:\n -*The hash function is deterministic on the content of the string within\n -*the process and will never change. However, it is not suitable for\n -*cryptography. This function may be used when CPU time is scarce and\n -*inputs are trusted or unimportant. 
There is a risk of adversaries\n -*constructing inputs that all hash to the same bucket.\n -*To prevent this problem, use a strong hash function with\n -*tf.string_to_hash_bucket_strong. +*@attention Constraints: +*The hash function is deterministic on the content of the string within +*the process and will never change. However, it is not suitable for +*cryptography. This function may be used when CPU time is scarce and +*inputs are trusted or unimportant. There is a risk of adversaries +*constructing inputs that all hash to the same bucket. +*To prevent this problem, use a strong hash function with +*tf.string_to_hash_bucket_strong . \n *@see Substr() @@ -164,22 +164,22 @@ REG_OP(Substr) .OP_END_FACTORY_REG(Substr) /** -*@brief Converts each string in the input Tensor to its hash mod by a number of buckets. +*@brief Converts each string in the input Tensor to its hash mod by a number of buckets . \n *@par Inputs: -include: \n -*string_tensor:The strings to assign a hash bucket. +include: +*string_tensor:The strings to assign a hash bucket . \n *@par Outputs: -*y:A Tensor of the same shape as the input x. +*y:A Tensor of the same shape as the input x . \n -*@attention Constraints:\n -*The hash function is deterministic on the content of the string within\n -*the process and will never change. However, it is not suitable for cryptography.\n -*This function may be used when CPU time is scarce and inputs are trusted or\n -*unimportant. There is a risk of adversaries constructing inputs that all hash\n -*to the same bucket. To prevent this problem, use a strong hash function with\n -*tf.string_to_hash_bucket_strong. +*@attention Constraints: +*The hash function is deterministic on the content of the string within +*the process and will never change. However, it is not suitable for cryptography. +*This function may be used when CPU time is scarce and inputs are trusted or +*unimportant. 
There is a risk of adversaries constructing inputs that all hash +*to the same bucket. To prevent this problem, use a strong hash function with +*tf.string_to_hash_bucket_strong . \n *@see StringToHashBucketFast() @@ -196,27 +196,27 @@ REG_OP(StringToHashBucketFast) .OP_END_FACTORY_REG(StringToHashBucketFast) /** -*@brief Converts each string in the input Tensor to its hash mod by a number of buckets. +*@brief Converts each string in the input Tensor to its hash mod by a number of buckets . \n *@par Inputs: -include: \n -*x:The strings to assign a hash bucket. +include: +*x:The strings to assign a hash bucket . \n *@par Attributes: -*num_buckets:The number of buckets. +*num_buckets:The number of buckets . \n *@par Outputs: -*y:A Tensor of the same shape as the input x. +*y:A Tensor of the same shape as the input x . \n -*@attention Constraints:\n -*@li A strong hash is important when inputs may be malicious, e.g. URLs with\n -*additional components. Adversaries could try to make their inputs hash to\n -*the same bucket for a denial-of-service attack or to skew the results.\n -*A strong hash can be used to make it difficult to find inputs with a skewed\n +*@attention Constraints: +*@li A strong hash is important when inputs may be malicious, e.g. URLs with +*additional components. Adversaries could try to make their inputs hash to +*the same bucket for a denial-of-service attack or to skew the results. +*A strong hash can be used to make it difficult to find inputs with a skewed * hash value distribution over buckets. This requires that the hash function\ *is seeded by a high-entropy (random) "key" unknown to the adversary. -*@li The additional robustness comes at a cost of roughly 4x higher\n -*compute time than tf.string_to_hash_bucket_fast. +*@li The additional robustness comes at a cost of roughly 4x higher +*compute time than tf.string_to_hash_bucket_fast . 
\n *@see StringToHashBucketStrong() @@ -234,17 +234,17 @@ REG_OP(StringToHashBucketStrong) .OP_END_FACTORY_REG(StringToHashBucketStrong) /** -*@brief Converts each string in the input Tensor to its hash mod by a number of buckets. +*@brief Converts each string in the input Tensor to its hash mod by a number of buckets . \n *@par Inputs: -include: \n -*string_tensor:The strings to assign a hash bucket. +include: +*string_tensor:The strings to assign a hash bucket . \n *@par Attributes: -*num_buckets:The number of buckets. +*num_buckets:The number of buckets . \n *@par Outputs: -*y:A Tensor of the same shape as the input string_tensor. +*y:A Tensor of the same shape as the input string_tensor . \n *@see StringToHashBucket() @@ -261,14 +261,14 @@ REG_OP(StringToHashBucket) .OP_END_FACTORY_REG(StringToHashBucket) /** -*@brief Strip leading and trailing whitespaces from the Tensor. +*@brief Strip leading and trailing whitespaces from the Tensor . \n *@par Inputs: -include: \n -*x:A string Tensor of any shape. +include: +*x:A string Tensor of any shape . \n *@par Outputs: -*y:A string Tensor of the same shape as the input. +*y:A string Tensor of the same shape as the input . \n *@see StringStrip() @@ -284,22 +284,22 @@ REG_OP(StringStrip) .OP_END_FACTORY_REG(StringStrip) /** -*@brief Computes the length of each string given in the input tensor. +*@brief Computes the length of each string given in the input tensor . \n *@par Inputs: -include: \n -*x:The string for which to compute the length. +include: +*x:The string for which to compute the length . \n *@par Attributes: -*unit:The unit that is counted to compute string length.\n -*One of: "BYTE" (for the number of bytes in each string) or\n -*"UTF8_CHAR" (for the number of UTF-8 encoded Unicode code points in each string).\n -*Results are undefined if unit=UTF8_CHAR and the input strings do not contain\N -*structurally valid UTF-8. +*unit:The unit that is counted to compute string length. 
+*One of: "BYTE" (for the number of bytes in each string) or +*"UTF8_CHAR" (for the number of UTF-8 encoded Unicode code points in each string). +*Results are undefined if unit=UTF8_CHAR and the input strings do not contain +*structurally valid UTF-8 . \n *@par Outputs: -*y:Integer tensor that has the same shape as input.\n -*The output contains the element-wise string lengths of input. +*y:Integer tensor that has the same shape as input. +*The output contains the element-wise string lengths of input . \n *@see StringLength() @@ -316,25 +316,25 @@ REG_OP(StringLength) .OP_END_FACTORY_REG(StringLength) /** -*@brief Joins the strings in the given list of string tensors into one tensor. +*@brief Joins the strings in the given list of string tensors into one tensor . \n *@par Inputs: -*The input is a string tensor of any shape. The pattern is a scalar string tensor\n -*which is applied to every element of the input tensor. The boolean values\n -*(True or False) of the output tensor indicate if the input matches the regex\n -*pattern provided. The pattern follows the re2 syntax\n -*(https://github.com/google/re2/wiki/Syntax).: \n -include: \n -*x:A list of string tensors. The tensors must all have the same shape,\n -*or be scalars. Scalars may be mixed in; these will be broadcast to the shape\n -*of non-scalar inputs. +*The input is a string tensor of any shape. The pattern is a scalar string tensor +*which is applied to every element of the input tensor. The boolean values +*(True or False) of the output tensor indicate if the input matches the regex +*pattern provided. The pattern follows the re2 syntax +*(https://github.com/google/re2/wiki/Syntax).: +include: +*x:A list of string tensors. The tensors must all have the same shape, +*or be scalars. Scalars may be mixed in; these will be broadcast to the shape +*of non-scalar inputs . It's a dynamic input. \n *@par Attributes: *@li N:The length of input x. -*@li separator:string, an optional join separator. 
+*@li separator:string, an optional join separator . \n *@par Outputs: -*y:The output tensor. +*y:The output tensor . \n *@see StringJoin() @@ -352,24 +352,24 @@ REG_OP(StringJoin) .OP_END_FACTORY_REG(StringJoin) /** -*@brief Formats a string template using a list of tensors. +*@brief Formats a string template using a list of tensors . \n *@par Inputs: -*The input is a string tensor of any shape. The pattern is a scalar string tensor\n -*which is applied to every element of the input tensor.\n -*The boolean values (True or False) of the output tensor indicate if the input\n -*matches the regex pattern provided. The pattern follows the re2 syntax\n -*(https://github.com/google/re2/wiki/Syntax).: \n -include: \n -*x:The tensors to format into the placeholder string. +*The input is a string tensor of any shape. The pattern is a scalar string tensor +*which is applied to every element of the input tensor. +*The boolean values (True or False) of the output tensor indicate if the input +*matches the regex pattern provided. The pattern follows the re2 syntax +*(https://github.com/google/re2/wiki/Syntax).: +include: +*x:The tensors to format into the placeholder string . It's a dynamic input. \n *@par Attributes: *@li template:A string, the template to format tensor summaries into. *@li placeholder:A string, at each placeholder in the template a subsequent tensor summary will be inserted. -*@li summarize:When formatting the tensor summaries print the first and last summarize entries of each tensor dimension. +*@li summarize:When formatting the tensor summaries print the first and last summarize entries of each tensor dimension . \n *@par Outputs: -*y:The resulting string scalar. +*y:The resulting string scalar . \n *@see StringFormat() @@ -390,20 +390,20 @@ REG_OP(StringFormat) .OP_END_FACTORY_REG(StringFormat) /** -*@brief Check if the input matches the regex pattern. +*@brief Check if the input matches the regex pattern . 
\n *@par Inputs: -*The input is a string tensor of any shape. The pattern is a scalar string tensor\n -*which is applied to every element of the input tensor. The boolean values \n -*(True or False) of the output tensor indicate if the input matches the regex\n -*pattern provided. The pattern follows the re2 syntax\n -*(https://github.com/google/re2/wiki/Syntax).: \n -include: \n +*The input is a string tensor of any shape. The pattern is a scalar string tensor +*which is applied to every element of the input tensor. The boolean values +*(True or False) of the output tensor indicate if the input matches the regex +*pattern provided. The pattern follows the re2 syntax +*(https://github.com/google/re2/wiki/Syntax).: +include: *@li x:A string tensor of the text to be processed. -*@li pattern:A scalar string tensor containing the regular expression to match the input. +*@li pattern:A scalar string tensor containing the regular expression to match the input . \n *@par Outputs: -*y:A bool tensor with the same shape as input. +*y:A bool tensor with the same shape as input . \n *@see RegexFullMatch() @@ -420,25 +420,25 @@ REG_OP(RegexFullMatch) .OP_END_FACTORY_REG(RegexFullMatch) /** -*@brief Replaces matches of the pattern regular expression in input with the\n -*replacement string provided in rewrite. +*@brief Replaces matches of the pattern regular expression in input with the +*replacement string provided in rewrite . \n *@par Inputs: -*It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax).: \n -include: \n +*It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax).: +include: *@li x:The text to be processed. *@li pattern:The regular expression to be matched in the input strings. -*@li rewrite:The rewrite string to be substituted for the pattern expression\n -*where it is matched in the input strings. +*@li rewrite:The rewrite string to be substituted for the pattern expression +*where it is matched in the input strings . 
\n *@par Attributes: -*replace_global:If True, the replacement is global\n -*(that is, all matches of the pattern regular expression in each input string\n -*are rewritten), otherwise the rewrite substitution is only made for the first\n -* pattern match. +*replace_global:If True, the replacement is global +*(that is, all matches of the pattern regular expression in each input string +*are rewritten), otherwise the rewrite substitution is only made for the first +* pattern match . \n *@par Outputs: -*y:The text after applying pattern match and rewrite substitution. +*y:The text after applying pattern match and rewrite substitution . \n *@see RegexReplace() @@ -457,26 +457,26 @@ REG_OP(RegexReplace) .OP_END_FACTORY_REG(RegexReplace) /** -*@brief Converts each entry in the given tensor to strings. +*@brief Converts each entry in the given tensor to strings . \n *@par Inputs: -*Supports many numeric types and boolean.: \n -include: \n -*x:A tensor can be trans to string. +*Supports many numeric types and boolean.: +include: +*x:A tensor can be trans to string . \n *@par Attributes: -*@li precision:The post-decimal precision to use for floating point numbers.\n +*@li precision:The post-decimal precision to use for floating point numbers. *Only used if precision > -1. *@li scientific:Use scientific notation for floating point numbers. -*@li shortest:Use shortest representation (either scientific or standard)\n +*@li shortest:Use shortest representation (either scientific or standard) *for floating point numbers.. -*@li width:Pad pre-decimal numbers to this width. Applies to both floating\n +*@li width:Pad pre-decimal numbers to this width. Applies to both floating *point and integer numbers. Only used if width > -1. -*@li fill:The value to pad if width > -1. If empty, pads with spaces.\n -*Another typical value is '0'. String cannot be longer than 1 character. +*@li fill:The value to pad if width > -1. If empty, pads with spaces. +*Another typical value is '0'. 
String cannot be longer than 1 character . \n *@par Outputs: -*y:The output tensor. +*y:The output tensor . \n *@see AsString() @@ -498,26 +498,26 @@ REG_OP(AsString) .OP_END_FACTORY_REG(AsString) /** -*@brief Encode strings into web-safe base64 format. +*@brief Encode strings into web-safe base64 format . \n *@par Inputs: -*Input may or may not have padding at the end. See EncodeBase64 for padding.\n -*Web-safe means that input must use - and _ instead of + and /.: \n -include: \n -*x:Strings to be encoded. +*Input may or may not have padding at the end. See EncodeBase64 for padding. +*Web-safe means that input must use - and _ instead of + and /.: +include: +*x:Strings to be encoded . \n *@par Attributes: -*pad:Bool whether padding is applied at the ends. +*pad:Bool whether padding is applied at the ends . \n *@par Outputs: -*y:Input strings encoded in base64. +*y:Input strings encoded in base64 . \n -*@attention Constraints:\n -*Refer to the following article for more information on base64 format:\n -*en.wikipedia.org/wiki/Base64. Base64 strings may have padding with '='\n -*at the end so that the encoded has length multiple of 4.\n -*See Padding section of the link above. Web-safe means that the encoder\n -*uses - and _ instead of + and /. +*@attention Constraints: +*Refer to the following article for more information on base64 format: +*en.wikipedia.org/wiki/Base64. Base64 strings may have padding with '=' +*at the end so that the encoded has length multiple of 4. +*See Padding section of the link above. Web-safe means that the encoder +*uses - and _ instead of + and / . \n *@see EncodeBase64() @@ -534,16 +534,16 @@ REG_OP(EncodeBase64) .OP_END_FACTORY_REG(EncodeBase64) /** -*@brief Decode web-safe base64-encoded strings. +*@brief Decode web-safe base64-encoded strings . \n *@par Inputs: -*Input may or may not have padding at the end. 
See EncodeBase64 for padding.\n -*Web-safe means that input must use - and _ instead of + and /.: \n -include: \n -*x:Base64 strings to decode. +*Input may or may not have padding at the end. See EncodeBase64 for padding. +*Web-safe means that input must use - and _ instead of + and /.: +include: +*x:Base64 strings to decode . \n *@par Outputs: -*y:Decoded strings. +*y:Decoded strings . \n *@see DecodeBase64() diff --git a/third_party/fwkacllib/inc/ops/swap_co_ops.h b/third_party/fwkacllib/inc/ops/swap_co_ops.h index a6c0f9ca..fb25c741 100644 --- a/third_party/fwkacllib/inc/ops/swap_co_ops.h +++ b/third_party/fwkacllib/inc/ops/swap_co_ops.h @@ -26,26 +26,28 @@ namespace ge { /** -*@brief Folds the convolution input weight constant of the preceding layer \n -* of PSROIPooling to convert the N dimension of the weight from \n -* (output_dim, group_size*group_size) to \n +*@brief Folds the convolution input weight constant of the preceding layer +* of PSROIPooling to convert the N dimension of the weight from +* (output_dim, group_size*group_size) to * (group_size*group_size, int((output_dim+15)/C0)*C0). *@see PSROIPooling *@par Inputs: * One input: -*x: An NCHW tensor of type float16 or float32, describing the weight of\n -* convolution. Dim N must equal output_dim*group_size*group_size. +*x: An NCHW tensor of type float16 or float32, describing the weight of +* convolution. Dim N must equal output_dim*group_size*group_size . \n *@par Attributes: -*@li output_dim: A required int32, specifying the number of output channels.\n +*@li output_dim: A required int32, specifying the number of output channels. * Must be greater than "0". -*@li group_size: A required int32, specifying the number of groups to encode\n -* position-sensitive score maps. Must be within the range (0, 128). +*@li group_size: A required int32, specifying the number of groups to encode +* position-sensitive score maps. Must be within the range (0, 128) . 
\n *@par Outputs: -*y: An NCHW tensor of type float16 or float32, describing the result weight\n +*y: An NCHW tensor of type float16 or float32, describing the result weight * of convolution. +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(SwapCo) diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index a7c33ab5..5414f122 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -28,16 +28,16 @@ namespace ge { *@brief This operation convert output dataType and shape *@par Inputs: -*The input handle must have the resource type. Inputs include: \n -*@li x:A list of Tensor objects. One or more tensors from which \n -the enqueued tensors should be taken. +*The input handle must have the resource type. Inputs include: +*@li x:A list of Tensor objects. One or more tensors from which +the enqueued tensors should be taken . \n *@par Outputs: -*@li y:A list of Tensor objects. One or more tensors from which \n -the enqueued tensors should be taken. +*@li y:A list of Tensor objects. One or more tensors from which +the enqueued tensors should be taken . \n *@par Attributes: -*@li type: An optional ge::DataType. It refers to the target data type of outputs. +*@li type: An optional ge::DataType. It refers to the target data type of outputs . \n *@par Third-party framework compatibility *Compatible with tensorflow QueueIsClosed operator. @@ -54,10 +54,10 @@ REG_OP(Bitcast) .OP_END_FACTORY_REG(Bitcast) /** -*@brief Convert tensor format from HWCN to C1HWNCoC0. +*@brief Convert tensor format from HWCN to C1HWNCoC0 . \n *@par Inputs: -*x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN. +*x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN . \n *@par Outputs: *y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0. 
@@ -68,13 +68,13 @@ REG_OP(DepthwiseWeight4DTo6D) .OP_END_FACTORY_REG(DepthwiseWeight4DTo6D) /** -*@brief Convert tensor format from C1HWNCoC0 to HWCN. +*@brief Convert tensor format from C1HWNCoC0 to HWCN . \n *@par Inputs: -*x: A Tensor. Must be 6D Tensor of type float16, float32, int32, uint16, with format C1HWNCoC0. +*x: A Tensor. Must be 6D Tensor of type float16, float32, int32, uint16, with format C1HWNCoC0 . \n *@par Attributes: -*channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN. +*channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN . \n *@par Outputs: *y: A 4D Tensor. Has the same type as "x", with format HWCN. @@ -86,14 +86,14 @@ REG_OP(DepthwiseWeight6DTo4D) .OP_END_FACTORY_REG(DepthwiseWeight6DTo4D) /** -*@brief Permutes the dimensions according to perm.\n - The returned tensor's dimension i will correspond to the input dimension perm[i]. +*@brief Permutes the dimensions according to perm. + The returned tensor's dimension i will correspond to the input dimension perm[i] . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n *@par Attributes: -*perm: A permutation of the dimensions of "x". +*perm: A permutation of the dimensions of "x" . \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -109,16 +109,16 @@ REG_OP(TransposeD) .OP_END_FACTORY_REG(TransposeD) /** -*@brief Permutes the dimensions according to perm.\n - The returned tensor's dimension i will correspond to the input dimension perm[i]. +*@brief Permutes the dimensions according to perm. + The returned tensor's dimension i will correspond to the input dimension perm[i] . \n *@par Inputs: *Two inputs, including: *@li x: A Tensor. 
Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. -*@li perm: A Tensor of type int32 or int64. A permutation of the dimensions of "x". +*@li perm: A Tensor of type int32 or int64. A permutation of the dimensions of "x" . \n *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type as "x" . \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Transpose. @@ -133,14 +133,14 @@ REG_OP(Transpose) *@brief Doing format_transfer for various data format only support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW" "NCHW" to "FRACTAL_Zn" or "FRACTAL_Zn" to "NCHW". -"HWCN" to "FRACTAL_Zn" or "FRACTAL_Zn" to "HWCN". +"HWCN" to "FRACTAL_Zn" or "FRACTAL_Zn" to "HWCN" . \n *@par Inputs: -*src: A Tensor dtype of all types. +*src: A Tensor dtype of all types . \n *@par Attributes: *@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. -*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. +*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc . \n *@par Outputs: *dst: A Tensor dtype of all types. @@ -153,11 +153,11 @@ REG_OP(TransData) .OP_END_FACTORY_REG(TransData) /** -*@brief Permutes the dimensions according to order.\n - The returned tensor's dimension i will correspond to the input dimension order[i]. +*@brief Permutes the dimensions according to order. + The returned tensor's dimension i will correspond to the input dimension order[i] . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32. +*x: A Tensor. Must be one of the following types: float16, float32 . \n *@par Attributes: *order: A permutation of the dimensions of "x".Type is int32.support any axis transformation.Defaults to "{0}" @@ -173,17 +173,17 @@ REG_OP(Permute) /** *@brief Flattens the inputs. 
Reserves axis 0 and flattens the input tensors -* along axis 1. +* along axis 1 . \n *@par Inputs: -*One input: \n +*One input: *x: A multi-dimensional Tensor. Must be one of the following types: -* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32. +* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 . \n *@par Outputs: *y: A 2D flattened Tensor (Reserves axis 0 and flattens the input tensors * along axis 1). Must be one of the following data types: int8, uint8, int16, -* uint16, int32, uint32, int64,uint64, float16, float32. +* uint16, int32, uint32, int64,uint64, float16, float32 . \n *@par Third-party framework compatibility * Compatible with TensorFlow operator Flatten. @@ -198,17 +198,17 @@ REG_OP(Flatten) .OP_END_FACTORY_REG(Flatten) /** -*@brief Permutes and crops the input tensor. +*@brief Permutes and crops the input tensor . \n *@par Inputs: * Three inputs, including: *@li x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0. *@li block_shape: A 1D list or tuple of int32 or int64. -*@li crops: A 2D list or tuple of int32 or int64. Specifies the amount to -*crop from start and end dimensions after permutation. +*@li crops: A 2D list or tuple of int32 or int64. Specifies the amount to +*crop from start and end dimensions after permutation . \n *@par Outputs: -*y: A Tensor with format NC1HWC0. Has the same type as input "x". +*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchToSpaceND. @@ -221,16 +221,16 @@ REG_OP(BatchToSpaceND) .OP_END_FACTORY_REG(BatchToSpaceND) /** -*@brief Permutes and crops the input tensor. +*@brief Permutes and crops the input tensor . \n *@par Inputs: * One input: -*x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0. +*x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0 . 
\n *@par Attributes: *@li block_shape: A required 1D list or tuple of int32 or int64. *@li crops: A required 2D list or tuple of int32 or int64. Specifies the amount to crop -* from the start and end dimensions after permutation. +* from the start and end dimensions after permutation . \n *@par Outputs: *y: A Tensor with format NC1HWC0. Has the same type as input "x". @@ -238,6 +238,9 @@ REG_OP(BatchToSpaceND) *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchToSpaceND. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpaceND instead. */ REG_OP(BatchToSpaceNDD) .INPUT(x, TensorType::BasicType()) @@ -247,16 +250,16 @@ REG_OP(BatchToSpaceNDD) .OP_END_FACTORY_REG(BatchToSpaceNDD) /** -*@brief Pads and permutes the input tensor. +*@brief Pads and permutes the input tensor . \n *@par Inputs: -* Three inputs, including: \n +* Three inputs, including: *@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. *@li block_shape: A 1D list or tuple of int32 or int64. -*@li paddings: A 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation. +*@li paddings: A 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n *@par Outputs: -*y: A Tensor with format NC1HWC0. Has the same type as input "x". +*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SpaceToBatchND. @@ -269,21 +272,24 @@ REG_OP(SpaceToBatchND) .OP_END_FACTORY_REG(SpaceToBatchND) /** -*@brief Pads and permutes the input tensor. +*@brief Pads and permutes the input tensor . \n *@par Inputs: -* One input: \n -*x: A 5D Tensor of type float16 or float32, with format NC1HWC0. +* One input: +*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . 
\n *@par Attributes: *@li block_shape: A required 1D list or tuple of int32 or int64. -*@li paddings: A required 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation. +*@li paddings: A required 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n *@par Outputs: -*y: A Tensor with format NC1HWC0. Has the same type as input "x". +*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator SpaceToBatchND. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatchND instead. */ REG_OP(SpaceToBatchNDD) .INPUT(x, TensorType::BasicType()) @@ -294,7 +300,7 @@ REG_OP(SpaceToBatchNDD) /** *@brief Outputs a copy of the input tensor where values from the "height" and -* "width" dimensions are moved to the "depth" dimension. +* "width" dimensions are moved to the "depth" dimension . \n *@par Inputs: *x: An NHWC Tensor. Must be one of the following types: @@ -305,7 +311,7 @@ REG_OP(SpaceToBatchNDD) *@par Attributes: *@li block_size: A required int, specifying the input block size. *@li data_format: An optional string, specifying the data format. Defaults to -* "NHWC". +* "NHWC" . \n *@par Outputs: *y: A Tensor. Has the same type as input "x". @@ -320,7 +326,7 @@ REG_OP(SpaceToDepth) .OP_END_FACTORY_REG(SpaceToDepth) /** -*@brief Rearranges data from depth into blocks of spatial data. +*@brief Rearranges data from depth into blocks of spatial data . \n *@par Inputs: *x: A Tensor. Must be one of the following types: float16, float32, double, int32, uint8, @@ -330,10 +336,10 @@ REG_OP(SpaceToDepth) *@par Attributes: *Two attributes, including: * @li block_size: An int >= 2, specifying the size of the spatial block. -* @li data_format: An optional string, specifying the data format. Defaults to "NHWC". 
+* @li data_format: An optional string, specifying the data format. Defaults to "NHWC" . \n *@par Outputs: -*y: A Tensor of the same type as "x". +*y: A Tensor of the same type as "x" . \n *@par Third-party framework compatibility: * Compatible with TensorFlow operator DepthToSpace. @@ -346,25 +352,25 @@ REG_OP(DepthToSpace) .OP_END_FACTORY_REG(DepthToSpace) /** -*@brief Permutes data into spatial data blocks and then prunes them. +*@brief Permutes data into spatial data blocks and then prunes them . \n *@par Inputs: *@li x: A 4D Tensor with format NHWC. -*@li crops: A 1D list or tuple of int32 or int64. +*@li crops: A 1D list or tuple of int32 or int64 . \n *Must be one of the following types: float16, float32 *@par Attributes: -*block_size: A required int8, int16, int32, or int64. No default value. +*block_size: A required int8, int16, int32, or int64. No default value . \n *@par Outputs: *y: A 4D Tensor with format NHWC, -* of type float16 or float32. +* of type float16 or float32 . \n *@attention Constraints: *@li The size of the first dimension of input "x" must be divisible by (block_size * block_size). -*@li "crops" is a 4Dshape [batch, height, width, depth], height = height_pad - crop_top - crop_bottom, +*@li "crops" is a 4Dshape [batch, height, width, depth], height = height_pad - crop_top - crop_bottom, *width = width_pad - crop_left - crop_right. *@li block_size > 2 @@ -379,23 +385,23 @@ REG_OP(BatchToSpace) .OP_END_FACTORY_REG(BatchToSpace) /** -*@brief Rearrange the batch (permutes) data into spatial data blocks, and then crop them. +*@brief Rearrange the batch (permutes) data into spatial data blocks, and then crop them . \n *@par Inputs: * One input: *x: An Tensor of shape [batch*block_size*block_size, height_pad/block_size, width_pad/block_size, depth]. *The batch size of the input tensor must be divisible by (block size * block size). 
-*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, -*int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, +*int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n *@par Attributes: *@li block_size: Must be one of the following types: `int32`, `int64`. *@li crops: An Tensor. Must be one of the following types: int32, Int64. *2D tensor with non negative integer of shape [2, 2]. It specifies how many -*elements are clipped from the intermediate result of spatial dimension. +*elements are clipped from the intermediate result of spatial dimension . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x" . \n *@attention Constraints: *@li The size of the first dimension of input "x" must be divisible by (block_size * block_size). @@ -405,6 +411,9 @@ REG_OP(BatchToSpace) *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchToSpace. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead. */ REG_OP(BatchToSpaceD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, @@ -419,17 +428,17 @@ REG_OP(BatchToSpaceD) /** *@brief Outputs a copy of the input tensor where values from the "height" and -* "width" dimensions are padded and rearranged to the "batch" dimension. +* "width" dimensions are padded and rearranged to the "batch" dimension . \n *@par Inputs: * Two inputs, including: *@li x: An NHWC Tensor. Must be one of the following types: * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. -*@li paddings: A 2D tensor of type int, specifying the input. 
+*@li paddings: A 2D tensor of type int, specifying the input . \n *@par Attributes: -*block_size: A required int, specifying the input block size. +*block_size: A required int, specifying the input block size . \n *@par Outputs: *y: A Tensor. Has the same type as input "x". @@ -444,7 +453,7 @@ REG_OP(SpaceToBatch) .OP_END_FACTORY_REG(SpaceToBatch) /** -*@brief Outputs a copy of the input tensor where values from the "height" and "width" dimensions are padded and rearranged to the "batch" dimension. +*@brief Outputs a copy of the input tensor where values from the "height" and "width" dimensions are padded and rearranged to the "batch" dimension . \n *@par Inputs: *x: An NHWC Tensor. Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. @@ -452,12 +461,15 @@ REG_OP(SpaceToBatch) *@par Attributes: *@li block_size: A required int, specifying the input block size. -*@li paddings: A 2D tensor. All data types are supported. +*@li paddings: A 2D tensor. All data types are supported . \n *@par Outputs: *y: A Tensor. Has the same type as input "x". *@par Third-party framework compatibility *@ Compatible with the TensorFlow operator SpaceToBatch. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatch instead. */ REG_OP(SpaceToBatchD) .INPUT(x, TensorType::BasicType()) @@ -468,24 +480,24 @@ REG_OP(SpaceToBatchD) /** * @brief Unpacks the given dimension of a rank-R Tensor "x" into rank-(R-1) -* tensors. +* tensors . \n * @par Inputs: -* x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0. +* x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0 . \n * @par Attributes: * @li num: A required int, specifying the number of tensors to be unpacked to. * Defaults to "None". * @li axis: An optional int, specifying the axis to unpack along. The value range -* is [-R, R). +* is [-R, R) . 
\n * @par Outputs: -* y: Dynamic output. The list of Tensor objects unpacked from "x", of type BasicType. +* y: Dynamic output. The list of Tensor objects unpacked from "x", of type BasicType . \n * @attention Constraints: * @li If "num" is not specified, it is inferred from the shape of "x". * @li For the ND format, "axis" is in the range [-R, R); For the NC1HWC0 format, -* "axis" must not be 2, 3, -2, or -3. +* "axis" must not be 2, 3, -2, or -3 . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator Unpack. @@ -499,7 +511,7 @@ REG_OP(Unpack) /** * @brief Extract "patches" from "images" and stacks them in the "depth" -* dimension of the output. +* dimension of the output . \n * @par Inputs: * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the @@ -511,22 +523,22 @@ REG_OP(Unpack) * dimension of images. * @li strides: A required list or tuple. How far the centers of two consecutive * patches are in the images. Must be: [1, stride_rows, stride_cols, 1]. -* @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1].\n -* This is the input stride, specifying how far two consecutive patch\n +* @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1]. +* This is the input stride, specifying how far two consecutive patch * samples are in the input. Equivalent to extracting patches -* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *\n -* (rates - 1), followed by subsampling them spatially by a factor of rates.\n +* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) * +* (rates - 1), followed by subsampling them spatially by a factor of rates. * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. -* @li padding: A required string. The type of padding algorithm to use. +* @li padding: A required string. The type of padding algorithm to use . 
\n * @par Outputs: -* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *\n -* ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols\n -* x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols"\n -* are the dimensions of the output patches. +* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * +* ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols +* x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols" +* are the dimensions of the output patches . \n * @attention Constraints: -* "ksizes", "strides" and "rates" are lists of integers. +* "ksizes", "strides" and "rates" are lists of integers . \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator ExtractImagePatches. @@ -542,23 +554,23 @@ REG_OP(ExtractImagePatches) /** * @brief Extract "patches" from "input" and put them in the "depth" -* dimension of the output. +* dimension of the output . \n * @par Inputs: -* x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth]. +* x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n * @par Attributes: * @li ksizes: A required list or tuple. The size of the sliding window for each * dimension of "x". * @li strides: A required list or tuple. How far the centers of two consecutive * patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1]. -* @li padding: A required string. The type of padding algorithm to use. +* @li padding: A required string. The type of padding algorithm to use . \n * @par Outputs: -* Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * \n -* ksize_rows * ksize_cols * depth] containing patches with size (ksize_rows * ksize_cols\n -* * depth) vectorized in the "depth" dimension. Note "out_planes", "out_rows" and "out_cols"\n -* are the dimensions of the output patches. 
+* Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * +* ksize_rows * ksize_cols * depth] containing patches with size (ksize_rows * ksize_cols +* * depth) vectorized in the "depth" dimension. Note "out_planes", "out_rows" and "out_cols" +* are the dimensions of the output patches . \n * @attention Constraints: * "ksizes" and "strides" are lists of integers. @@ -574,18 +586,21 @@ REG_OP(ExtractVolumePatches) .OP_END_FACTORY_REG(ExtractVolumePatches) /** -*@brief Confuse reshape and transpose. +*@brief Confuse reshape and transpose . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n *@par Attributes: *@li perm: A permutation of the dimensions of "x". *@li shape: The shape of the input. -*@li transpose_first: If True, the transpose is first, otherwise the reshape is first. +*@li transpose_first: If True, the transpose is first, otherwise the reshape is first . \n *@par Outputs: *y: A Tensor. Has the same type as "x". +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ConfusionTranspose instead. */ REG_OP(ConfusionTransposeD) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, @@ -598,15 +613,15 @@ REG_OP(ConfusionTransposeD) .OP_END_FACTORY_REG(ConfusionTransposeD) /** -*@brief Confuse reshape and transpose. +*@brief Confuse reshape and transpose . \n *@par Inputs: *@li x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. -*@li shape: The shape of the input. +*@li shape: The shape of the input . \n *@par Attributes: *@li perm: A permutation of the dimensions of "x". -*@li transpose_first: If True, the transpose is first, otherwise the reshape is first. 
+*@li transpose_first: If True, the transpose is first, otherwise the reshape is first . \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -620,17 +635,17 @@ REG_OP(ConfusionTranspose) .OP_END_FACTORY_REG(ConfusionTranspose) /** -*@brief Flattens the input tensor to one-dimensional. +*@brief Flattens the input tensor to one-dimensional . \n *@par Inputs: -*x: An ND tensor. All data types are supported. +*x: An ND tensor. All data types are supported . \n *@par Attributes: *@li axis: An optional int32, specifying the first axis to flatten. All preceding axes are retained in the output. Defaults to "1". -*@li end_axis: An optional int32, specifying the last axis to flatten. All following axes are retained in the output. Defaults to "-1". +*@li end_axis: An optional int32, specifying the last axis to flatten. All following axes are retained in the output. Defaults to "-1" . \n *@par Outputs: -*y: The flattened ND tensor. All data types are supported. +*y: The flattened ND tensor. All data types are supported . \n *@attention Constraints: * "axis" and "end_axis" must be within the dimension range of the input. This operator cannot be directly called by the acllopExecute API. @@ -651,19 +666,51 @@ REG_OP(DeConvTrans) .OUTPUT(y, TensorType({DT_INT8})) .OP_END_FACTORY_REG(DeConvTrans) +/** +*@brief Compress large weight to small one. Usually inserted before Conv2d. +* +*@par Inputs: +*weight: A tensor before compress. Must be one of the following types: DT_INT8, DT_FLOAT16 +* +*@par Outputs: +*@li weight_compress: A tensor after compress. Must be one of the following types: DT_INT8, DT_FLOAT16 +*@li compress_index: A tensor. Must be one of the following types: DT_INT8 +* +*@par Attributes: +*compress_parameters: A required int8, specifying the compressing block. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ REG_OP(Compress) - .INPUT(weight, TensorType({DT_INT8, DT_FLOAT16})) - .OUTPUT(weight_compress, TensorType({DT_INT8, DT_FLOAT16})) - .OUTPUT(compress_index, TensorType({DT_INT8})) - .REQUIRED_ATTR(compress_parameters, ListInt) - .OP_END_FACTORY_REG(Compress) +.INPUT(weight, TensorType({DT_INT8, DT_FLOAT16})) +.OUTPUT(weight_compress, TensorType({DT_INT8, DT_FLOAT16})) +.OUTPUT(compress_index, TensorType({DT_INT8})) +.REQUIRED_ATTR(compress_parameters, ListInt) +.OP_END_FACTORY_REG(Compress) +/** +*@brief Compress large weight to small one. Usually inserted before FullyConnection. +* +*@par Inputs: +*weight: A tensor before compress. Must be one of the following types: DT_INT8, DT_FLOAT16 +* +*@par Outputs: +*@li weight_compress: A tensor after compress. Must be one of the following types: DT_INT8, DT_FLOAT16 +*@li compress_index: A tensor. Must be one of the following types: DT_INT8 +* +*@par Attributes: +*compress_parameters: A required int8, specifying the compressing block. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ REG_OP(CompressFcOp) - .INPUT(weight, TensorType({DT_INT8})) - .OUTPUT(weight_compress, TensorType({DT_INT8})) - .OUTPUT(compress_index, TensorType({DT_INT8})) - .REQUIRED_ATTR(compress_parameters, ListInt) - .OP_END_FACTORY_REG(CompressFcOp) +.INPUT(weight, TensorType({DT_INT8})) +.OUTPUT(weight_compress, TensorType({DT_INT8})) +.OUTPUT(compress_index, TensorType({DT_INT8})) +.REQUIRED_ATTR(compress_parameters, ListInt) +.OP_END_FACTORY_REG(CompressFcOp) } // namespace ge #endif // GE_OP_TRANSFORMATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h index bf8ecd53..2f014937 100644 --- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h +++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h @@ -26,17 +26,17 @@ namespace ge { /** -*@brief Applies a perspective transformation to an image. 
+*@brief Applies a perspective transformation to an image . \n *@par Inputs: *@li x: input tensor, format NCHW, type must be float. -*@li matrix: transformation matrix, format ND , shape must be (N, 9), type must be float. +*@li matrix: transformation matrix, format ND , shape must be (N, 9), type must be float . \n *@par Attributes: *@li out_height:output height. *@li out_width:output width. *@li borderType:border processing way, only support BORDER_CONSTANT and BORDER_REPLICATE, default BORDER_CONSTANT. -*@li constant: border processed value when borderType is BORDER_CONSTANT. +*@li constant: border processed value when borderType is BORDER_CONSTANT . \n *@par Outputs: *@li y: output tensor, format NCHW, type must be float. diff --git a/third_party/fwkacllib/inc/register/op_tiling.h b/third_party/fwkacllib/inc/register/op_tiling.h index e9d19f94..bcd4cd5e 100644 --- a/third_party/fwkacllib/inc/register/op_tiling.h +++ b/third_party/fwkacllib/inc/register/op_tiling.h @@ -18,6 +18,7 @@ #define INC_OP_TILING_H_ #include "external/register/register_types.h" +#include "external/graph/tensor.h" #include "graph/debug/ge_attr_define.h" #include "graph/node.h" @@ -73,7 +74,7 @@ struct OpRunInfo { using TeOpAttrArgs = std::vector; -using TeConstTensorData = std::tuple; +using TeConstTensorData = std::tuple; struct TeOpParas { std::vector inputs; @@ -94,6 +95,7 @@ public: OpTilingInterf(std::string op_type, OpTilingFunc func); ~OpTilingInterf() = default; static std::map &RegisteredOpInterf(); + static std::string OpTilingUuid; }; diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index ba9503cc..4b08916e 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -130,6 +130,7 @@ typedef enum tagRtError { RT_ERROR_KERNEL_TYPE, RT_ERROR_KERNEL_OFFSET, RT_ERROR_KERNEL_DUPLICATE, + RT_ERROR_KERNEL_UNREGISTERING, RT_ERROR_PROGRAM_BASE = 0x07090000, RT_ERROR_PROGRAM_NULL, diff --git 
a/third_party/patch/securec/securec.patch001 b/third_party/patch/securec/securec.patch001 index 8376784b..666f28ce 100644 --- a/third_party/patch/securec/securec.patch001 +++ b/third_party/patch/securec/securec.patch001 @@ -1,12 +1,11 @@ -diff -Npur bounds_checking_function/CMakeLists.txt securec/CMakeLists.txt +diff -Npur -x .git bounds_checking_function/CMakeLists.txt securec/CMakeLists.txt --- bounds_checking_function/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 -+++ securec/CMakeLists.txt 2020-05-11 17:10:49.406735400 +0800 -@@ -0,0 +1,19 @@ ++++ securec/CMakeLists.txt 2020-09-19 16:53:48.689460700 +0800 +@@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.14) +project(Securec) -+set(CMAKE_BUILD_TYPE "Debug") +set(CMAKE_C_FLAGS_DEBUG "$ENV{CFLAGS} -fPIC -O0 -Wall -Wno-deprecated-declarations -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)='") -+set(CMAKE_C_FLAGS_RELEASE "$ENV{CFLAGS} -fPIC -O3 -Wall -Wno-deprecated-declarations") ++set(CMAKE_C_FLAGS_RELEASE "$ENV{CFLAGS} -fPIC -Wall -D_FORTIFY_SOURCE=2 -O2 -Wno-deprecated-declarations -fstack-protector-all -Wl,-z,relro,-z,now") +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +#add flags