From 4c708fd78ee5e4d5a3b4852f3eafc8b3b4142a39 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Wed, 18 May 2022 20:30:04 +0800 Subject: [PATCH] upgrade Ascend package 18 May 22 --- inc/external/ge/ge_api_types.h | 2 + inc/external/hccl/hccl.h | 28 +++- inc/framework/common/debug/log.h | 2 +- inc/framework/common/ge_types.h | 1 + inc/framework/common/op_types.h | 10 +- inc/framework/common/profiling_definitions.h | 5 +- inc/framework/common/runtime_tensor_desc.h | 38 +++++ inc/framework/common/types.h | 1 + inc/framework/common/util.h | 14 +- inc/framework/omg/parser/model_parser.h | 35 ++-- inc/framework/omg/parser/parser_factory.h | 9 +- inc/framework/omg/parser/weights_parser.h | 11 ++ inc/framework/pne/process_node_engine.h | 12 ++ inc/framework/runtime/gert_api.h | 27 ++++ inc/framework/runtime/model_desc.h | 94 +++++++++++ inc/framework/runtime/model_v2_executor.h | 142 ++++++++++++++++ metadef | 2 +- third_party/fwkacllib/inc/hccl/base.h | 2 + third_party/fwkacllib/inc/ops/all_ops.h | 2 + third_party/fwkacllib/inc/ops/array_ops.h | 33 ++++ .../fwkacllib/inc/ops/case_condition_ops.h | 53 ------ .../inc/ops/coordinates_1d_to_2d_ops.h | 48 ------ .../inc/ops/elewise_calculation_ops.h | 4 +- third_party/fwkacllib/inc/ops/encoding_ops.h | 49 ++++++ .../fwkacllib/inc/ops/functional_ops.h | 24 +++ .../fwkacllib/inc/ops/index_to_addr_ops.h | 63 -------- third_party/fwkacllib/inc/ops/map_ops.h | 152 ++++++++++++++++++ .../inc/ops/matrix_calculation_ops.h | 79 +++++++++ third_party/fwkacllib/inc/ops/nn_detect_ops.h | 40 +++++ .../fwkacllib/inc/ops/nonlinear_fuc_ops.h | 32 ++-- third_party/fwkacllib/inc/ops/selection_ops.h | 28 ++++ .../fwkacllib/inc/ops/slice_write_ops.h | 50 ------ third_party/fwkacllib/inc/ops/sparse_ops.h | 6 +- third_party/fwkacllib/inc/ops/vector_search.h | 92 +++++++++++ third_party/fwkacllib/inc/runtime/rt_model.h | 14 ++ 35 files changed, 933 insertions(+), 271 deletions(-) create mode 100644 inc/framework/common/runtime_tensor_desc.h create mode 100644 inc/framework/runtime/gert_api.h create mode 100644 inc/framework/runtime/model_desc.h create mode 100644 inc/framework/runtime/model_v2_executor.h delete mode 100644 third_party/fwkacllib/inc/ops/case_condition_ops.h delete mode 100644 third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h create mode 100644 third_party/fwkacllib/inc/ops/encoding_ops.h delete mode 100644 third_party/fwkacllib/inc/ops/index_to_addr_ops.h create mode 100644 third_party/fwkacllib/inc/ops/map_ops.h delete mode 100644 third_party/fwkacllib/inc/ops/slice_write_ops.h diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index f513cd51..8ed1174c 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -70,6 +70,8 @@ const char_t *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; +const char_t *const OPTION_EXEC_GRAPH_EXEC_TIMEOUT = "ge.exec.graphExecTimeout"; +const char_t *const OPTION_EXEC_MODEL_EXEC_TIMEOUT = "ge.exec.modelExecTimeout"; // Option key: memory init const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h index ea077054..170c7862 100644 --- a/inc/external/hccl/hccl.h +++ b/inc/external/hccl/hccl.h @@ -145,7 +145,7 @@ extern 
HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);

extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);

/**
- * @brief AllGather operator.
+ * @brief Send operator.
 *
 * @param sendBuf A pointer identifying the input data address of the operator.
 * @param count An integer(u64) identifying the number of data elements to be sent.
 * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32.
@@ -158,7 +158,7 @@ extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);
 extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType, uint32_t destRank, HcclComm comm,
                            aclrtStream stream);
 /**
- * @brief AllGather operator.
+ * @brief Recv operator.
 *
 * @param recvBuf A pointer identifying the output data address of the operator.
 * @param count An integer(u64) identifying the number of data elements to be received.
@@ -171,6 +171,30 @@ extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType,
 extern HcclResult HcclRecv(void *recvBuf, uint64_t count, HcclDataType dataType, uint32_t srcRank, HcclComm comm,
                            aclrtStream stream);

+/**
+ * @brief AlltoAllV operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param sendCounts Integer array, where entry i specifies the number of elements to send to rank i.
+ * @param sdispls Integer array, where entry i specifies the displacement (offset from sendBuf, in units of sendType)
+ * from which to send data to rank i.
+ * @param sendType Datatype of send buffer elements, must be one of the following types: int8, int32, int64, uint64,
+ * float16, float32.
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param recvCounts Integer array, where entry j specifies the number of elements to receive from rank j.
+ * @param rdispls Integer array, where entry j specifies the displacement (offset from recvBuf, in units of recvType) to
+ * which data from rank j should be written.
+ * @param recvType Datatype of receive buffer elements, must be one of the following types: int8, int32, int64, uint64,
+ * float16, float32.
+ * @param comm A pointer identifying the communication resource.
+ * @param stream A pointer identifying the stream information.
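+ * @par Example
+ * A minimal, illustrative sketch only (not part of the original header). It assumes comm and
+ * stream were created elsewhere (e.g. with HcclCommInitRootInfo and aclrtCreateStream) and that
+ * sendBuf/recvBuf are device buffers large enough for one int32 element per rank.
+ * @code
+ * // Four ranks each exchange one int32 element with every rank.
+ * uint64_t counts[4] = {1U, 1U, 1U, 1U};
+ * uint64_t displs[4] = {0U, 1U, 2U, 3U};  // offsets in elements of the given data type
+ * HcclResult ret = HcclAlltoAllV(sendBuf, counts, displs, HCCL_DATA_TYPE_INT32,
+ *                                recvBuf, counts, displs, HCCL_DATA_TYPE_INT32, comm, stream);
+ * @endcode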
+ * @return HcclResult
+ */
+
+extern HcclResult HcclAlltoAllV(const void *sendBuf, const void *sendCounts, const void *sdispls, HcclDataType sendType,
+                                const void *recvBuf, const void *recvCounts, const void *rdispls, HcclDataType recvType,
+                                HcclComm comm, aclrtStream stream);
+
 /**
 * @brief Destroy HCCL comm
 *
diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h
index a796670c..1f755faa 100644
--- a/inc/framework/common/debug/log.h
+++ b/inc/framework/common/debug/log.h
@@ -88,7 +88,7 @@
     if ((expr) != ge::GRAPH_SUCCESS) {                        \
       REPORT_CALL_ERROR("E19999", "Operator graph failed");   \
       GELOGE(ge::FAILED, __VA_ARGS__);                        \
-      return (FAILED);                                        \
+      return (ge::FAILED);                                    \
     }                                                         \
   } while (false)
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index 74a386a7..bbbbf4b2 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -309,6 +309,7 @@ struct Options {
   int32_t physical_device_id;
   std::string profiling_mode;
   std::string profiling_options;
+  int32_t graphExecTimeout;
 };

 // Profiling info of task
diff --git a/inc/framework/common/op_types.h b/inc/framework/common/op_types.h
index 2b7009e8..12123925 100644
--- a/inc/framework/common/op_types.h
+++ b/inc/framework/common/op_types.h
@@ -45,14 +45,6 @@ class GE_FUNC_VISIBILITY OpTypeContainer {
  private:
  std::set<std::string> op_type_list_;
 };
-
-class GE_FUNC_VISIBILITY OpTypeRegistrar {
- public:
-  explicit OpTypeRegistrar(const std::string &op_type) noexcept {
-    OpTypeContainer::Instance()->Register(op_type);
-  }
-  ~OpTypeRegistrar() {}
-};
 }  // namespace ge

 #define REGISTER_OPTYPE_DECLARE(var_name, str_name) \

 #define REGISTER_OPTYPE_DEFINE(var_name, str_name) \
   const char_t *var_name = str_name;               \
-  const ge::OpTypeRegistrar g_##var_name##_reg(str_name);
+  const bool g_##var_name##_reg = (static_cast<void>(OpTypeContainer::Instance()->Register(str_name)), true);

 #define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name))
 #endif  // INC_FRAMEWORK_COMMON_OP_TYPES_H_
diff --git a/inc/framework/common/profiling_definitions.h b/inc/framework/common/profiling_definitions.h
index 5506fa5d..f814ba76 100644
--- a/inc/framework/common/profiling_definitions.h
+++ b/inc/framework/common/profiling_definitions.h
@@ -164,9 +164,8 @@ class ProfilingContext {
   int64_t RegisterString(const std::string &str);
   int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str);
-  void UpdateElementHashId(const MsprofReporterCallback reporter_callback);
-  static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str,
-                            uint64_t &hash_id);
+  void UpdateElementHashId();
+  static Status QueryHashId(const std::string &src_str, uint64_t &hash_id);
   size_t GetRegisterStringNum() const {
     return strings_to_index_.size();
   }
diff --git a/inc/framework/common/runtime_tensor_desc.h b/inc/framework/common/runtime_tensor_desc.h
new file mode 100644
index 00000000..ebd28e25
--- /dev/null
+++ b/inc/framework/common/runtime_tensor_desc.h
@@ -0,0 +1,38 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_ +#define INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_ + +#include + +namespace ge { +constexpr int64_t kMaxDimSize = 32; + +#pragma pack(push, 1) +struct RuntimeTensorDesc { + uint64_t data_addr; + int64_t data_offset_size; + int64_t dtype; + int64_t shape[kMaxDimSize + 1]; // shape:Dim_Num|DIM0|DIM1|...|DIM31 + int64_t original_shape[kMaxDimSize + 1]; // original_shape:Dim_Num|DIM0|DIM1|...|DIM31 + int64_t format; + int64_t sub_format; + uint8_t reserved[456]; // padding to 1024 bytes +}; +#pragma pack(pop) +} // namespace ge + +#endif // INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_ \ No newline at end of file diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index a4df4e2f..ac2c0fde 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -88,6 +88,7 @@ REGISTER_OPTYPE_DECLARE(DROPOUTDOMASKV3, "DropOutDoMaskV3"); REGISTER_OPTYPE_DECLARE(DROPOUTDOMASKV3D, "DropOutDoMaskV3D"); REGISTER_OPTYPE_DECLARE(SOFTMAXV2WITHDROPOUTDOMASKV3D, "SoftmaxV2WithDropOutDoMaskV3D"); REGISTER_OPTYPE_DECLARE(DROPOUTGENMASK, "DropOutGenMask"); +REGISTER_OPTYPE_DECLARE(AXPYWITHSOFTMAXANDDROPOUTDOMASK, "AxpyWithSoftmaxAndDropOutDoMask"); REGISTER_OPTYPE_DECLARE(CONCAT, "Concat"); REGISTER_OPTYPE_DECLARE(ROIPOOLING, "ROIPooling"); REGISTER_OPTYPE_DECLARE(PROPOSAL, "Proposal"); diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index 666f5ccd..84912e64 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -118,13 +118,13 @@ } while (false) // Check if the parameter is null. If yes, return PARAM_INVALID and record the error -#define GE_CHECK_NOTNULL(val) \ - do { \ - if ((val) == nullptr) { \ - REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \ - GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_NOTNULL(val, ...) \ + do { \ + if ((val) == nullptr) { \ + REPORT_INNER_ERROR("E19999", "Param:" #val " is nullptr, check invalid" __VA_ARGS__); \ + GELOGE(ge::FAILED, "[Check][Param:" #val "]null is invalid" __VA_ARGS__); \ + return ge::PARAM_INVALID; \ + } \ } while (false) // Check if the parameter is null. 
If yes, just return and record the error diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 96a5a018..8c0130c8 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -52,7 +52,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual domi::Status Parse(const char *file, ge::Graph &graph) = 0; + virtual Status Parse(const char *file, ge::Graph &graph) = 0; /** * @ingroup domi_omg @@ -64,7 +64,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return FAILED * @author */ - virtual domi::Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; + virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; /** * @ingroup domi_omg @@ -76,7 +76,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return FAILED * @author */ - virtual domi::Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; + virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; /** * @ingroup domi_omg @@ -86,7 +86,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual domi::Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; + virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; /** * @ingroup domi_omg @@ -97,8 +97,8 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual domi::Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, - ge::ComputeGraphPtr &graph) = 0; + virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, + ge::ComputeGraphPtr &graph) = 0; /** * @ingroup domi_omg * @brief Convert model files to JSON format @@ -107,10 +107,10 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual domi::Status ToJson(const char *model_file, const char *json_file) { + virtual Status ToJson(const char *model_file, const char *json_file) { (void)model_file; (void)json_file; - return domi::SUCCESS; + return SUCCESS; } /* @@ -121,7 +121,7 @@ class GE_FUNC_VISIBILITY ModelParser { */ virtual ge::DataType ConvertToGeDataType(const uint32_t type) = 0; - virtual domi::Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0; + virtual Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0; /** * @ingroup domi_omg @@ -131,7 +131,7 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual domi::Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) { + virtual Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) { (void)serialized_proto; (void)graph; return UNSUPPORTED; @@ -146,13 +146,24 @@ class GE_FUNC_VISIBILITY ModelParser { * @return SUCCESS * @return Others failed */ - virtual domi::Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback, - ge::ComputeGraphPtr &graph) { + virtual Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback, + ge::ComputeGraphPtr &graph) { (void)serialized_proto; (void)callback; (void)graph; return UNSUPPORTED; } + + virtual bool HasError() { + return false; + } + + virtual Status Save(const 
std::string &file) { + (void)file; + return SUCCESS; + } + + virtual void Clear(){}; }; } // namespace domi diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h index cd5faa73..d64a4c2d 100644 --- a/inc/framework/omg/parser/parser_factory.h +++ b/inc/framework/omg/parser/parser_factory.h @@ -23,6 +23,7 @@ #include #include "framework/omg/omg_inner_types.h" #include "framework/omg/parser/parser_types.h" +#include "external/register/register.h" namespace domi { class WeightsParser; @@ -131,6 +132,12 @@ class GE_FUNC_VISIBILITY WeightsParserRegisterar { return std::shared_ptr(ptr); \ } \ WeightsParserRegisterar g_##type##_Weights_Parser_Creator(type, Creator_##type##_Weights_Parser) -}; // namespace domi + +class GE_FUNC_VISIBILITY OpRegTbeParserFactory { + public: + static OpRegTbeParserFactory *Instance(); + void Finalize(const domi::OpRegistrationData ®_data); +}; +} // namespace domi #endif // INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_ diff --git a/inc/framework/omg/parser/weights_parser.h b/inc/framework/omg/parser/weights_parser.h index 04f09b14..52c360af 100644 --- a/inc/framework/omg/parser/weights_parser.h +++ b/inc/framework/omg/parser/weights_parser.h @@ -67,6 +67,17 @@ class GE_FUNC_VISIBILITY WeightsParser { * @author */ virtual Status ParseFromMemory(const char *input, uint32_t lengt, ge::ComputeGraphPtr &graph) = 0; + + virtual bool HasError() { + return false; + } + + virtual Status Save(const std::string &file) { + (void)file; + return SUCCESS; + } + + virtual void Clear() {} }; } // namespace domi diff --git a/inc/framework/pne/process_node_engine.h b/inc/framework/pne/process_node_engine.h index 55ceac8b..bb18b553 100644 --- a/inc/framework/pne/process_node_engine.h +++ b/inc/framework/pne/process_node_engine.h @@ -27,6 +27,15 @@ #include "framework/pne/pne_model.h" namespace ge { +class ProcessNodeEngineImpl { + public: + virtual Status OptimizeGraph(const std::vector &inputs, ComputeGraphPtr &compute_graph) = 0; + + virtual Status BuildGraph(ComputeGraphPtr &compute_graph, PneModelPtr &model) = 0; +}; + +using ProcessNodeEngineImplPtr = std::shared_ptr; + class ProcessNodeEngine { public: ProcessNodeEngine() = default; @@ -45,8 +54,11 @@ class ProcessNodeEngine { virtual const std::string &GetEngineName(const ge::NodePtr &node_ptr = nullptr) const = 0; + virtual void SetImpl(ProcessNodeEngineImplPtr impl) = 0; + protected: std::string engine_id_; + ProcessNodeEngineImplPtr impl_ = nullptr; }; using ProcessNodeEnginePtr = std::shared_ptr; diff --git a/inc/framework/runtime/gert_api.h b/inc/framework/runtime/gert_api.h new file mode 100644 index 00000000..007993e8 --- /dev/null +++ b/inc/framework/runtime/gert_api.h @@ -0,0 +1,27 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_ +#define AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_ +#include "model_v2_executor.h" +#include "common/ge_types.h" + +namespace gert { +std::unique_ptr LoadExecutorFromFile(const char *file_path, ge::graphStatus &error_code); +std::unique_ptr LoadExecutorFromModelData(const ge::ModelData &model_data, + ge::graphStatus &error_code); +} // namespace gert +#endif // AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_ diff --git a/inc/framework/runtime/model_desc.h b/inc/framework/runtime/model_desc.h new file mode 100644 index 00000000..46c21636 --- /dev/null +++ b/inc/framework/runtime/model_desc.h @@ -0,0 +1,94 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_ +#define AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_ +#include "common/ge_types.h" +#include "exe_graph/runtime/shape.h" +#include "exe_graph/runtime/continuous_vector.h" +#include "exe_graph/runtime/storage_format.h" +#include "exe_graph/runtime/storage_shape.h" + +namespace gert { +class ShapeRange { + public: + const Shape &GetMin() const; + const Shape &GetMax() const; + Shape &MutableMin(); + Shape &MutableMax(); + + private: + Shape min_; + Shape max_; +}; + +class ModelIoDesc { + public: + const char *GetName() const; + int32_t GetDataType() const; + ge::Format GetStorageFormat() const; + ge::Format GetOriginFormat() const; + int64_t GetSize() const; + const Shape &GetStorageShape() const; + const Shape &GetOriginShape() const; + const ShapeRange &GetOriginShapeRange() const; + const ShapeRange &GetStorageShapeRange() const; + + void SetName(const char *name); + void SetDataType(int32_t data_type); + void SetStorageFormat(ge::Format format); + void SetOriginFormat(ge::Format format); + Shape &MutableStorageShape(); + Shape &MutableOriginShape(); + ShapeRange &MutableOriginShapeRange(); + ShapeRange &MutableStorageShapeRange(); + + private: + const char *name_; + int32_t data_type_; + StorageFormat format_; + StorageShape shape_; + ShapeRange storage_shape_range_; + ShapeRange origin_shape_range_; +}; + +class ModelDesc { + public: + static size_t CalcSize(size_t input_num, size_t output_num); + const ModelIoDesc *GetInputDesc(size_t index) const; + const ModelIoDesc *GetAllInputsDesc(size_t &input_num) const; + + const ModelIoDesc *GetOutputDesc(size_t index) const; + const ModelIoDesc *GetAllOutputsDesc(size_t &output_num) const; + + ModelIoDesc *MutableInputDesc(size_t index); + ModelIoDesc *MutableOutputDesc(size_t index); + ModelIoDesc *AllMutableIoDesc(size_t &input_num, size_t &output_num); + void SetInputNum(size_t input_num); + void SetOutputNum(size_t output_num); + + ge::graphStatus GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) const; + ge::graphStatus GetUserDesignateShapeOrder(std::vector &user_designate_shape_order) const; + ge::graphStatus GetModelAttrs(std::vector &attrs) const; + + private: + 
size_t input_num_; + size_t output_num_; + ContinuousVector model_io_descs_; +}; +} // namespace gert + +#endif // AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_ \ No newline at end of file diff --git a/inc/framework/runtime/model_v2_executor.h b/inc/framework/runtime/model_v2_executor.h new file mode 100644 index 00000000..277a23d0 --- /dev/null +++ b/inc/framework/runtime/model_v2_executor.h @@ -0,0 +1,142 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_ +#define AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_ +#include +#include "graph/compute_graph.h" +#include "graph/ge_error_codes.h" +#include "model_desc.h" +#include "runtime/stream.h" +#include "exe_graph/runtime/tensor.h" + +namespace gert { +enum SubExeGraphType { kInitExeGraph, kMainExeGraph, kDeInitExeGraph, kSubExeGraphTypeEnd }; +static constexpr char *kSubExeGraphTypeStrs[kSubExeGraphTypeEnd] = {(char *)"Init", (char *)"Main", (char *)"DeInit"}; +inline const char *GetSubExeGraphTypeStr(SubExeGraphType type) { + return kSubExeGraphTypeStrs[type]; +} + +class ResourceGuard { + public: + void *ResetExecutionData(std::unique_ptr execution_data); + void ResetAnyValue(std::unique_ptr any_values, size_t count); + void PushNode(void *node); + void PushWatcher(void *watcher); + void *ResetNodesArray(std::unique_ptr nodes_array); + void *ResetStartNodesArray(std::unique_ptr start_nodes_array); + void *ResetNodesIndgreeArray(std::unique_ptr nodes_indgree_array); + void *ResetNodesWaitIndgreeArray(std::unique_ptr nodes_indgree_array); + void *ResetInputsArray(std::unique_ptr inputs_array); + void *ResetOutputsArray(std::unique_ptr outputs_array); + void *ResetWatchersArray(std::unique_ptr watchers_array); + void *ResetReadyQueue(void *ready_queue); + void *ResetBuffer(std::unique_ptr buffer); + void *ResetComputeNodeInfo(std::unique_ptr compute_node_info); + void *ResetKernelExtendInfo(std::unique_ptr kernel_extend_info); + void *ResetModelDesc(std::unique_ptr model_desc); + + ~ResourceGuard(); + + private: + std::unique_ptr execution_data_holder_; + size_t any_values_num_; + std::unique_ptr any_values_guard_; + + std::vector> nodes_guarder_; + std::vector> watchers_guarder_; + std::unique_ptr continuous_buffer_guarder_; + std::unique_ptr buffer_guarder_; + std::unique_ptr compute_node_info_guarder_; + std::unique_ptr kernel_extend_info_guarder_; + std::unique_ptr model_desc_guarder_; + + std::unique_ptr nodes_array_guarder_; + std::unique_ptr start_nodes_array_guarder_; + std::unique_ptr nodes_indgree_array_guarder_; + std::unique_ptr nodes_wait_indgree_array_guarder_; + std::unique_ptr inputs_array_guarder_; + std::unique_ptr outputs_array_guarder_; + std::unique_ptr watchers_array_guarder_; + std::unique_ptr ready_queue_guarder_{nullptr, nullptr}; +}; + +struct ModelExecuteArg { + rtStream_t stream; +}; +static_assert(std::is_standard_layout::value, "The class ModelExecuteArg must be a 
POD"); + +class ExeGraphExecutor { + public: + // todo unload时释放anyvalue资源 + ge::graphStatus Load() { + return ge::GRAPH_SUCCESS; + } + ge::graphStatus UnLoad() { + return ge::GRAPH_SUCCESS; + } + + /** + * 设置图执行的输入/输出,需要注意的是,使用者需要自己保证inputs/outputs刷新完全!!! + */ + ge::graphStatus SpecifyInputs(void **inputs, size_t start, size_t num); + ge::graphStatus SpecifyOutputs(void **outputs, size_t num); + ge::graphStatus Execute(); + + const void *GetExecutionData() const { + return execution_data_; + } + + ResourceGuard &GetResourceGuard(); + void *SetExecutionData(std::unique_ptr execution_data); + + private: + friend class ModelV2ExecutorTestHelper; + + void *execution_data_; + ResourceGuard resource_guard_; +}; +class ModelV2Executor { + public: + static std::unique_ptr Create(const ge::ComputeGraphPtr &root_graph); + + ge::graphStatus Load(); + ge::graphStatus Execute(const ModelExecuteArg &arg, Tensor **inputs, size_t input_num, Tensor **outputs, + size_t output_num); + ge::graphStatus ExecuteSync(Tensor **inputs, size_t input_num, Tensor **outputs, size_t output_num); + ge::graphStatus UnLoad(); + + const ModelDesc &GetModelDesc() const; + void SetModelDesc(ModelDesc *model_desc); + ModelV2Executor(const ModelV2Executor &) = delete; + ModelV2Executor(ModelV2Executor &&) = delete; + ModelV2Executor &operator=(const ModelV2Executor &) = delete; + ModelV2Executor &operator=(ModelV2Executor &&) = delete; + + private: + friend class ModelV2ExecutorBuilder; + friend class ModelV2ExecutorTestHelper; + ModelV2Executor() = default; + + private: + std::array graphs_; + ResourceGuard resource_guard_; + ModelDesc *model_desc_ = nullptr; + rtStream_t default_stream_ = nullptr; +}; +} // namespace gert + +#endif // AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_ diff --git a/metadef b/metadef index f3e9df35..00261785 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit f3e9df35da67ff00a22a09ec5b369bbc4bac9e74 +Subproject commit 002617852e22767bd864db3c01595630e23f5496 diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index 8ca4015c..5b2f34be 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -211,6 +211,8 @@ typedef struct { #define HCCL_REQUEST_NULL NULL +#define HCCL_TAG_ANY (1 << 30) + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h index 6c4d615d..78ef3446 100644 --- a/third_party/fwkacllib/inc/ops/all_ops.h +++ b/third_party/fwkacllib/inc/ops/all_ops.h @@ -42,6 +42,7 @@ #include "list_ops.h" #include "logging_ops.h" #include "lookup_ops.h" +#include "map_ops.h" #include "math_ops.h" #include "matrix_calculation_ops.h" #include "nn_batch_norm_ops.h" @@ -79,4 +80,5 @@ #include "warp_perspective_ops.h" #include "vector_search.h" #include "deep_md.h" +#include "encoding_ops.h" #endif // OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index 37b0f9f5..924f98e4 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1550,6 +1550,39 @@ REG_OP(EnsureShape) DT_FLOAT,DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .REQUIRED_ATTR(shape, ListInt) .OP_END_FACTORY_REG(EnsureShape) + +/** +* @brief Finds the first unique element from every consecutive group of equivalent elements. + +* @par Inputs: +* x: A ND tensor. + +* @par Attributes: +* @li return_idx: An optional bool. 
Whether to also return the indices. The default value is False.
+* @li return_counts: An optional bool. Whether to also return the counts for each element. The default is False.
+* @li axis: An optional int. The axis along which to apply unique. The default is 1000, which means None.
+
+* @par Outputs:
+* @li y: "x" in the unique output "y".
+* @li idx: The index of each value of "x".
+* @li count: The counts of each value of "y".
+
+* @attention Constraints:
+* UniqueConsecutive runs on the Ascend AI CPU, which delivers poor performance.
+
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator UniqueConsecutive.
+*/
+
+REG_OP(UniqueConsecutive)
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OUTPUT(idx, TensorType::IndexNumberType())
+    .OUTPUT(count, TensorType::IndexNumberType())
+    .ATTR(return_idx, Bool, false)
+    .ATTR(return_counts, Bool, false)
+    .ATTR(axis, Int, 1000)
+    .OP_END_FACTORY_REG(UniqueConsecutive)
 }  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/case_condition_ops.h b/third_party/fwkacllib/inc/ops/case_condition_ops.h
deleted file mode 100644
index 85dba609..00000000
--- a/third_party/fwkacllib/inc/ops/case_condition_ops.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! - * \file coordinates_1d_to_2d_ops.h - * \brief - */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_COORDINATES_1D_TO_2D_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_COORDINATES_1D_TO_2D_OPS_H_ - -#include "graph/operator_reg.h" - -namespace ge { -/** -*@brief Convert one-dimensional coordinates to two-dimensional coordinates. -*@par Inputs: -*@li x: A Tensor of type int32/int64/uint64. One-dimensional coordinates. -*@li shape: A Tensor of type int32/int64/uint64. 4D tensor [N,C,H,W]. -*@par Outputs: -*@li row: row of two-dimensional -*@li col: col of two-dimensional -*@li n: col number of two-dimensional -*/ -REG_OP(Coordinates1DTo2D) - .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64})) - .INPUT(shape, TensorType({DT_INT32, DT_INT64, DT_UINT64})) - .OUTPUT(row, TensorType({DT_INT32, DT_INT64, DT_UINT64})) - .OUTPUT(col, TensorType({DT_INT32, DT_INT64, DT_UINT64})) - .OUTPUT(n, TensorType({DT_INT32, DT_INT64, DT_UINT64})) - .OP_END_FACTORY_REG(Coordinates1DTo2D) - -} // namespace ge - - -#endif // OPS_BUILT_IN_OP_PROTO_INC_COORDINATES_1D_TO_2D_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index c4b38d06..f8a593b0 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -1620,8 +1620,8 @@ REG_OP(Greater) * Compatible with the TensorFlow operator zeros_like. */ REG_OP(ZerosLike) - .INPUT(x, TensorType::BasicType()) - .OUTPUT(y, TensorType::BasicType()) + .INPUT(x, TensorType({BasicType(), DT_VARIANT})) + .OUTPUT(y, TensorType({BasicType(), DT_VARIANT})) .OP_END_FACTORY_REG(ZerosLike) /** diff --git a/third_party/fwkacllib/inc/ops/encoding_ops.h b/third_party/fwkacllib/inc/ops/encoding_ops.h new file mode 100644 index 00000000..f96a67e7 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/encoding_ops.h @@ -0,0 +1,49 @@ +/** + * Copyright (C) Huawei Technologies Co., Ltd 2022-2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file encoding_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { +/** +* @brief An op to decode indices for LDPC code. \n + +* @par Inputs: +* @li valid_num: an int32 tensor indicates index limit for each line. +* @li matrix_info: an int32 2D-tensor store the block indices info of connection H matrix. \n + +* @par Outputs: +* indices: an int32 2D-tensor store the concrete indices value. 
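* @par Example
* An illustrative graph-construction sketch only; it is not from the original header, and it
* assumes the generated ge::op::LDPCDecode operator class and hypothetical upstream operators:
* @code
* auto decode = ge::op::LDPCDecode("ldpc_decode");
* decode.set_input_valid_num(valid_num_op);       // int32, one index limit per line
* decode.set_input_matrix_info(matrix_info_op);   // int32 2-D block indices of the H matrix
* @endcode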
+* +* @par Restrictions: +* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(LDPCDecode) + .INPUT(valid_num, TensorType({DT_INT32})) + .INPUT(matrix_info, TensorType({DT_INT32})) + .OUTPUT(indices, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(LDPCDecode) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index 7cfe39c4..005b716c 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -124,6 +124,30 @@ REG_OP(If) .GRAPH(else_branch) .OP_END_FACTORY_REG(If) +/** + *@brief Select one of the subgraphs to pass the input tensors and return the output tensors . \n + + *@par Inputs: + *@li branch_index: A int32 scalar which determines the selected subgraph. + *@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n + + *@par Graphs: + *branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors, + * whose types are the same as what every other subgraph returns . \n + + *@par Outputs: + *output: The output tensors returned by one of branches . It's a dynamic output. \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator Case. + */ +REG_OP(StatelessCase) + .INPUT(branch_index, DT_INT32) + .DYNAMIC_INPUT(input, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .DYNAMIC_GRAPH(branches) + .OP_END_FACTORY_REG(StatelessCase) + /** *@brief Select one of the subgraphs to pass the input tensors and return the output tensors . \n diff --git a/third_party/fwkacllib/inc/ops/index_to_addr_ops.h b/third_party/fwkacllib/inc/ops/index_to_addr_ops.h deleted file mode 100644 index 3af17a45..00000000 --- a/third_party/fwkacllib/inc/ops/index_to_addr_ops.h +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! - * \file index_to_addr_ops.h - * \brief - */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_INDEX_TO_ADDR_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_INDEX_TO_ADDR_OPS_H_ - -#include "graph/operator_reg.h" - -namespace ge { - -/** -*@brief get block tensor according to base addr tensor, for hccl remote read to use. -*@par Inputs: -*@li base_addr: A Tensor of type int64/uint64. \n -*@li row:A Tensor of type int64/uint64. \n -*@li col: A Tensor of type int64/uint64. - -*@par Outputs: -*addr_table: list of [rank id, host addr, device addr, read size] - -*@par Attributes: -*@li ori_shape: An required list int. Shape of base tensor. -*@li block_size: An required list int. Shape of split block tensor. -*@li ori_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to -"Matrix". Currently only support Matrix storage -*@li block_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to -"Matrix". Currently only support Matrix storage -*@li rank_id: An optional int of rank id. 
Defaults is 0 -*@li dtype: An optional Type of base tensor. Defaults is DT_FLOAT -*/ -REG_OP(IndexToAddr) - .INPUT(base_addr, TensorType({DT_INT64, DT_UINT64})) - .INPUT(x, TensorType({DT_INT64, DT_UINT64})) - .OUTPUT(addrs_table, TensorType({DT_INT64, DT_UINT64})) - .REQUIRED_ATTR(ori_shape, ListInt) - .REQUIRED_ATTR(block_size, ListInt) - .ATTR(ori_storage_mode, String, "Matrix") - .ATTR(block_storage_mode, String, "Matrix") - .ATTR(rank_id, Int, 0) - .ATTR(dtype, Type, DT_FLOAT) - .OP_END_FACTORY_REG(IndexToAddr) - -} // namespace ge - - -#endif // OPS_BUILT_IN_OP_PROTO_INC_INDEX_TO_ADDR_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/map_ops.h b/third_party/fwkacllib/inc/ops/map_ops.h new file mode 100644 index 00000000..6ac15bf6 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/map_ops.h @@ -0,0 +1,152 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file map_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_ +#include "graph/operator_reg.h" + +namespace ge { +/** +* @brief Returns whether the given key exists in the map. \n + +* @par Inputs: +* @li input_handle: A scalar Tensor of type variant. The original map. +* @li key: The key to check. Supports int32, int64, string. \n + +* @par Outputs: +* has_key: A scalar Tensor of type bool. Whether the key is already in the map or not. \n + +* @par Third-party framework compatibility. +* Compatible with tensorflow TensorMapHasKey operator. +*/ +REG_OP(TensorMapHasKey) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(has_key, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(TensorMapHasKey) + +/** +* @brief Returns a tensor map with item from given key erased. \n + +* @par Inputs: +* @li input_handle: A scalar Tensor of type variant. The original map. +* @li key: The key of the value to be erased. Supports int32, int64, string. \n + +* @par Outputs: +* output_handle: A scalar Tensor of type variant. The map with value from given key removed. \n + +* @par Third-party framework compatibility. +* Compatible with tensorflow TensorMapErase operator. +*/ +REG_OP(TensorMapErase) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .OP_END_FACTORY_REG(TensorMapErase) + +/** +* @brief Returns a map that is the 'input_handle' + with the given key-value pair inserted. \n + +* @par Inputs: +* @li input_handle: The original map, Must be type: DT_VARIANT. +* @li key: A Tensor,the key to be inserted.Must be one of + the following types: int32, int64, string. +* @li value: A Tensor,the value to be inserted.Must be + one of BasicType types. \n + +* @par Outputs: +* output_handle: The map with key and value inserted. + Must be type: DT_VARIANT. 
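* @par Example
* A hedged sketch of wiring the map ops together when building a graph; the generated
* ge::op::* classes and the operator names used here are assumptions, not part of this header:
* @code
* auto empty_map = ge::op::EmptyTensorMap("map");
* auto insert = ge::op::TensorMapInsert("insert");
* insert.set_input_input_handle(empty_map, "handle");  // DT_VARIANT map in
* // key/value inputs are connected the same way; output_handle is the updated map
* @endcode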
\n +*/ +REG_OP(TensorMapInsert) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .INPUT(value, BasicType) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .OP_END_FACTORY_REG(TensorMapInsert) + +/** +* @brief Returns the value from a given key in a tensor map . \n + +* @par Inputs: +* @li input_handle: The input map. Must be type: DT_VARIANT. +* @li key: A Tensor,the key to be looked up. Must be one of + the following types: int32,int64,string . \n + +* @par Attributes: +* value_dtype: A int. Representing the type of value . \n + +* @par Outputs: +* value: A Tensor,the value found from the given key. +*/ +REG_OP(TensorMapLookup) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(value, BasicType) + .REQUIRED_ATTR(value_dtype, Type) + .OP_END_FACTORY_REG(TensorMapLookup) + +/** +* @brief return TensorMap Size. \n +* +* @par Inputs: +* input_handle: A Tensor. Must be one of the following types: variant. \n +* +* @par Outputs: +* size: A Tensor. Must be one of the following types: int32. \n +*/ +REG_OP(TensorMapSize) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .OUTPUT(size, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(TensorMapSize) + +/** + * @brief Return TensorMapStackKeys \n + * + * @par Inputs: + * input_handle: A Tensor. Must be one of the following types: variant. \n + * + * @par Outputs: + * keys: A Tensor. Must be one of the following types: int32, int64, string. \n + * + * @par Attributes: + * key_dtype: An required param. It is the dtype of the key. + */ +REG_OP(TensorMapStackKeys) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .OUTPUT(keys, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .REQUIRED_ATTR(key_dtype, Type) + .OP_END_FACTORY_REG(TensorMapStackKeys) + +/** +* @brief Creates and returns an empty tensor map. \n + +* @par Outputs: +* handle: An empty tensor map . \n + +* @par Third-party framework compatibility. +* Compatible with tensorflow EmptyTensorMap operator. +*/ +REG_OP(EmptyTensorMap) + .OUTPUT(handle, TensorType({DT_VARIANT})) + .OP_END_FACTORY_REG(EmptyTensorMap) +} // namespace ge +#endif // OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index a7465af0..c856d9e5 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -24,6 +24,82 @@ #include "graph/operator_reg.h" namespace ge { +/** +* @brief Backprop W of AttentionLnQKV + ReduceSumD \n +* @par Inputs: +* Four inputs, including: +* @li x: A Tensor. Must be one of the following types: float16. +* @li query_dx: A Tensor. Must be one of the following types: float16. +* @li key_dw: A Tensor. Must be one of the following types: float16. +* @li value_dw: A Tensor. Must be one of the following types: float16. + +* @par Attributes: +* @li trans_a: A optional attribute, the type is bool. Defaults to True. +* @li trans_b: A optional attribute, the type is bool. Defaults to False. \n + +* @par Outputs: +* Six outputs, including: +* @li dw_query: A Tensor. Must be one of the following types: float16. +* @li dw_key: A Tensor. Must be one of the following types: float16. +* @li dw_value: A Tensor. Must be one of the following types: float16. +* @li dbias_query: A Tensor. Must be one of the following types: float16. +* @li dbias_key: A Tensor. Must be one of the following types: float16. 
+* @li dbias_value: A Tensor. Must be one of the following types: float16. \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n +*/ +REG_OP(AttentionQKVGradW) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(query_dx, TensorType({DT_FLOAT16})) + .INPUT(key_dw, TensorType({DT_FLOAT16})) + .INPUT(value_dw, TensorType({DT_FLOAT16})) + .OUTPUT(dw_query, TensorType({DT_FLOAT16})) + .OUTPUT(dw_key, TensorType({DT_FLOAT16})) + .OUTPUT(dw_value, TensorType({DT_FLOAT16})) + .OUTPUT(dbias_query, TensorType({DT_FLOAT16})) + .OUTPUT(dbias_key, TensorType({DT_FLOAT16})) + .OUTPUT(dbias_value, TensorType({DT_FLOAT16})) + .ATTR(trans_a, Bool, true) + .ATTR(trans_b, Bool, false) + .OP_END_FACTORY_REG(AttentionQKVGradW) + +/** +* @brief Backprop X of AttentionLnQKV + AddN \n +* @par Inputs: +* Seven inputs, including: +* @li ln_dx: A Tensor. Must be one of the following types: float16. +* @li query_dx: A Tensor. Must be one of the following types: float16. +* @li key_dw: A Tensor. Must be one of the following types: float16. +* @li value_dw: A Tensor. Must be one of the following types: float16. +* @li kernel_query: A Tensor. Must be one of the following types: float16. +* @li kernel_key: A Tensor. Must be one of the following types: float16. +* @li kernel_value: A Tensor. Must be one of the following types: float16. \n + +* @par Attributes: +* @li trans_a: A optional attribute, the type is bool. Defaults to False. +* @li trans_b: A optional attribute, the type is bool. Defaults to True. \n + +* @par Outputs: +* One outputs, including: +* @li dx: A Tensor. Must be one of the following types: float16. \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n +*/ +REG_OP(AttentionQKVGradX) + .INPUT(ln_dx, TensorType({DT_FLOAT16})) + .INPUT(query_dx, TensorType({DT_FLOAT16})) + .INPUT(key_dw, TensorType({DT_FLOAT16})) + .INPUT(value_dw, TensorType({DT_FLOAT16})) + .INPUT(kernel_query, TensorType({DT_FLOAT16})) + .INPUT(kernel_key, TensorType({DT_FLOAT16})) + .INPUT(kernel_value, TensorType({DT_FLOAT16})) + .OUTPUT(dx, TensorType({DT_FLOAT16})) + .ATTR(trans_a, Bool, false) + .ATTR(trans_b, Bool, true) + .OP_END_FACTORY_REG(AttentionQKVGradX) + /** * @brief / (MatMul -> ConfusionTransposeD). @@ -54,6 +130,9 @@ namespace ge { * @li value_output: A Tensor. Must be one of the following types: float16. * @li mean: A Tensor. Must be one of the following types: float16. * @li variance: A Tensor. Must be one of the following types: float16. \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n */ REG_OP(AttentionLnQKV) .INPUT(x, TensorType({DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index f35f67c5..09e94523 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -2276,6 +2276,46 @@ REG_OP(BalanceRois) .OUTPUT(balance_rois, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(index, TensorType({DT_INT32})) .OP_END_FACTORY_REG(BalanceRois) + +/** +* @brief First calculate the minimum closure area of the two boxes, IoU, +* The CIoU is obtained by combining the center distance and width to height ratio and IoU. \n + +* @par Inputs: +* Two inputs, including: +* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with +* shape (4, N). "N" indicates the number of bounding boxes, and the value +* "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. 
+* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 +* with shape (4, M). "M" indicates the number of ground truth boxes, and +* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n + +* @par Attributes: +* @li trans: An optional bool, true for 'xywh', false for 'xyxy'. +* @li is_cross: An optional bool, control whether the output shape is [N, M] or [1, N] +* @li mode: An optional string, computation mode, a character string with the value range of [iou, iof] +* @li atan_sub_flag: An optional bool, control whether to output atan_sub. \n + +* @par Outputs: +* Two outputs, including: +* @li overlap: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N], +* specifying the IoU or IoF ratio . +* @li atan_sub: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N], +* specifying the IoU or IoF ratio . \n + +* @attention Constraints: +* "is_cross" only support false, "atan_sub_flag" only support true. +*/ +REG_OP(CIoU) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(atan_sub, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(trans, Bool, false) + .ATTR(is_cross, Bool, true) + .ATTR(mode, String, "iou") + .ATTR(atan_sub_flag, Bool, false) + .OP_END_FACTORY_REG(CIoU) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index 7930ae07..2a939e97 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -458,22 +458,22 @@ REG_OP(Softsign) .OP_END_FACTORY_REG(Softsign) /** - * @brief Computes softsignGrad: y_grad / (1 + abs(x)) ** 2 . + * @brief Computes softsignGrad: gradients / (1 + abs(features)) ** 2 . * * @par Inputs: * Two inputs, including: - * @li y_grad: A Tensor.Must be one of the following types:float16, float32, - * @li x: A Tensor of the same type and shape as "gradients". + * @li gradients: A Tensor.Must be one of the following types:float16, float32, + * @li features: A Tensor of the same type and shape as "gradients". - * @par x_grad: - * output:A Tensor. Has the same type as "y_grad". + * @par Outputs: + * output:A Tensor. Has the same type as "gradients". * @par Third-party framework compatibility * Compatible with the TensorFlow operator SoftsignGrad. */ REG_OP(SoftsignGrad) - .INPUT(y_grad, TensorType::FloatingDataType()) - .INPUT(x, TensorType::FloatingDataType()) - .OUTPUT(x_grad, TensorType::FloatingDataType()) + .INPUT(gradients, TensorType::FloatingDataType()) + .INPUT(features, TensorType::FloatingDataType()) + .OUTPUT(output, TensorType::FloatingDataType()) .OP_END_FACTORY_REG(SoftsignGrad) /** @@ -500,23 +500,23 @@ REG_OP(Selu) .OP_END_FACTORY_REG(Selu) /** -* @brief Computes SeluGrad backprops: y_grad * (y + scale * alpha) -* if y < 0, scale * y_grad otherwise . +* @brief Computes SeluGrad backprops: gradients * (outputs + scale * alpha) +* if outputs < 0, scale * gradients otherwise . * @par Inputs: * Two inputs, including: -* @li y_grad: A Tensor of type RealNumberType . -* @li y: A Tensor of type RealNumberType . +* @li gradients: A Tensor of type RealNumberType . +* @li outputs: A Tensor of type RealNumberType . * @par Outputs: -* x_grad: A Tensor. Must have the same type as "y_grad" . +* y: A Tensor. Must have the same type as "gradients" . 
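* @par Example
* A worked instance of the formula above, using the standard SELU constants
* (scale ~= 1.0507, alpha ~= 1.6733, so scale * alpha ~= 1.7581); the values are illustrative:
* for outputs = -1.0 and gradients = 2.0, y = 2.0 * (-1.0 + 1.7581) = 1.5162;
* for outputs = 3.0 and gradients = 2.0, y = scale * gradients = 2.1014.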
* @par Third-party framework compatibility * Compatible with the TensorFlow operator SeluGrad. */ REG_OP(SeluGrad) - .INPUT(y_grad, TensorType::RealNumberType()) - .INPUT(y, TensorType::RealNumberType()) - .OUTPUT(x_grad, TensorType::RealNumberType()) + .INPUT(gradients, TensorType::RealNumberType()) + .INPUT(outputs, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) .OP_END_FACTORY_REG(SeluGrad) /** diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index d29f3d63..5222bf7d 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -2612,6 +2612,34 @@ REG_OP(DynSeqOuter) .INPUT(seq_len2, TensorType({DT_INT32})) .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(DynSeqOuter) + +/** +* @brief Returns sliced data based on max nmsed_num. \n + +* @par Inputs: +* Four inputs, including: +* @li input_nmsed_boxes: A Tensor. Must be the following types: float16. +* @li input_nmsed_score: A Tensor. Must be the following types: float16. +* @li input_nmsed_class: A Tensor. Must be the following types: float16. +* @li input_nmsed_num: A Tensor. Must be the following types: int32. \n + +* @par Outputs: +* output_nmsed_boxes: A Tensor. Must be the following type: float. +* output_nmsed_score: A Tensor. Must be the following type: float. +* output_nmsed_class: A Tensor. Must be the following type: float. \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(NonMaxSuppressionBucketize) + .INPUT(input_nmsed_boxes, TensorType({DT_FLOAT16})) + .INPUT(input_nmsed_score, TensorType({DT_FLOAT16})) + .INPUT(input_nmsed_class, TensorType({DT_FLOAT16})) + .INPUT(input_nmsed_num, TensorType({DT_INT32})) + .OUTPUT(output_nmsed_boxes, TensorType({DT_FLOAT})) + .OUTPUT(output_nmsed_score, TensorType({DT_FLOAT})) + .OUTPUT(output_nmsed_class, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(NonMaxSuppressionBucketize) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/slice_write_ops.h b/third_party/fwkacllib/inc/ops/slice_write_ops.h deleted file mode 100644 index 0c161b2d..00000000 --- a/third_party/fwkacllib/inc/ops/slice_write_ops.h +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! - * \file slice_write_ops.h - * \brief - */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_SLICE_WRITE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_SLICE_WRITE_OPS_H_ - -#include "graph/operator_reg.h" - -namespace ge { - -/** -*@brief write tensor value to tensor x. -*@par Inputs: -*x: A Tensor of type float16/float/double/int32/int64. \n -*begin:A Tensor of type int32/int64. \n -*value: A Tensor of type float16/float/double/int32/int64. 
-*@par Outputs: -*x: same tensor with input x -*/ -REG_OP(SliceWrite) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ - DT_INT32, DT_INT64})) - .INPUT(begin, TensorType({DT_INT32, DT_INT64})) - .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ - DT_INT32, DT_INT64})) - .OUTPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ - DT_INT32, DT_INT64})) - .OP_END_FACTORY_REG(SliceWrite) - -} // namespace ge - - -#endif // OPS_BUILT_IN_OP_PROTO_INC_SLICE_WRITE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index 8eb7b521..bf0f670a 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -951,7 +951,7 @@ REG_OP(SerializeSparse) DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) .INPUT(shape, TensorType({DT_INT64})) - .OUTPUT(serialized_sparse, TensorType({DT_STRING})) + .OUTPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT})) .ATTR(out_type, Type, DT_STRING) .OP_END_FACTORY_REG(SerializeSparse) @@ -979,7 +979,7 @@ REG_OP(SerializeManySparse) DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) .INPUT(shape, TensorType({DT_INT64})) - .OUTPUT(serialized_sparse, TensorType({DT_STRING})) + .OUTPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT})) .ATTR(out_type, Type, DT_STRING) .OP_END_FACTORY_REG(SerializeManySparse) @@ -1002,7 +1002,7 @@ REG_OP(SerializeManySparse) * Compatible with the TensorFlow operator DeserializeSparse. */ REG_OP(DeserializeSparse) - .INPUT(serialized_sparse, TensorType({DT_STRING})) + .INPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT})) .OUTPUT(indices, TensorType({DT_INT64})) .OUTPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, \ DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ diff --git a/third_party/fwkacllib/inc/ops/vector_search.h b/third_party/fwkacllib/inc/ops/vector_search.h index ab4daa70..425eb5d6 100644 --- a/third_party/fwkacllib/inc/ops/vector_search.h +++ b/third_party/fwkacllib/inc/ops/vector_search.h @@ -154,6 +154,98 @@ REG_OP(CalcBucketsLimitAndOffset) .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) .REQUIRED_ATTR(total_limit, Int) .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) + +/** +*@brief get block tensor according to base addr tensor, for hccl remote read to use. +*@par Inputs: +*@li base_addr: A Tensor of type int64/uint64. \n +*@li row:A Tensor of type int64/uint64. \n +*@li col: A Tensor of type int64/uint64. + +*@par Outputs: +*addr_table: list of [rank id, host addr, device addr, read size] + +*@par Attributes: +*@li ori_shape: An required list int. Shape of base tensor. +*@li block_size: An required list int. Shape of split block tensor. +*@li ori_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to +"Matrix". Currently only support Matrix storage +*@li block_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to +"Matrix". Currently only support Matrix storage +*@li rank_id: An optional int of rank id. Defaults is 0 +*@li dtype: An optional Type of base tensor. 
Defaults is DT_FLOAT +*/ +REG_OP(IndexToAddr) + .INPUT(base_addr, TensorType({DT_INT64, DT_UINT64})) + .INPUT(x, TensorType({DT_INT64, DT_UINT64})) + .OUTPUT(addrs_table, TensorType({DT_INT64, DT_UINT64})) + .REQUIRED_ATTR(ori_shape, ListInt) + .REQUIRED_ATTR(block_size, ListInt) + .ATTR(ori_storage_mode, String, "Matrix") + .ATTR(block_storage_mode, String, "Matrix") + .ATTR(rank_id, Int, 0) + .ATTR(dtype, Type, DT_FLOAT) + .OP_END_FACTORY_REG(IndexToAddr) + +/** +*@brief Convert one-dimensional coordinates to two-dimensional coordinates. +*@par Inputs: +*@li x: A Tensor of type int32/int64/uint64. One-dimensional coordinates. +*@li shape: A Tensor of type int32/int64/uint64. 4D tensor [N,C,H,W]. +*@par Outputs: +*@li row: row of two-dimensional +*@li col: col of two-dimensional +*@li n: col number of two-dimensional +*/ +REG_OP(Coordinates1DTo2D) + .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64})) + .INPUT(shape, TensorType({DT_INT32, DT_INT64, DT_UINT64})) + .OUTPUT(row, TensorType({DT_INT32, DT_INT64, DT_UINT64})) + .OUTPUT(col, TensorType({DT_INT32, DT_INT64, DT_UINT64})) + .OUTPUT(n, TensorType({DT_INT32, DT_INT64, DT_UINT64})) + .OP_END_FACTORY_REG(Coordinates1DTo2D) + +/** +*@brief x[0] is i, x[1] is j and x[2] is k when algorithm is LU, +y = 0 when i >= k && j < k, +y = 1 when i == k && j == k, +y = 2 when i > k && j == k, +y = 3 when i == k && j > k, +y = 4 when i > k && j > k, +default y = 5 +use for lu decomposition +*@par Inputs: +*x: A Tensor of type int32/int64/uint64. \n + +*@par Attributes: +*algorithm: A string, only support LU now +*@par Outputs: +*y: A Tensor of type int32 +*/ +REG_OP(CaseCondition) + .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64})) + .OUTPUT(y, TensorType({DT_INT32})) + .ATTR(algorithm, String, "LU") + .OP_END_FACTORY_REG(CaseCondition) + +/** +*@brief write tensor value to tensor x. +*@par Inputs: +*x: A Tensor of type float16/float/double/int32/int64. \n +*begin:A Tensor of type int32/int64. \n +*value: A Tensor of type float16/float/double/int32/int64. 
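*@par Example
*An illustrative sketch of the write semantics (values hypothetical, not from this header):
*for x = [[0, 0, 0], [0, 0, 0]], begin = [0, 1] and value = [[1, 2], [3, 4]], the updated
*x becomes [[0, 1, 2], [0, 3, 4]]; "value" is written into "x" in place at offset "begin".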
+*@par Outputs: +*x: same tensor with input x +*/ +REG_OP(SliceWrite) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ + DT_INT32, DT_INT64})) + .INPUT(begin, TensorType({DT_INT32, DT_INT64})) + .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ + DT_INT32, DT_INT64})) + .OUTPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ + DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(SliceWrite) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index b4919a93..c3d4bbd1 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -263,6 +263,18 @@ typedef struct tagrtStreamLabelGotoTask_t { uint8_t reserved[36]; } rtStreamLabelGotoTask_t; +typedef struct tagrtNpuGetFloatStatusTask_t { + uint64_t outputAddr; + uint64_t outputSize; + uint32_t checkMode; + uint8_t reserved[20]; +} rtNpuGetFloatStatusTask_t; + +typedef struct tagrtNpuClearFloatStatusTask_t { + uint32_t checkMode; + uint8_t reserved[36]; +} rtNpuClearFloatStatusTask_t; + typedef struct tagTaskInfo { uint32_t type; uint32_t streamID; @@ -288,6 +300,8 @@ typedef struct tagTaskInfo { rtStreamSwitchNTaskInfo_t streamSwitchNTask; rtStreamLabelSwitchByIndexTask_t streamLabelSwitchIndexTask; rtStreamLabelGotoTask_t streamLabelGotoTask; + rtNpuGetFloatStatusTask_t npuGetFloatStatusTask; + rtNpuClearFloatStatusTask_t npuClearFloatStatusTask; uint32_t reserved[10]; } u; } rtTaskInfo_t;
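The float-status task structs added to rt_model.h above slot into the tagTaskInfo union like the
other task payloads. A minimal sketch, assuming only the declarations shown in this hunk (the
checkMode value is a hypothetical placeholder, and the task-type id is not part of this patch):

#include <string.h>
#include "runtime/rt_model.h"

// Illustrative only: populate a task descriptor for an NPU get-float-status task.
static void FillGetFloatStatusTask(rtTaskInfo_t *task, uint64_t output_addr, uint64_t output_size) {
  memset(task, 0, sizeof(*task));
  // task->type must be set to the runtime's task-type id for this payload (not shown in this hunk).
  task->u.npuGetFloatStatusTask.outputAddr = output_addr;
  task->u.npuGetFloatStatusTask.outputSize = output_size;
  task->u.npuGetFloatStatusTask.checkMode = 0U;  // hypothetical check-mode value
}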