
upgrade Ascend package 18 May 22

tags/v1.8.0^2
yanghaoran, 2 years ago
Parent commit: 4c708fd78e
35 changed files with 933 additions and 271 deletions
  1. +2   -0   inc/external/ge/ge_api_types.h
  2. +26  -2   inc/external/hccl/hccl.h
  3. +1   -1   inc/framework/common/debug/log.h
  4. +1   -0   inc/framework/common/ge_types.h
  5. +1   -9   inc/framework/common/op_types.h
  6. +2   -3   inc/framework/common/profiling_definitions.h
  7. +38  -0   inc/framework/common/runtime_tensor_desc.h
  8. +1   -0   inc/framework/common/types.h
  9. +7   -7   inc/framework/common/util.h
  10. +23  -12  inc/framework/omg/parser/model_parser.h
  11. +8   -1   inc/framework/omg/parser/parser_factory.h
  12. +11  -0   inc/framework/omg/parser/weights_parser.h
  13. +12  -0   inc/framework/pne/process_node_engine.h
  14. +27  -0   inc/framework/runtime/gert_api.h
  15. +94  -0   inc/framework/runtime/model_desc.h
  16. +142 -0   inc/framework/runtime/model_v2_executor.h
  17. +1   -1   metadef
  18. +2   -0   third_party/fwkacllib/inc/hccl/base.h
  19. +2   -0   third_party/fwkacllib/inc/ops/all_ops.h
  20. +33  -0   third_party/fwkacllib/inc/ops/array_ops.h
  21. +0   -53  third_party/fwkacllib/inc/ops/case_condition_ops.h
  22. +0   -48  third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h
  23. +2   -2   third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
  24. +49  -0   third_party/fwkacllib/inc/ops/encoding_ops.h
  25. +24  -0   third_party/fwkacllib/inc/ops/functional_ops.h
  26. +0   -63  third_party/fwkacllib/inc/ops/index_to_addr_ops.h
  27. +152 -0   third_party/fwkacllib/inc/ops/map_ops.h
  28. +79  -0   third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
  29. +40  -0   third_party/fwkacllib/inc/ops/nn_detect_ops.h
  30. +16  -16  third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
  31. +28  -0   third_party/fwkacllib/inc/ops/selection_ops.h
  32. +0   -50  third_party/fwkacllib/inc/ops/slice_write_ops.h
  33. +3   -3   third_party/fwkacllib/inc/ops/sparse_ops.h
  34. +92  -0   third_party/fwkacllib/inc/ops/vector_search.h
  35. +14  -0   third_party/fwkacllib/inc/runtime/rt_model.h

inc/external/ge/ge_api_types.h (+2 -0)

@@ -70,6 +70,8 @@ const char_t *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput";
 const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode";
 const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange";
 const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr";
+const char_t *const OPTION_EXEC_GRAPH_EXEC_TIMEOUT = "ge.exec.graphExecTimeout";
+const char_t *const OPTION_EXEC_MODEL_EXEC_TIMEOUT = "ge.exec.modelExecTimeout";

 // Option key: memory init
 const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";
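The two new keys are consumed like any other GE execution option. A hedged sketch, assuming the usual string-to-string option map from the public ge_api headers; the value and its unit here are hypothetical:

std::map<std::string, std::string> options;
options["ge.exec.graphExecTimeout"] = "600000";  // hypothetical value; check option docs for the real unit/range
options["ge.exec.modelExecTimeout"] = "600000";  // hypothetical value
ge::Session session(options);                    // assumes ge::GEInitialize() has already run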


inc/external/hccl/hccl.h (+26 -2)

@@ -145,7 +145,7 @@ extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
 extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);

 /**
- * @brief AllGather operator.
+ * @brief Send operator.
  *
  * @param sendBuff A pointer identifying the input data address of the operator.
  * @param count An integer(u64) identifying the number of the send data.
@@ -158,7 +158,7 @@ extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);
 extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType, uint32_t destRank, HcclComm comm,
 aclrtStream stream);
 /**
- * @brief AllGather operator.
+ * @brief Recv operator.
  *
  * @param recvBuff A pointer identifying the output data address of the operator.
  * @param count An integer(u64) identifying the number of the receive data.
@@ -171,6 +171,30 @@ extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType,
 extern HcclResult HcclRecv(void *recvBuf, uint64_t count, HcclDataType dataType, uint32_t srcRank, HcclComm comm,
 aclrtStream stream);

+/**
+ * @brief AlltoAllV operator.
+ *
+ * @param sendBuff A pointer identifying the input data address of the operator.
+ * @param sendCounts Integer array, where entry i specifies the number of elements to send to rank i.
+ * @param sdispls Integer array, where entry i specifies the displacement (offset from sendbuf, in units of sendtype)
+ * from which to send data to rank i.
+ * @param sendType Datatype of send buffer elements, must be one of the following types: int8, int32, int64, uint64,
+ * float16, float32.
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param recvCounts Integer array, where entry j specifies the number of elements to receive from rank j.
+ * @param rdispls Integer array, where entry j specifies the displacement (offset from recvbuf, in units of recvtype) to
+ * which data from rank j should be written.
+ * @param recvType Datatype of receive buffer elements, must be one of the following types: int8, int32, int64, uint64,
+ * float16, float32.
+ * @param comm A pointer identifying the communication resource based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclAlltoAllV(const void *sendBuf, const void *sendCounts, const void *sdispls, HcclDataType sendType,
+const void *recvBuf, const void *recvCounts, const void *rdispls, HcclDataType recvType,
+HcclComm comm, aclrtStream stream);
+
 /**
  * @brief Destroy HCCL comm
  *
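A minimal hedged sketch of calling the new HcclAlltoAllV. The communicator and stream are assumed to come from the usual HcclCommInitRootInfo / aclrtCreateStream setup, and the count/displacement arrays are taken to be uint64_t, which the void-pointer signature leaves implicit:

// Hypothetical 4-rank exchange: every rank sends one int32 element to each rank.
// sendBuf/recvBuf are device buffers holding 4 int32 values each.
HcclResult RunAlltoAllV(void *sendBuf, void *recvBuf, HcclComm comm, aclrtStream stream) {
  uint64_t sendCounts[4] = {1U, 1U, 1U, 1U};
  uint64_t sdispls[4] = {0U, 1U, 2U, 3U};
  uint64_t recvCounts[4] = {1U, 1U, 1U, 1U};
  uint64_t rdispls[4] = {0U, 1U, 2U, 3U};
  return HcclAlltoAllV(sendBuf, sendCounts, sdispls, HCCL_DATA_TYPE_INT32,
                       recvBuf, recvCounts, rdispls, HCCL_DATA_TYPE_INT32, comm, stream);
}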


inc/framework/common/debug/log.h (+1 -1)

@@ -88,7 +88,7 @@
 if ((expr) != ge::GRAPH_SUCCESS) { \
 REPORT_CALL_ERROR("E19999", "Operator graph failed"); \
 GELOGE(ge::FAILED, __VA_ARGS__); \
-return (FAILED); \
+return (ge::FAILED); \
 } \
 } while (false)




inc/framework/common/ge_types.h (+1 -0)

@@ -309,6 +309,7 @@ struct Options {
 int32_t physical_device_id;
 std::string profiling_mode;
 std::string profiling_options;
+int32_t graphExecTimeout;
 };

 // Profiling info of task


inc/framework/common/op_types.h (+1 -9)

@@ -45,14 +45,6 @@ class GE_FUNC_VISIBILITY OpTypeContainer {
 private:
 std::set<std::string> op_type_list_;
 };
-
-class GE_FUNC_VISIBILITY OpTypeRegistrar {
- public:
-  explicit OpTypeRegistrar(const std::string &op_type) noexcept {
-    OpTypeContainer::Instance()->Register(op_type);
-  }
-  ~OpTypeRegistrar() {}
-};
 } // namespace ge

 #define REGISTER_OPTYPE_DECLARE(var_name, str_name) \
@@ -60,7 +52,7 @@ class GE_FUNC_VISIBILITY OpTypeRegistrar {

 #define REGISTER_OPTYPE_DEFINE(var_name, str_name) \
 const char_t *var_name = str_name; \
-const ge::OpTypeRegistrar g_##var_name##_reg(str_name);
+const bool g_##var_name##_reg = (static_cast<void>(OpTypeContainer::Instance()->Register(str_name)), true);

 #define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name))
 #endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_
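This swaps the static registrar object for a comma-expression bool of static storage duration; the registration side effect still runs at static initialization. A sketch of what a registration such as REGISTER_OPTYPE_DEFINE(DATA, "Data") now expands to (names taken from the macro above):

const char_t *DATA = "Data";
const bool g_DATA_reg = (static_cast<void>(OpTypeContainer::Instance()->Register("Data")), true);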

inc/framework/common/profiling_definitions.h (+2 -3)

@@ -164,9 +164,8 @@ class ProfilingContext {

 int64_t RegisterString(const std::string &str);
 int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str);
-void UpdateElementHashId(const MsprofReporterCallback reporter_callback);
-static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str,
-uint64_t &hash_id);
+void UpdateElementHashId();
+static Status QueryHashId(const std::string &src_str, uint64_t &hash_id);
 size_t GetRegisterStringNum() const {
 return strings_to_index_.size();
 }


inc/framework/common/runtime_tensor_desc.h (+38 -0, new file)

@@ -0,0 +1,38 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_
#define INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_

#include <cstdint>

namespace ge {
constexpr int64_t kMaxDimSize = 32;

#pragma pack(push, 1)
struct RuntimeTensorDesc {
uint64_t data_addr;
int64_t data_offset_size;
int64_t dtype;
int64_t shape[kMaxDimSize + 1]; // shape:Dim_Num|DIM0|DIM1|...|DIM31
int64_t original_shape[kMaxDimSize + 1]; // original_shape:Dim_Num|DIM0|DIM1|...|DIM31
int64_t format;
int64_t sub_format;
uint8_t reserved[456]; // padding to 1024 bytes
};
#pragma pack(pop)
} // namespace ge

#endif // INC_FRAMEWORK_COMMON_RUNTIME_TENSOR_DESC_H_
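The reserved tail is sized so the packed struct is exactly 1024 bytes: 8 + 8 + 8 + (33 * 8) + (33 * 8) + 8 + 8 + 456 = 1024. A hedged compile-time check one could keep next to the struct (include path illustrative):

#include "framework/common/runtime_tensor_desc.h"
static_assert(sizeof(ge::RuntimeTensorDesc) == 1024, "RuntimeTensorDesc must stay padded to 1024 bytes");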

inc/framework/common/types.h (+1 -0)

@@ -88,6 +88,7 @@ REGISTER_OPTYPE_DECLARE(DROPOUTDOMASKV3, "DropOutDoMaskV3");
 REGISTER_OPTYPE_DECLARE(DROPOUTDOMASKV3D, "DropOutDoMaskV3D");
 REGISTER_OPTYPE_DECLARE(SOFTMAXV2WITHDROPOUTDOMASKV3D, "SoftmaxV2WithDropOutDoMaskV3D");
 REGISTER_OPTYPE_DECLARE(DROPOUTGENMASK, "DropOutGenMask");
+REGISTER_OPTYPE_DECLARE(AXPYWITHSOFTMAXANDDROPOUTDOMASK, "AxpyWithSoftmaxAndDropOutDoMask");
 REGISTER_OPTYPE_DECLARE(CONCAT, "Concat");
 REGISTER_OPTYPE_DECLARE(ROIPOOLING, "ROIPooling");
 REGISTER_OPTYPE_DECLARE(PROPOSAL, "Proposal");


inc/framework/common/util.h (+7 -7)

@@ -118,13 +118,13 @@
 } while (false)

 // Check if the parameter is null. If yes, return PARAM_INVALID and record the error
-#define GE_CHECK_NOTNULL(val) \
-  do { \
-    if ((val) == nullptr) { \
-      REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \
-      GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \
-      return ge::PARAM_INVALID; \
-    } \
+#define GE_CHECK_NOTNULL(val, ...) \
+  do { \
+    if ((val) == nullptr) { \
+      REPORT_INNER_ERROR("E19999", "Param:" #val " is nullptr, check invalid" __VA_ARGS__); \
+      GELOGE(ge::FAILED, "[Check][Param:" #val "]null is invalid" __VA_ARGS__); \
+      return ge::PARAM_INVALID; \
+    } \
   } while (false)

 // Check if the parameter is null. If yes, just return and record the error
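The macro is now variadic: extra arguments must begin with a string literal so they concatenate onto the base message, followed by any format arguments. A hedged sketch of hypothetical call sites:

ge::Status CheckGraph(const ge::ComputeGraphPtr &graph, const std::string &model_name) {
  GE_CHECK_NOTNULL(graph);  // unchanged single-argument form
  GE_CHECK_NOTNULL(graph, ", when loading model %s", model_name.c_str());  // extra context joins the log message
  return ge::SUCCESS;
}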


inc/framework/omg/parser/model_parser.h (+23 -12)

@@ -52,7 +52,7 @@ class GE_FUNC_VISIBILITY ModelParser {
 * @return SUCCESS
 * @return Others failed
 */
-virtual domi::Status Parse(const char *file, ge::Graph &graph) = 0;
+virtual Status Parse(const char *file, ge::Graph &graph) = 0;

 /**
 * @ingroup domi_omg
@@ -64,7 +64,7 @@ class GE_FUNC_VISIBILITY ModelParser {
 * @return FAILED
 * @author
 */
-virtual domi::Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0;
+virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0;

 /**
 * @ingroup domi_omg
@@ -76,7 +76,7 @@ class GE_FUNC_VISIBILITY ModelParser {
 * @return FAILED
 * @author
 */
-virtual domi::Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0;
+virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0;

 /**
 * @ingroup domi_omg
@@ -86,7 +86,7 @@ class GE_FUNC_VISIBILITY ModelParser {
 * @return SUCCESS
 * @return Others failed
 */
-virtual domi::Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0;
+virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0;

 /**
 * @ingroup domi_omg
@@ -97,8 +97,8 @@ class GE_FUNC_VISIBILITY ModelParser {
 * @return SUCCESS
 * @return Others failed
 */
-virtual domi::Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback,
-ge::ComputeGraphPtr &graph) = 0;
+virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback,
+ge::ComputeGraphPtr &graph) = 0;
 /**
 * @ingroup domi_omg
 * @brief Convert model files to JSON format
@@ -107,10 +107,10 @@ class GE_FUNC_VISIBILITY ModelParser {
 * @return SUCCESS
 * @return Others failed
 */
-virtual domi::Status ToJson(const char *model_file, const char *json_file) {
+virtual Status ToJson(const char *model_file, const char *json_file) {
 (void)model_file;
 (void)json_file;
-return domi::SUCCESS;
+return SUCCESS;
 }

 /*
@@ -121,7 +121,7 @@ class GE_FUNC_VISIBILITY ModelParser {
 */
 virtual ge::DataType ConvertToGeDataType(const uint32_t type) = 0;

-virtual domi::Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0;
+virtual Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0;

 /**
 * @ingroup domi_omg
@@ -131,7 +131,7 @@ class GE_FUNC_VISIBILITY ModelParser {
 * @return SUCCESS
 * @return Others failed
 */
-virtual domi::Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) {
+virtual Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) {
 (void)serialized_proto;
 (void)graph;
 return UNSUPPORTED;
@@ -146,13 +146,24 @@ class GE_FUNC_VISIBILITY ModelParser {
 * @return SUCCESS
 * @return Others failed
 */
-virtual domi::Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback,
-ge::ComputeGraphPtr &graph) {
+virtual Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback,
+ge::ComputeGraphPtr &graph) {
 (void)serialized_proto;
 (void)callback;
 (void)graph;
 return UNSUPPORTED;
 }
+
+virtual bool HasError() {
+return false;
+}
+
+virtual Status Save(const std::string &file) {
+(void)file;
+return SUCCESS;
+}
+
+virtual void Clear(){};
 };
 } // namespace domi




inc/framework/omg/parser/parser_factory.h (+8 -1)

@@ -23,6 +23,7 @@
 #include <string>
 #include "framework/omg/omg_inner_types.h"
 #include "framework/omg/parser/parser_types.h"
+#include "external/register/register.h"

 namespace domi {
 class WeightsParser;
@@ -131,6 +132,12 @@ class GE_FUNC_VISIBILITY WeightsParserRegisterar {
 return std::shared_ptr<WeightsParser>(ptr); \
 } \
 WeightsParserRegisterar g_##type##_Weights_Parser_Creator(type, Creator_##type##_Weights_Parser)
-}; // namespace domi
+
+class GE_FUNC_VISIBILITY OpRegTbeParserFactory {
+ public:
+  static OpRegTbeParserFactory *Instance();
+  void Finalize(const domi::OpRegistrationData &reg_data);
+};
+} // namespace domi

 #endif // INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_

inc/framework/omg/parser/weights_parser.h (+11 -0)

@@ -67,6 +67,17 @@ class GE_FUNC_VISIBILITY WeightsParser {
 * @author
 */
 virtual Status ParseFromMemory(const char *input, uint32_t lengt, ge::ComputeGraphPtr &graph) = 0;
+
+virtual bool HasError() {
+return false;
+}
+
+virtual Status Save(const std::string &file) {
+(void)file;
+return SUCCESS;
+}
+
+virtual void Clear() {}
 };
 } // namespace domi




inc/framework/pne/process_node_engine.h (+12 -0)

@@ -27,6 +27,15 @@
 #include "framework/pne/pne_model.h"

 namespace ge {
+class ProcessNodeEngineImpl {
+ public:
+  virtual Status OptimizeGraph(const std::vector<GeTensor> &inputs, ComputeGraphPtr &compute_graph) = 0;
+
+  virtual Status BuildGraph(ComputeGraphPtr &compute_graph, PneModelPtr &model) = 0;
+};
+
+using ProcessNodeEngineImplPtr = std::shared_ptr<ProcessNodeEngineImpl>;
+
 class ProcessNodeEngine {
 public:
 ProcessNodeEngine() = default;
@@ -45,8 +54,11 @@ class ProcessNodeEngine {

 virtual const std::string &GetEngineName(const ge::NodePtr &node_ptr = nullptr) const = 0;

+virtual void SetImpl(ProcessNodeEngineImplPtr impl) = 0;
+
 protected:
 std::string engine_id_;
+ProcessNodeEngineImplPtr impl_ = nullptr;
 };

 using ProcessNodeEnginePtr = std::shared_ptr<ProcessNodeEngine>;
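A hedged sketch of plugging a custom implementation in through the new hook; the class name and call site are hypothetical:

class NoopEngineImpl : public ge::ProcessNodeEngineImpl {
 public:
  ge::Status OptimizeGraph(const std::vector<ge::GeTensor> &inputs, ge::ComputeGraphPtr &compute_graph) override {
    (void)inputs;
    (void)compute_graph;
    return ge::SUCCESS;  // leave the graph untouched
  }
  ge::Status BuildGraph(ge::ComputeGraphPtr &compute_graph, ge::PneModelPtr &model) override {
    (void)compute_graph;
    (void)model;
    return ge::SUCCESS;  // no model produced by this stub
  }
};

// engine->SetImpl(std::make_shared<NoopEngineImpl>());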


inc/framework/runtime/gert_api.h (+27 -0, new file)

@@ -0,0 +1,27 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_
#define AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_
#include "model_v2_executor.h"
#include "common/ge_types.h"

namespace gert {
std::unique_ptr<ModelV2Executor> LoadExecutorFromFile(const char *file_path, ge::graphStatus &error_code);
std::unique_ptr<ModelV2Executor> LoadExecutorFromModelData(const ge::ModelData &model_data,
ge::graphStatus &error_code);
} // namespace gert
#endif // AIR_CXX_INC_FRAMEWORK_RUNTIME_GERT_API_H_
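A hedged sketch of the intended loading flow (the .om file name is hypothetical):

ge::graphStatus err = ge::GRAPH_SUCCESS;
std::unique_ptr<gert::ModelV2Executor> executor = gert::LoadExecutorFromFile("sample.om", err);
if ((executor == nullptr) || (err != ge::GRAPH_SUCCESS)) {
  // handle the load failure; err carries the reason
}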

inc/framework/runtime/model_desc.h (+94 -0, new file)

@@ -0,0 +1,94 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
#define AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
#include "common/ge_types.h"
#include "exe_graph/runtime/shape.h"
#include "exe_graph/runtime/continuous_vector.h"
#include "exe_graph/runtime/storage_format.h"
#include "exe_graph/runtime/storage_shape.h"
namespace gert {
class ShapeRange {
public:
const Shape &GetMin() const;
const Shape &GetMax() const;
Shape &MutableMin();
Shape &MutableMax();
private:
Shape min_;
Shape max_;
};
class ModelIoDesc {
public:
const char *GetName() const;
int32_t GetDataType() const;
ge::Format GetStorageFormat() const;
ge::Format GetOriginFormat() const;
int64_t GetSize() const;
const Shape &GetStorageShape() const;
const Shape &GetOriginShape() const;
const ShapeRange &GetOriginShapeRange() const;
const ShapeRange &GetStorageShapeRange() const;
void SetName(const char *name);
void SetDataType(int32_t data_type);
void SetStorageFormat(ge::Format format);
void SetOriginFormat(ge::Format format);
Shape &MutableStorageShape();
Shape &MutableOriginShape();
ShapeRange &MutableOriginShapeRange();
ShapeRange &MutableStorageShapeRange();
private:
const char *name_;
int32_t data_type_;
StorageFormat format_;
StorageShape shape_;
ShapeRange storage_shape_range_;
ShapeRange origin_shape_range_;
};
class ModelDesc {
public:
static size_t CalcSize(size_t input_num, size_t output_num);
const ModelIoDesc *GetInputDesc(size_t index) const;
const ModelIoDesc *GetAllInputsDesc(size_t &input_num) const;
const ModelIoDesc *GetOutputDesc(size_t index) const;
const ModelIoDesc *GetAllOutputsDesc(size_t &output_num) const;
ModelIoDesc *MutableInputDesc(size_t index);
ModelIoDesc *MutableOutputDesc(size_t index);
ModelIoDesc *AllMutableIoDesc(size_t &input_num, size_t &output_num);
void SetInputNum(size_t input_num);
void SetOutputNum(size_t output_num);
ge::graphStatus GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const;
ge::graphStatus GetUserDesignateShapeOrder(std::vector<std::string> &user_designate_shape_order) const;
ge::graphStatus GetModelAttrs(std::vector<std::string> &attrs) const;
private:
size_t input_num_;
size_t output_num_;
ContinuousVector model_io_descs_;
};
} // namespace gert
#endif // AIR_CXX_INC_FRAMEWORK_RUNTIME_MODEL_DESC_H_
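A hedged sketch of reading input metadata off a loaded executor. The pointer returned by GetAllInputsDesc is treated as a contiguous array, which the pointer-plus-count shape of the API suggests but this header does not state:

size_t input_num = 0U;
const gert::ModelIoDesc *inputs_desc = executor->GetModelDesc().GetAllInputsDesc(input_num);
for (size_t i = 0U; i < input_num; ++i) {
  const gert::ModelIoDesc &io = inputs_desc[i];
  (void)io.GetName();          // tensor name
  (void)io.GetDataType();      // data type as int32_t
  (void)io.GetStorageShape();  // runtime storage shape
}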

inc/framework/runtime/model_v2_executor.h (+142 -0, new file)

@@ -0,0 +1,142 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_
#define AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_
#include <memory>
#include "graph/compute_graph.h"
#include "graph/ge_error_codes.h"
#include "model_desc.h"
#include "runtime/stream.h"
#include "exe_graph/runtime/tensor.h"

namespace gert {
enum SubExeGraphType { kInitExeGraph, kMainExeGraph, kDeInitExeGraph, kSubExeGraphTypeEnd };
static constexpr char *kSubExeGraphTypeStrs[kSubExeGraphTypeEnd] = {(char *)"Init", (char *)"Main", (char *)"DeInit"};
inline const char *GetSubExeGraphTypeStr(SubExeGraphType type) {
return kSubExeGraphTypeStrs[type];
}

class ResourceGuard {
public:
void *ResetExecutionData(std::unique_ptr<uint8_t[]> execution_data);
void ResetAnyValue(std::unique_ptr<uint8_t[]> any_values, size_t count);
void PushNode(void *node);
void PushWatcher(void *watcher);
void *ResetNodesArray(std::unique_ptr<uint8_t[]> nodes_array);
void *ResetStartNodesArray(std::unique_ptr<uint8_t[]> start_nodes_array);
void *ResetNodesIndgreeArray(std::unique_ptr<uint8_t[]> nodes_indgree_array);
void *ResetNodesWaitIndgreeArray(std::unique_ptr<uint8_t[]> nodes_indgree_array);
void *ResetInputsArray(std::unique_ptr<uint8_t[]> inputs_array);
void *ResetOutputsArray(std::unique_ptr<uint8_t[]> outputs_array);
void *ResetWatchersArray(std::unique_ptr<uint8_t[]> watchers_array);
void *ResetReadyQueue(void *ready_queue);
void *ResetBuffer(std::unique_ptr<uint8_t[]> buffer);
void *ResetComputeNodeInfo(std::unique_ptr<uint8_t[]> compute_node_info);
void *ResetKernelExtendInfo(std::unique_ptr<uint8_t[]> kernel_extend_info);
void *ResetModelDesc(std::unique_ptr<uint8_t[]> model_desc);

~ResourceGuard();

private:
std::unique_ptr<uint8_t[]> execution_data_holder_;
size_t any_values_num_;
std::unique_ptr<uint8_t[]> any_values_guard_;

std::vector<std::unique_ptr<void, decltype(&free)>> nodes_guarder_;
std::vector<std::unique_ptr<void, decltype(&free)>> watchers_guarder_;
std::unique_ptr<uint8_t[]> continuous_buffer_guarder_;
std::unique_ptr<uint8_t[]> buffer_guarder_;
std::unique_ptr<uint8_t[]> compute_node_info_guarder_;
std::unique_ptr<uint8_t[]> kernel_extend_info_guarder_;
std::unique_ptr<uint8_t[]> model_desc_guarder_;

std::unique_ptr<uint8_t[]> nodes_array_guarder_;
std::unique_ptr<uint8_t[]> start_nodes_array_guarder_;
std::unique_ptr<uint8_t[]> nodes_indgree_array_guarder_;
std::unique_ptr<uint8_t[]> nodes_wait_indgree_array_guarder_;
std::unique_ptr<uint8_t[]> inputs_array_guarder_;
std::unique_ptr<uint8_t[]> outputs_array_guarder_;
std::unique_ptr<uint8_t[]> watchers_array_guarder_;
std::unique_ptr<void, decltype(&free)> ready_queue_guarder_{nullptr, nullptr};
};

struct ModelExecuteArg {
rtStream_t stream;
};
static_assert(std::is_standard_layout<ModelExecuteArg>::value, "The class ModelExecuteArg must be a POD");

class ExeGraphExecutor {
public:
// todo: release the AnyValue resources at unload time
ge::graphStatus Load() {
return ge::GRAPH_SUCCESS;
}
ge::graphStatus UnLoad() {
return ge::GRAPH_SUCCESS;
}

/**
* Sets the inputs/outputs for graph execution. Note that callers must themselves guarantee that inputs/outputs are fully refreshed!
*/
ge::graphStatus SpecifyInputs(void **inputs, size_t start, size_t num);
ge::graphStatus SpecifyOutputs(void **outputs, size_t num);
ge::graphStatus Execute();

const void *GetExecutionData() const {
return execution_data_;
}

ResourceGuard &GetResourceGuard();
void *SetExecutionData(std::unique_ptr<uint8_t[]> execution_data);

private:
friend class ModelV2ExecutorTestHelper;

void *execution_data_;
ResourceGuard resource_guard_;
};
class ModelV2Executor {
public:
static std::unique_ptr<ModelV2Executor> Create(const ge::ComputeGraphPtr &root_graph);

ge::graphStatus Load();
ge::graphStatus Execute(const ModelExecuteArg &arg, Tensor **inputs, size_t input_num, Tensor **outputs,
size_t output_num);
ge::graphStatus ExecuteSync(Tensor **inputs, size_t input_num, Tensor **outputs, size_t output_num);
ge::graphStatus UnLoad();

const ModelDesc &GetModelDesc() const;
void SetModelDesc(ModelDesc *model_desc);
ModelV2Executor(const ModelV2Executor &) = delete;
ModelV2Executor(ModelV2Executor &&) = delete;
ModelV2Executor &operator=(const ModelV2Executor &) = delete;
ModelV2Executor &operator=(ModelV2Executor &&) = delete;

private:
friend class ModelV2ExecutorBuilder;
friend class ModelV2ExecutorTestHelper;
ModelV2Executor() = default;

private:
std::array<ExeGraphExecutor, kSubExeGraphTypeEnd> graphs_;
ResourceGuard resource_guard_;
ModelDesc *model_desc_ = nullptr;
rtStream_t default_stream_ = nullptr;
};
} // namespace gert

#endif // AIR_CXX_RUNTIME_V2_CORE_MODEL_V_2_EXECUTOR_H_
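A hedged end-to-end sketch, continuing from the loading example above. Stream creation and tensor preparation are elided; inputs/outputs are assumed to be arrays of gert::Tensor pointers already sized to match the model description:

ge::graphStatus ret = executor->Load();
gert::ModelExecuteArg arg{stream};  // stream: an rtStream_t obtained from the runtime
ret = executor->Execute(arg, inputs, input_num, outputs, output_num);
// ret = executor->ExecuteSync(inputs, input_num, outputs, output_num);  // synchronous alternative
ret = executor->UnLoad();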

metadef (+1 -1, submodule)

@@ -1 +1 @@
-Subproject commit f3e9df35da67ff00a22a09ec5b369bbc4bac9e74
+Subproject commit 002617852e22767bd864db3c01595630e23f5496

third_party/fwkacllib/inc/hccl/base.h (+2 -0)

@@ -211,6 +211,8 @@ typedef struct {

 #define HCCL_REQUEST_NULL NULL

+#define HCCL_TAG_ANY (1 << 30)
+
 #ifdef __cplusplus
 }
 #endif // __cplusplus


third_party/fwkacllib/inc/ops/all_ops.h (+2 -0)

@@ -42,6 +42,7 @@
 #include "list_ops.h"
 #include "logging_ops.h"
 #include "lookup_ops.h"
+#include "map_ops.h"
 #include "math_ops.h"
 #include "matrix_calculation_ops.h"
 #include "nn_batch_norm_ops.h"
@@ -79,4 +80,5 @@
 #include "warp_perspective_ops.h"
 #include "vector_search.h"
 #include "deep_md.h"
+#include "encoding_ops.h"
 #endif // OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_

third_party/fwkacllib/inc/ops/array_ops.h (+33 -0)

@@ -1550,6 +1550,39 @@ REG_OP(EnsureShape)
 DT_FLOAT,DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
 .REQUIRED_ATTR(shape, ListInt)
 .OP_END_FACTORY_REG(EnsureShape)
+
+/**
+* @brief Finds the first unique element from every consecutive group of equivalent elements.
+
+* @par Inputs:
+* x: A ND tensor.
+
+* @par Attributes:
+* @li return_idx: An optional bool. Whether to also return the indices. The default value is False.
+* @li return_counts: An optional bool. Whether to also return the counts for each element. The default is False.
+* @li axis: An optional int. Which axis to apply unique on. The default is 1000, which means None.
+
+* @par Outputs:
+* @li y: The unique elements of "x".
+* @li idx: The index in "y" of each value of "x".
+* @li count: The counts of each value of "y".
+
+* @attention Constraints:
+* UniqueConsecutive runs on the Ascend AI CPU, which delivers poor performance.
+
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator UniqueConsecutive.
+*/
+REG_OP(UniqueConsecutive)
+.INPUT(x, TensorType::BasicType())
+.OUTPUT(y, TensorType::BasicType())
+.OUTPUT(idx, TensorType::IndexNumberType())
+.OUTPUT(count, TensorType::IndexNumberType())
+.ATTR(return_idx, Bool, false)
+.ATTR(return_counts, Bool, false)
+.ATTR(axis, Int, 1000)
+.OP_END_FACTORY_REG(UniqueConsecutive)
 } // namespace ge

 #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_

third_party/fwkacllib/inc/ops/case_condition_ops.h (+0 -53, deleted)

@@ -1,53 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file case_condition_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_CASE_CONDITION_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_CASE_CONDITION_OPS_H_

#include "graph/operator_reg.h"

namespace ge {
/**
*@brief x[0] is i, x[1] is j and x[2] is k when algorithm is LU,
y = 0 when i >= k && j < k,
y = 1 when i == k && j == k,
y = 2 when i > k && j == k,
y = 3 when i == k && j > k,
y = 4 when i > k && j > k,
default y = 5
use for lu decomposition
*@par Inputs:
*x: A Tensor of type int32/int64/uint64. \n

*@par Attributes:
*algorithm: A string, only support LU now
*@par Outputs:
*y: A Tensor of type int32
*/
REG_OP(CaseCondition)
.INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.OUTPUT(y, TensorType({DT_INT32}))
.ATTR(algorithm, String, "LU")
.OP_END_FACTORY_REG(CaseCondition)

} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_CASE_CONDITION_OPS_H_

third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h (+0 -48, deleted)

@@ -1,48 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file coordinates_1d_to_2d_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_COORDINATES_1D_TO_2D_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_COORDINATES_1D_TO_2D_OPS_H_

#include "graph/operator_reg.h"

namespace ge {
/**
*@brief Convert one-dimensional coordinates to two-dimensional coordinates.
*@par Inputs:
*@li x: A Tensor of type int32/int64/uint64. One-dimensional coordinates.
*@li shape: A Tensor of type int32/int64/uint64. 4D tensor [N,C,H,W].
*@par Outputs:
*@li row: row of two-dimensional
*@li col: col of two-dimensional
*@li n: col number of two-dimensional
*/
REG_OP(Coordinates1DTo2D)
.INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.INPUT(shape, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.OUTPUT(row, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.OUTPUT(col, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.OUTPUT(n, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.OP_END_FACTORY_REG(Coordinates1DTo2D)

} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_COORDINATES_1D_TO_2D_OPS_H_

third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+2 -2)

@@ -1620,8 +1620,8 @@ REG_OP(Greater)
 * Compatible with the TensorFlow operator zeros_like.
 */
 REG_OP(ZerosLike)
-.INPUT(x, TensorType::BasicType())
-.OUTPUT(y, TensorType::BasicType())
+.INPUT(x, TensorType({BasicType(), DT_VARIANT}))
+.OUTPUT(y, TensorType({BasicType(), DT_VARIANT}))
 .OP_END_FACTORY_REG(ZerosLike)

 /**


third_party/fwkacllib/inc/ops/encoding_ops.h (+49 -0, new file)

@@ -0,0 +1,49 @@
/**
* Copyright (C) Huawei Technologies Co., Ltd 2022-2022. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file encoding_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {
/**
* @brief An op to decode indices for LDPC code. \n

* @par Inputs:
* @li valid_num: an int32 tensor indicates index limit for each line.
* @li matrix_info: an int32 2D-tensor store the block indices info of connection H matrix. \n

* @par Outputs:
* indices: an int32 2D-tensor store the concrete indices value.
*
* @par Restrictions:
* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(LDPCDecode)
.INPUT(valid_num, TensorType({DT_INT32}))
.INPUT(matrix_info, TensorType({DT_INT32}))
.OUTPUT(indices, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(LDPCDecode)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_ENCODING_OPS_H_

third_party/fwkacllib/inc/ops/functional_ops.h (+24 -0)

@@ -124,6 +124,30 @@ REG_OP(If)
 .GRAPH(else_branch)
 .OP_END_FACTORY_REG(If)

+/**
+*@brief Select one of the subgraphs to pass the input tensors and return the output tensors . \n
+
+*@par Inputs:
+*@li branch_index: An int32 scalar which determines the selected subgraph.
+*@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n
+
+*@par Graphs:
+*branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors,
+* whose types are the same as what every other subgraph returns . \n
+
+*@par Outputs:
+*output: The output tensors returned by one of branches . It's a dynamic output. \n
+
+*@par Third-party framework compatibility
+*@Compatible with the TensorFlow operator Case.
+*/
+REG_OP(StatelessCase)
+.INPUT(branch_index, DT_INT32)
+.DYNAMIC_INPUT(input, TensorType::ALL())
+.DYNAMIC_OUTPUT(output, TensorType::ALL())
+.DYNAMIC_GRAPH(branches)
+.OP_END_FACTORY_REG(StatelessCase)
+
 /**
 *@brief Select one of the subgraphs to pass the input tensors and return the output tensors . \n




third_party/fwkacllib/inc/ops/index_to_addr_ops.h (+0 -63, deleted)

@@ -1,63 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file index_to_addr_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_INDEX_TO_ADDR_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_INDEX_TO_ADDR_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief get block tensor according to base addr tensor, for hccl remote read to use.
*@par Inputs:
*@li base_addr: A Tensor of type int64/uint64. \n
*@li row:A Tensor of type int64/uint64. \n
*@li col: A Tensor of type int64/uint64.

*@par Outputs:
*addr_table: list of [rank id, host addr, device addr, read size]

*@par Attributes:
*@li ori_shape: An required list int. Shape of base tensor.
*@li block_size: An required list int. Shape of split block tensor.
*@li ori_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to
"Matrix". Currently only support Matrix storage
*@li block_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to
"Matrix". Currently only support Matrix storage
*@li rank_id: An optional int of rank id. Defaults is 0
*@li dtype: An optional Type of base tensor. Defaults is DT_FLOAT
*/
REG_OP(IndexToAddr)
.INPUT(base_addr, TensorType({DT_INT64, DT_UINT64}))
.INPUT(x, TensorType({DT_INT64, DT_UINT64}))
.OUTPUT(addrs_table, TensorType({DT_INT64, DT_UINT64}))
.REQUIRED_ATTR(ori_shape, ListInt)
.REQUIRED_ATTR(block_size, ListInt)
.ATTR(ori_storage_mode, String, "Matrix")
.ATTR(block_storage_mode, String, "Matrix")
.ATTR(rank_id, Int, 0)
.ATTR(dtype, Type, DT_FLOAT)
.OP_END_FACTORY_REG(IndexToAddr)

} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_INDEX_TO_ADDR_OPS_H_

third_party/fwkacllib/inc/ops/map_ops.h (+152 -0, new file)

@@ -0,0 +1,152 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file map_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_
#include "graph/operator_reg.h"

namespace ge {
/**
* @brief Returns whether the given key exists in the map. \n

* @par Inputs:
* @li input_handle: A scalar Tensor of type variant. The original map.
* @li key: The key to check. Supports int32, int64, string. \n

* @par Outputs:
* has_key: A scalar Tensor of type bool. Whether the key is already in the map or not. \n

* @par Third-party framework compatibility.
* Compatible with tensorflow TensorMapHasKey operator.
*/
REG_OP(TensorMapHasKey)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.OUTPUT(has_key, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(TensorMapHasKey)

/**
* @brief Returns a tensor map with item from given key erased. \n

* @par Inputs:
* @li input_handle: A scalar Tensor of type variant. The original map.
* @li key: The key of the value to be erased. Supports int32, int64, string. \n

* @par Outputs:
* output_handle: A scalar Tensor of type variant. The map with value from given key removed. \n

* @par Third-party framework compatibility.
* Compatible with tensorflow TensorMapErase operator.
*/
REG_OP(TensorMapErase)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.OP_END_FACTORY_REG(TensorMapErase)

/**
* @brief Returns a map that is the 'input_handle'
with the given key-value pair inserted. \n

* @par Inputs:
* @li input_handle: The original map, Must be type: DT_VARIANT.
* @li key: A Tensor,the key to be inserted.Must be one of
the following types: int32, int64, string.
* @li value: A Tensor,the value to be inserted.Must be
one of BasicType types. \n

* @par Outputs:
* output_handle: The map with key and value inserted.
Must be type: DT_VARIANT. \n
*/
REG_OP(TensorMapInsert)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.INPUT(value, BasicType)
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.OP_END_FACTORY_REG(TensorMapInsert)

/**
* @brief Returns the value from a given key in a tensor map . \n

* @par Inputs:
* @li input_handle: The input map. Must be type: DT_VARIANT.
* @li key: A Tensor,the key to be looked up. Must be one of
the following types: int32,int64,string . \n

* @par Attributes:
* value_dtype: A int. Representing the type of value . \n

* @par Outputs:
* value: A Tensor,the value found from the given key.
*/
REG_OP(TensorMapLookup)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(key, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.OUTPUT(value, BasicType)
.REQUIRED_ATTR(value_dtype, Type)
.OP_END_FACTORY_REG(TensorMapLookup)

/**
* @brief return TensorMap Size. \n
*
* @par Inputs:
* input_handle: A Tensor. Must be one of the following types: variant. \n
*
* @par Outputs:
* size: A Tensor. Must be one of the following types: int32. \n
*/
REG_OP(TensorMapSize)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.OUTPUT(size, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(TensorMapSize)

/**
* @brief Return TensorMapStackKeys \n
*
* @par Inputs:
* input_handle: A Tensor. Must be one of the following types: variant. \n
*
* @par Outputs:
* keys: A Tensor. Must be one of the following types: int32, int64, string. \n
*
* @par Attributes:
* key_dtype: An required param. It is the dtype of the key.
*/
REG_OP(TensorMapStackKeys)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.OUTPUT(keys, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.REQUIRED_ATTR(key_dtype, Type)
.OP_END_FACTORY_REG(TensorMapStackKeys)

/**
* @brief Creates and returns an empty tensor map. \n

* @par Outputs:
* handle: An empty tensor map . \n

* @par Third-party framework compatibility.
* Compatible with tensorflow EmptyTensorMap operator.
*/
REG_OP(EmptyTensorMap)
.OUTPUT(handle, TensorType({DT_VARIANT}))
.OP_END_FACTORY_REG(EmptyTensorMap)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_MAP_OPS_H_

third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+79 -0)

@@ -24,6 +24,82 @@
 #include "graph/operator_reg.h"

 namespace ge {
+/**
+* @brief Backprop W of AttentionLnQKV + ReduceSumD \n
+* @par Inputs:
+* Four inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16.
+* @li query_dx: A Tensor. Must be one of the following types: float16.
+* @li key_dw: A Tensor. Must be one of the following types: float16.
+* @li value_dw: A Tensor. Must be one of the following types: float16.
+
+* @par Attributes:
+* @li trans_a: An optional attribute, the type is bool. Defaults to True.
+* @li trans_b: An optional attribute, the type is bool. Defaults to False. \n
+
+* @par Outputs:
+* Six outputs, including:
+* @li dw_query: A Tensor. Must be one of the following types: float16.
+* @li dw_key: A Tensor. Must be one of the following types: float16.
+* @li dw_value: A Tensor. Must be one of the following types: float16.
+* @li dbias_query: A Tensor. Must be one of the following types: float16.
+* @li dbias_key: A Tensor. Must be one of the following types: float16.
+* @li dbias_value: A Tensor. Must be one of the following types: float16. \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+REG_OP(AttentionQKVGradW)
+.INPUT(x, TensorType({DT_FLOAT16}))
+.INPUT(query_dx, TensorType({DT_FLOAT16}))
+.INPUT(key_dw, TensorType({DT_FLOAT16}))
+.INPUT(value_dw, TensorType({DT_FLOAT16}))
+.OUTPUT(dw_query, TensorType({DT_FLOAT16}))
+.OUTPUT(dw_key, TensorType({DT_FLOAT16}))
+.OUTPUT(dw_value, TensorType({DT_FLOAT16}))
+.OUTPUT(dbias_query, TensorType({DT_FLOAT16}))
+.OUTPUT(dbias_key, TensorType({DT_FLOAT16}))
+.OUTPUT(dbias_value, TensorType({DT_FLOAT16}))
+.ATTR(trans_a, Bool, true)
+.ATTR(trans_b, Bool, false)
+.OP_END_FACTORY_REG(AttentionQKVGradW)
+
+/**
+* @brief Backprop X of AttentionLnQKV + AddN \n
+* @par Inputs:
+* Seven inputs, including:
+* @li ln_dx: A Tensor. Must be one of the following types: float16.
+* @li query_dx: A Tensor. Must be one of the following types: float16.
+* @li key_dw: A Tensor. Must be one of the following types: float16.
+* @li value_dw: A Tensor. Must be one of the following types: float16.
+* @li kernel_query: A Tensor. Must be one of the following types: float16.
+* @li kernel_key: A Tensor. Must be one of the following types: float16.
+* @li kernel_value: A Tensor. Must be one of the following types: float16. \n
+
+* @par Attributes:
+* @li trans_a: An optional attribute, the type is bool. Defaults to False.
+* @li trans_b: An optional attribute, the type is bool. Defaults to True. \n
+
+* @par Outputs:
+* One output, including:
+* @li dx: A Tensor. Must be one of the following types: float16. \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
+*/
+REG_OP(AttentionQKVGradX)
+.INPUT(ln_dx, TensorType({DT_FLOAT16}))
+.INPUT(query_dx, TensorType({DT_FLOAT16}))
+.INPUT(key_dw, TensorType({DT_FLOAT16}))
+.INPUT(value_dw, TensorType({DT_FLOAT16}))
+.INPUT(kernel_query, TensorType({DT_FLOAT16}))
+.INPUT(kernel_key, TensorType({DT_FLOAT16}))
+.INPUT(kernel_value, TensorType({DT_FLOAT16}))
+.OUTPUT(dx, TensorType({DT_FLOAT16}))
+.ATTR(trans_a, Bool, false)
+.ATTR(trans_b, Bool, true)
+.OP_END_FACTORY_REG(AttentionQKVGradX)
+
 /**
 * @brief
 / (MatMul -> ConfusionTransposeD).
@@ -54,6 +130,9 @@ namespace ge {
 * @li value_output: A Tensor. Must be one of the following types: float16.
 * @li mean: A Tensor. Must be one of the following types: float16.
 * @li variance: A Tensor. Must be one of the following types: float16. \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n
 */
 REG_OP(AttentionLnQKV)
 .INPUT(x, TensorType({DT_FLOAT16}))


third_party/fwkacllib/inc/ops/nn_detect_ops.h (+40 -0)

@@ -2276,6 +2276,46 @@ REG_OP(BalanceRois)
 .OUTPUT(balance_rois, TensorType({DT_FLOAT16, DT_FLOAT}))
 .OUTPUT(index, TensorType({DT_INT32}))
 .OP_END_FACTORY_REG(BalanceRois)
+
+/**
+* @brief First calculate the minimum closure area of the two boxes and the IoU;
+* the CIoU is then obtained by combining the center distance, the width-to-height ratio, and the IoU. \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
+* shape (4, N). "N" indicates the number of bounding boxes, and the value
+* "4" refers to [x1, y1, x2, y2] or [x, y, w, h].
+* @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
+* with shape (4, M). "M" indicates the number of ground truth boxes, and
+* the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n
+
+* @par Attributes:
+* @li trans: An optional bool, true for 'xywh', false for 'xyxy'.
+* @li is_cross: An optional bool, controls whether the output shape is [N, M] or [1, N].
+* @li mode: An optional string, computation mode, a character string with the value range of [iou, iof].
+* @li atan_sub_flag: An optional bool, controls whether to output atan_sub. \n
+
+* @par Outputs:
+* Two outputs, including:
+* @li overlap: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N],
+* specifying the IoU or IoF ratio .
+* @li atan_sub: A 2D Tensor of type float16 or float32 with shape [N, M] or [1, N],
+* specifying the IoU or IoF ratio . \n
+
+* @attention Constraints:
+* "is_cross" only supports false, "atan_sub_flag" only supports true.
+*/
+REG_OP(CIoU)
+.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+.OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
+.OUTPUT(atan_sub, TensorType({DT_FLOAT16, DT_FLOAT}))
+.ATTR(trans, Bool, false)
+.ATTR(is_cross, Bool, true)
+.ATTR(mode, String, "iou")
+.ATTR(atan_sub_flag, Bool, false)
+.OP_END_FACTORY_REG(CIoU)
 } // namespace ge

 #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_


third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h (+16 -16)

@@ -458,22 +458,22 @@ REG_OP(Softsign)
 .OP_END_FACTORY_REG(Softsign)

 /**
-* @brief Computes softsignGrad: y_grad / (1 + abs(x)) ** 2 .
+* @brief Computes softsignGrad: gradients / (1 + abs(features)) ** 2 .
 *
 * @par Inputs:
 * Two inputs, including:
-* @li y_grad: A Tensor.Must be one of the following types:float16, float32,
-* @li x: A Tensor of the same type and shape as "gradients".
+* @li gradients: A Tensor.Must be one of the following types:float16, float32,
+* @li features: A Tensor of the same type and shape as "gradients".

-* @par x_grad:
-* output:A Tensor. Has the same type as "y_grad".
+* @par Outputs:
+* output:A Tensor. Has the same type as "gradients".
 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator SoftsignGrad.
 */
 REG_OP(SoftsignGrad)
-.INPUT(y_grad, TensorType::FloatingDataType())
-.INPUT(x, TensorType::FloatingDataType())
-.OUTPUT(x_grad, TensorType::FloatingDataType())
+.INPUT(gradients, TensorType::FloatingDataType())
+.INPUT(features, TensorType::FloatingDataType())
+.OUTPUT(output, TensorType::FloatingDataType())
 .OP_END_FACTORY_REG(SoftsignGrad)

 /**
@@ -500,23 +500,23 @@ REG_OP(Selu)
 .OP_END_FACTORY_REG(Selu)

 /**
-* @brief Computes SeluGrad backprops: y_grad * (y + scale * alpha)
-* if y < 0, scale * y_grad otherwise .
+* @brief Computes SeluGrad backprops: gradients * (outputs + scale * alpha)
+* if outputs < 0, scale * gradients otherwise .

 * @par Inputs:
 * Two inputs, including:
-* @li y_grad: A Tensor of type RealNumberType .
-* @li y: A Tensor of type RealNumberType .
+* @li gradients: A Tensor of type RealNumberType .
+* @li outputs: A Tensor of type RealNumberType .
 * @par Outputs:
-* x_grad: A Tensor. Must have the same type as "y_grad" .
+* y: A Tensor. Must have the same type as "gradients" .

 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator SeluGrad.
 */
 REG_OP(SeluGrad)
-.INPUT(y_grad, TensorType::RealNumberType())
-.INPUT(y, TensorType::RealNumberType())
-.OUTPUT(x_grad, TensorType::RealNumberType())
+.INPUT(gradients, TensorType::RealNumberType())
+.INPUT(outputs, TensorType::RealNumberType())
+.OUTPUT(y, TensorType::RealNumberType())
 .OP_END_FACTORY_REG(SeluGrad)

 /**


third_party/fwkacllib/inc/ops/selection_ops.h (+28 -0)

@@ -2612,6 +2612,34 @@ REG_OP(DynSeqOuter)
 .INPUT(seq_len2, TensorType({DT_INT32}))
 .OUTPUT(y, TensorType::BasicType())
 .OP_END_FACTORY_REG(DynSeqOuter)
+
+/**
+* @brief Returns sliced data based on max nmsed_num. \n
+
+* @par Inputs:
+* Four inputs, including:
+* @li input_nmsed_boxes: A Tensor. Must be the following types: float16.
+* @li input_nmsed_score: A Tensor. Must be the following types: float16.
+* @li input_nmsed_class: A Tensor. Must be the following types: float16.
+* @li input_nmsed_num: A Tensor. Must be the following types: int32. \n
+
+* @par Outputs:
+* output_nmsed_boxes: A Tensor. Must be the following type: float.
+* output_nmsed_score: A Tensor. Must be the following type: float.
+* output_nmsed_class: A Tensor. Must be the following type: float. \n
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(NonMaxSuppressionBucketize)
+.INPUT(input_nmsed_boxes, TensorType({DT_FLOAT16}))
+.INPUT(input_nmsed_score, TensorType({DT_FLOAT16}))
+.INPUT(input_nmsed_class, TensorType({DT_FLOAT16}))
+.INPUT(input_nmsed_num, TensorType({DT_INT32}))
+.OUTPUT(output_nmsed_boxes, TensorType({DT_FLOAT}))
+.OUTPUT(output_nmsed_score, TensorType({DT_FLOAT}))
+.OUTPUT(output_nmsed_class, TensorType({DT_FLOAT}))
+.OP_END_FACTORY_REG(NonMaxSuppressionBucketize)
 } // namespace ge

 #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_

third_party/fwkacllib/inc/ops/slice_write_ops.h (+0 -50, deleted)

@@ -1,50 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file slice_write_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_SLICE_WRITE_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_SLICE_WRITE_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Write tensor value to tensor x.
*@par Inputs:
*x: A Tensor of type float16/float/double/int32/int64. \n
*begin: A Tensor of type int32/int64. \n
*value: A Tensor of type float16/float/double/int32/int64.
*@par Outputs:
*x: the same tensor as input x
*/
REG_OP(SliceWrite)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \
DT_INT32, DT_INT64}))
.INPUT(begin, TensorType({DT_INT32, DT_INT64}))
.INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \
DT_INT32, DT_INT64}))
.OUTPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \
DT_INT32, DT_INT64}))
.OP_END_FACTORY_REG(SliceWrite)

} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_SLICE_WRITE_OPS_H_

+ 3
- 3
third_party/fwkacllib/inc/ops/sparse_ops.h View File

@@ -951,7 +951,7 @@ REG_OP(SerializeSparse)
        DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \
        DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING}))
    .INPUT(shape, TensorType({DT_INT64}))
.OUTPUT(serialized_sparse, TensorType({DT_STRING}))
.OUTPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT}))
    .ATTR(out_type, Type, DT_STRING)
    .OP_END_FACTORY_REG(SerializeSparse)


@@ -979,7 +979,7 @@ REG_OP(SerializeManySparse)
        DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \
        DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING}))
    .INPUT(shape, TensorType({DT_INT64}))
.OUTPUT(serialized_sparse, TensorType({DT_STRING}))
.OUTPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT}))
    .ATTR(out_type, Type, DT_STRING)
    .OP_END_FACTORY_REG(SerializeManySparse)


@@ -1002,7 +1002,7 @@ REG_OP(SerializeManySparse)
* Compatible with the TensorFlow operator DeserializeSparse.
*/
REG_OP(DeserializeSparse)
.INPUT(serialized_sparse, TensorType({DT_STRING}))
.INPUT(serialized_sparse, TensorType({DT_STRING, DT_VARIANT}))
    .OUTPUT(indices, TensorType({DT_INT64}))
    .OUTPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, \
        DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \
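
The three hunks above widen serialized_sparse from DT_STRING to DT_STRING or DT_VARIANT, matching TensorFlow's variant-serialized sparse tensors. A hedged sketch of opting into the variant encoding through the generated IR class, assuming the op parser emits the usual set_attr_<name> accessor for the out_type attribute:

#include "ops/sparse_ops.h"

// Assumption: ge::op::SerializeSparse is the class generated from the
// registration above, with a set_attr_out_type accessor.
void BuildVariantSerializeSparse() {
  ge::op::SerializeSparse op("serialize_sparse");
  op.set_attr_out_type(ge::DT_VARIANT);  // previously only DT_STRING was accepted
}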


+ 92
- 0
third_party/fwkacllib/inc/ops/vector_search.h View File

@@ -154,6 +154,98 @@ REG_OP(CalcBucketsLimitAndOffset)
    .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64}))
    .REQUIRED_ATTR(total_limit, Int)
    .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset)

/**
*@brief Get block tensor according to base address tensor, for HCCL remote read to use.
*@par Inputs:
*@li base_addr: A Tensor of type int64/uint64. \n
*@li row: A Tensor of type int64/uint64. \n
*@li col: A Tensor of type int64/uint64.

*@par Outputs:
*addrs_table: list of [rank id, host addr, device addr, read size]

*@par Attributes:
*@li ori_shape: A required list of ints. Shape of the base tensor.
*@li block_size: A required list of ints. Shape of the split block tensor.
*@li ori_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to
"Matrix". Currently only Matrix storage is supported.
*@li block_storage_mode: An optional string from: '"Matrix", "UT"'. Defaults to
"Matrix". Currently only Matrix storage is supported.
*@li rank_id: An optional int for the rank id. Defaults to 0.
*@li dtype: An optional Type of the base tensor. Defaults to DT_FLOAT.
*/
REG_OP(IndexToAddr)
.INPUT(base_addr, TensorType({DT_INT64, DT_UINT64}))
.INPUT(x, TensorType({DT_INT64, DT_UINT64}))
.OUTPUT(addrs_table, TensorType({DT_INT64, DT_UINT64}))
.REQUIRED_ATTR(ori_shape, ListInt)
.REQUIRED_ATTR(block_size, ListInt)
.ATTR(ori_storage_mode, String, "Matrix")
.ATTR(block_storage_mode, String, "Matrix")
.ATTR(rank_id, Int, 0)
.ATTR(dtype, Type, DT_FLOAT)
.OP_END_FACTORY_REG(IndexToAddr)
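
The attributes describe cutting a row-major base tensor (ori_shape) into blocks (block_size) and emitting addresses for HCCL remote reads. A hypothetical version of the address arithmetic for the "Matrix" mode; the element size, the (row, col) block indices, and BlockStartAddr itself are illustrative assumptions:

#include <cstdint>

// Hypothetical: start address of block (row, col) of size block_rows x
// block_cols inside a row-major matrix with ori_cols columns per row.
uint64_t BlockStartAddr(uint64_t base_addr, int64_t ori_cols,
                        int64_t block_rows, int64_t block_cols,
                        int64_t row, int64_t col, int64_t elem_size) {
  const int64_t elem_offset = row * block_rows * ori_cols + col * block_cols;
  return base_addr + static_cast<uint64_t>(elem_offset * elem_size);
}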

/**
*@brief Convert one-dimensional coordinates to two-dimensional coordinates.
*@par Inputs:
*@li x: A Tensor of type int32/int64/uint64. One-dimensional coordinates.
*@li shape: A Tensor of type int32/int64/uint64. 4D tensor [N,C,H,W].
*@par Outputs:
*@li row: row of the two-dimensional coordinates
*@li col: col of the two-dimensional coordinates
*@li n: number of columns of the two-dimensional coordinates
*/
REG_OP(Coordinates1DTo2D)
.INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.INPUT(shape, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.OUTPUT(row, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.OUTPUT(col, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.OUTPUT(n, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.OP_END_FACTORY_REG(Coordinates1DTo2D)
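
For the mapping itself, the natural reading is row-major: the flat index splits into a quotient and remainder by the column count. A small sketch (the row-major assumption and how n is derived from the shape input are not spelled out in the header):

#include <cstdint>

// Assumed row-major mapping: flat index x -> (x / n, x % n) with n columns.
void Coords1DTo2DRef(int64_t x, int64_t n, int64_t &row, int64_t &col) {
  row = x / n;  // which row the flat index lands in
  col = x % n;  // offset within that row
}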

/**
*@brief x[0] is i, x[1] is j and x[2] is k when algorithm is LU:
y = 0 when i >= k && j < k,
y = 1 when i == k && j == k,
y = 2 when i > k && j == k,
y = 3 when i == k && j > k,
y = 4 when i > k && j > k,
y = 5 otherwise.
Used for LU decomposition.
*@par Inputs:
*x: A Tensor of type int32/int64/uint64. \n

*@par Attributes:
*algorithm: A string. Only "LU" is supported now.
*@par Outputs:
*y: A Tensor of type int32
*/
REG_OP(CaseCondition)
.INPUT(x, TensorType({DT_INT32, DT_INT64, DT_UINT64}))
.OUTPUT(y, TensorType({DT_INT32}))
.ATTR(algorithm, String, "LU")
.OP_END_FACTORY_REG(CaseCondition)
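
The case table above transcribes directly into a predicate; the following mirrors it one branch per line (the function name is illustrative):

#include <cstdint>

// Direct transcription of the documented LU case table; the branches are
// mutually exclusive, so their order does not matter.
int32_t CaseConditionRef(int64_t i, int64_t j, int64_t k) {
  if (i >= k && j < k)  return 0;
  if (i == k && j == k) return 1;
  if (i > k  && j == k) return 2;
  if (i == k && j > k)  return 3;
  if (i > k  && j > k)  return 4;
  return 5;
}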

/**
*@brief Write tensor value to tensor x.
*@par Inputs:
*x: A Tensor of type float16/float/double/int32/int64. \n
*begin: A Tensor of type int32/int64. \n
*value: A Tensor of type float16/float/double/int32/int64.
*@par Outputs:
*x: the same tensor as input x
*/
REG_OP(SliceWrite)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \
DT_INT32, DT_INT64}))
.INPUT(begin, TensorType({DT_INT32, DT_INT64}))
.INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \
DT_INT32, DT_INT64}))
.OUTPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \
DT_INT32, DT_INT64}))
.OP_END_FACTORY_REG(SliceWrite)
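
SliceWrite's in-place contract (the output aliases input x) is easiest to see in one dimension. A reference sketch under that simplification; the real op takes N-D begin offsets:

#include <cstdint>
#include <vector>

// 1-D reference: copy value into x starting at begin; x is modified in
// place, matching the "same tensor as input x" output contract above.
void SliceWriteRef(std::vector<float> &x, int64_t begin,
                   const std::vector<float> &value) {
  for (size_t i = 0U; i < value.size(); ++i) {
    x[static_cast<size_t>(begin) + i] = value[i];
  }
}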
} // namespace ge


#endif  // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_

+ 14
- 0
third_party/fwkacllib/inc/runtime/rt_model.h View File

@@ -263,6 +263,18 @@ typedef struct tagrtStreamLabelGotoTask_t {
    uint8_t reserved[36];
} rtStreamLabelGotoTask_t;


typedef struct tagrtNpuGetFloatStatusTask_t {
uint64_t outputAddr;
uint64_t outputSize;
uint32_t checkMode;
uint8_t reserved[20];
} rtNpuGetFloatStatusTask_t;

typedef struct tagrtNpuClearFloatStatusTask_t {
uint32_t checkMode;
uint8_t reserved[36];
} rtNpuClearFloatStatusTask_t;

typedef struct tagTaskInfo {
    uint32_t type;
    uint32_t streamID;
@@ -288,6 +300,8 @@ typedef struct tagTaskInfo {
        rtStreamSwitchNTaskInfo_t streamSwitchNTask;
        rtStreamLabelSwitchByIndexTask_t streamLabelSwitchIndexTask;
        rtStreamLabelGotoTask_t streamLabelGotoTask;
rtNpuGetFloatStatusTask_t npuGetFloatStatusTask;
rtNpuClearFloatStatusTask_t npuClearFloatStatusTask;
        uint32_t reserved[10];
    } u;
} rtTaskInfo_t;
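
For the new union members, a filled-in task would look roughly like the sketch below. How type is chosen for these tasks and how the runtime consumes the fields are not part of this diff, so treat every detail here as an assumption:

#include <cstring>

// Hedged sketch: populate the new npuGetFloatStatusTask union member.
void FillNpuGetFloatStatusTask(rtTaskInfo_t &task, uint64_t output_addr,
                               uint64_t output_size, uint32_t check_mode) {
  std::memset(&task, 0, sizeof(task));
  task.u.npuGetFloatStatusTask.outputAddr = output_addr;  // status buffer address
  task.u.npuGetFloatStatusTask.outputSize = output_size;  // buffer size in bytes
  task.u.npuGetFloatStatusTask.checkMode  = check_mode;   // overflow-check mode
  // task.type would also need the matching task-type constant (not shown here).
}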

