diff --git a/CMakeLists.txt b/CMakeLists.txt index 60509838..5e58eeba 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -125,7 +125,6 @@ else () message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") endif() endif() - set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef) set(PARSER_DIR ${CMAKE_CURRENT_LIST_DIR}/parser) set(GE_DEPEND_DIR ${CMAKE_CURRENT_LIST_DIR}/..) @@ -158,6 +157,7 @@ else () elseif(ENABLE_MS_TESTCASES) include(cmake/external_libs/protobuf_static.cmake) include(cmake/external_libs/protoc.cmake) + include(cmake/external_libs/json.cmake) include(cmake/external_libs/securec.cmake) include(cmake/FindModule.cmake) include(cmake/intf_pub_linux.cmake) @@ -175,5 +175,4 @@ else () endif() add_subdirectory(ge) - endif () diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake index 3c1cd012..04659ebc 100755 --- a/cmake/external_libs/json.cmake +++ b/cmake/external_libs/json.cmake @@ -9,10 +9,6 @@ if (GE_PB_PKG) set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip") set(MD5 "0dc903888211db3a0f170304cd9f3a89") set(JSON_INCLUDE_DIR ${JSON_SRC_DIR}) -#elseif (ENABLE_GITEE) -# set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") -# set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") -#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include") else() set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") set(MD5 "0dc903888211db3a0f170304cd9f3a89") diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt index 3243766f..ffea784b 100644 --- a/ge/ge_runtime/CMakeLists.txt +++ b/ge/ge_runtime/CMakeLists.txt @@ -16,6 +16,7 @@ set(GE_SRC_LIST "task/label_goto_task.cc" "task/label_set_task.cc" "task/label_switch_task.cc" + "task/label_manager.cc" ) add_library(ge_runtime SHARED ${GE_SRC_LIST}) diff --git a/ge/ge_runtime/task/hccl_task.cc b/ge/ge_runtime/task/hccl_task.cc index b1c7158c..bfe0d0f3 100644 --- a/ge/ge_runtime/task/hccl_task.cc +++ b/ge/ge_runtime/task/hccl_task.cc @@ -53,15 +53,7 @@ HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptrworkspace_size() > 0) { - rtError_t rt_ret = rtMalloc(&workspace_mem_, task_info_->workspace_size(), RT_MEMORYINFO_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } + workspace_mem_ = task_info_->workspace_addr(); } GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl."); diff --git a/ge/ge_runtime/task/label_goto_task.cc b/ge/ge_runtime/task/label_goto_task.cc index 7cb6d556..a3b70971 100644 --- a/ge/ge_runtime/task/label_goto_task.cc +++ b/ge/ge_runtime/task/label_goto_task.cc @@ -16,33 +16,46 @@ #include "ge_runtime/task/label_goto_task.h" #include "ge_runtime/task/task_factory.h" -#include "framework/common/util.h" namespace ge { namespace model_runner { LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr &task_info) - : TaskRepeater(model_context, task_info), task_info_(task_info) { + : TaskRepeater(model_context, task_info), + task_info_(task_info), + stream_(nullptr), + index_value_(nullptr) { if (task_info_ == nullptr) { GELOGW("task_info_ is null!"); return; } auto stream_list = model_context.stream_list(); auto label_list = model_context.label_list(); + rt_model_handle_ = model_context.rt_model_handle(); uint32_t stream_id = task_info->stream_id(); - uint32_t label_id = task_info->label_id(); + label_id_ = task_info->label_id(); GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); - GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); - if (stream_id >= stream_list.size() || label_id >= label_list.size()) { + GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id_); + if (stream_id >= stream_list.size() || label_id_ >= label_list.size()) { GELOGW("Stream/Label id invalid."); return; } stream_ = stream_list[stream_id]; - label_ = label_list[label_id]; + label_manager_ = LabelManager::GetInstance(); + if (label_manager_ == nullptr) { + GELOGW("Get label manager instance failed."); + return; + } + label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, {label_id_}, label_list); } LabelGotoTask::~LabelGotoTask() { - GE_FREE_RT_LOG(label_info_); - GE_FREE_RT_LOG(index_value_); + if (index_value_ != nullptr) { + rtError_t rt_ret = rtFree(index_value_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtFree index_value_ failed! ret: 0x%X.", rt_ret); + } + index_value_ = nullptr; + } } bool LabelGotoTask::Distribute() { @@ -94,21 +107,34 @@ bool LabelGotoTask::CheckParamValid() { return false; } - if (label_ == nullptr) { - GELOGE(PARAM_INVALID, "label is null!"); + if (label_info_ == nullptr) { + GELOGE(PARAM_INVALID, "label info is null!"); return false; } - if (label_info_ != nullptr) { - GELOGE(PARAM_INVALID, "label_info_ has dirty data."); - return false; + if (index_value_ == nullptr) { + rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + uint64_t index = 0; + rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } } - if (index_value_ != nullptr) { - GELOGE(PARAM_INVALID, "index_value_ has dirty data."); + void *label_info = label_info_->GetLabelInfo(); + rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, 1, label_info, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return false; } + GELOGI("DistributeTask end."); return true; } diff --git a/ge/ge_runtime/task/label_goto_task.h b/ge/ge_runtime/task/label_goto_task.h index addbb700..e579c683 100644 --- a/ge/ge_runtime/task/label_goto_task.h +++ b/ge/ge_runtime/task/label_goto_task.h @@ -18,7 +18,11 @@ #define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ #include +#include +#include +#include #include "ge_runtime/task/task.h" +#include "ge_runtime/task/label_manager.h" namespace ge { namespace model_runner { @@ -31,13 +35,13 @@ class LabelGotoTask : public TaskRepeater { bool Distribute() override; private: - bool CheckParamValid(); - std::shared_ptr task_info_; - void *stream_{nullptr}; - void *label_{nullptr}; - void *label_info_{nullptr}; - void *index_value_{nullptr}; + void *stream_; + std::shared_ptr label_info_; + void *index_value_; + uint32_t label_id_; + rtModel_t rt_model_handle_; + std::shared_ptr label_manager_; }; } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/label_manager.cc b/ge/ge_runtime/task/label_manager.cc new file mode 100644 index 00000000..a2b0c3aa --- /dev/null +++ b/ge/ge_runtime/task/label_manager.cc @@ -0,0 +1,119 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "ge_runtime/task/label_manager.h" +#include +#include +#include "runtime/mem.h" +#include "runtime/rt_model.h" +#include "common/ge_inner_error_codes.h" +#include "framework/common/debug/ge_log.h" + +namespace ge { +namespace model_runner { +std::weak_ptr LabelManager::instance_; +std::mutex LabelManager::instance_mutex_; + +template +static std::string GetVectorString(const std::vector &vec) { + std::string ret; + for (size_t i = 0; i < vec.size(); ++i) { + if (i != 0) { + ret.push_back(','); + } + ret += std::to_string(vec[i]); + } + return ret; +} + +LabelGuard::~LabelGuard() { + void *label_info = GetLabelInfo(); + if (label_info != nullptr) { + rtError_t rt_ret = rtFree(label_info); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtFree label_info failed! ret: 0x%X.", rt_ret); + } + } +} + +std::shared_ptr LabelManager::GetInstance() { + std::lock_guard lock(instance_mutex_); + auto instance = instance_.lock(); + if (instance != nullptr) { + return instance; + } + + instance = std::make_shared(); + instance_ = instance; + return instance; +} + +std::shared_ptr LabelManager::GetLabelInfo(rtModel_t model, const std::vector &label_ids, + const std::vector &all_label) { + std::lock_guard lock(model_info_mapping_mutex_); + rtError_t rt_ret; + auto model_iter = model_info_mapping_.find(model); + if (model_iter == model_info_mapping_.end()) { + model_info_mapping_.emplace(model, std::map>()); + model_iter = model_info_mapping_.find(model); + } + + std::string label_id_str = GetVectorString(label_ids); + auto &label_map = model_iter->second; + auto label_iter = label_map.find(label_id_str); + if (label_iter != label_map.end()) { + auto label_guard = label_iter->second.lock(); + if (label_guard != nullptr) { + GELOGI("model %p find same label id %s.", model, label_id_str.c_str()); + return label_guard; + } + } + + GELOGI("Alloc label id %s for model %p.", label_id_str.c_str(), model); + void *label_info; + std::vector label_list; + bool status = true; + std::transform(label_ids.begin(), label_ids.end(), std::back_inserter(label_list), + [&all_label, &status](uint32_t idx) -> void * { + if (idx >= all_label.size()) { + GELOGE(PARAM_INVALID, "Invalid label id %u, all label list size %zu.", idx, all_label.size()); + status = false; + return nullptr; + } + return all_label[idx]; + }); + if (!status) { + GELOGE(PARAM_INVALID, "Get label info failed."); + return nullptr; + } + uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size(); + rt_ret = rtMalloc(&label_info, label_info_size, RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return nullptr; + } + + rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info, label_info_size); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return nullptr; + } + + auto label_guard = std::make_shared(label_info); + label_map.emplace(label_id_str, label_guard); + return label_guard; +} +} // namespace model_runner +} // namespace ge diff --git a/ge/ge_runtime/task/label_manager.h b/ge/ge_runtime/task/label_manager.h new file mode 100644 index 00000000..f2c42c29 --- /dev/null +++ b/ge/ge_runtime/task/label_manager.h @@ -0,0 +1,54 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_ +#define GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_ + +#include +#include +#include +#include +#include + +namespace ge { +namespace model_runner { +class LabelGuard { + public: + explicit LabelGuard(void *label_info) : label_info_(reinterpret_cast(label_info)) {} + ~LabelGuard(); + void *GetLabelInfo() { return reinterpret_cast(label_info_); } + + private: + uintptr_t label_info_; +}; + +class LabelManager { + public: + static std::shared_ptr GetInstance(); + std::shared_ptr GetLabelInfo(rtModel_t model, const std::vector &label_ids, + const std::vector &all_label); + + private: + std::mutex model_info_mapping_mutex_; + std::map>> model_info_mapping_; + + static std::weak_ptr instance_; + static std::mutex instance_mutex_; +}; + + +} // namespace model_runner +} // namespace ge +#endif // GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_ \ No newline at end of file diff --git a/ge/ge_runtime/task/label_switch_task.cc b/ge/ge_runtime/task/label_switch_task.cc index 8c795da9..cde278d9 100644 --- a/ge/ge_runtime/task/label_switch_task.cc +++ b/ge/ge_runtime/task/label_switch_task.cc @@ -24,14 +24,14 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context, : TaskRepeater(model_context, task_info), task_info_(task_info), stream_(nullptr), - all_label_resource_(), label_info_(nullptr) { if (task_info_ == nullptr) { GELOGW("task_info_ is null!"); return; } - all_label_resource_ = model_context.label_list(); + rt_model_handle_ = model_context.rt_model_handle(); + auto all_label_resource = model_context.label_list(); auto stream_list = model_context.stream_list(); uint32_t stream_id = task_info->stream_id(); GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); @@ -40,18 +40,16 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context, return; } stream_ = stream_list[stream_id]; -} - -LabelSwitchTask::~LabelSwitchTask() { - if (label_info_ != nullptr) { - rtError_t rt_ret = rtFree(label_info_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! ret: 0x%X.", rt_ret); - } - label_info_ = nullptr; + label_manager_ = LabelManager::GetInstance(); + if (label_manager_ == nullptr) { + GELOGW("Get label manager instance failed."); + return; } + label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, task_info_->label_list(), all_label_resource); } +LabelSwitchTask::~LabelSwitchTask() {} + bool LabelSwitchTask::Distribute() { GELOGI("LabelSwitchTask Distribute start."); if (!CheckParamValid()) { @@ -117,8 +115,8 @@ bool LabelSwitchTask::CheckParamValid() { return false; } - if (label_info_ != nullptr) { - GELOGE(PARAM_INVALID, "label_info_ has dirty data."); + if (label_info_ == nullptr) { + GELOGE(PARAM_INVALID, "CopyLabelList failed, label info is null."); return false; } @@ -126,6 +124,5 @@ bool LabelSwitchTask::CheckParamValid() { } REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo); - } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/label_switch_task.h b/ge/ge_runtime/task/label_switch_task.h index 463faa31..cfa6877c 100644 --- a/ge/ge_runtime/task/label_switch_task.h +++ b/ge/ge_runtime/task/label_switch_task.h @@ -19,6 +19,7 @@ #include #include "ge_runtime/task/task.h" +#include "ge_runtime/task/label_manager.h" namespace ge { namespace model_runner { @@ -35,8 +36,9 @@ class LabelSwitchTask : public TaskRepeater { std::shared_ptr task_info_; void *stream_; - std::vector all_label_resource_; - void *label_info_; + rtModel_t rt_model_handle_; + std::shared_ptr label_info_; + std::shared_ptr label_manager_; }; } // namespace model_runner } // namespace ge diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h new file mode 100644 index 00000000..8d261201 --- /dev/null +++ b/inc/external/acl/acl.h @@ -0,0 +1,82 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_H_ +#define INC_EXTERNAL_ACL_ACL_H_ + +#include "acl_rt.h" +#include "acl_op.h" +#include "acl_mdl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Current version is 1.0.0 +#define ACL_MAJOR_VERSION 1 +#define ACL_MINOR_VERSION 0 +#define ACL_PATCH_VERSION 0 + +/** + * @ingroup AscendCL + * @brief acl initialize + * + * @par Restriction + * The aclInit interface can be called only once in a process + * @param configPath [IN] the config path,it can be NULL + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath); + +/** + * @ingroup AscendCL + * @brief acl finalize + * + * @par Restriction + * Need to call aclFinalize before the process exits. + * After calling aclFinalize,the services cannot continue to be used normally. + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclFinalize(); + +/** + * @ingroup AscendCL + * @brief query ACL interface version + * + * @param majorVersion[OUT] ACL interface major version + * @param minorVersion[OUT] ACL interface minor version + * @param patchVersion[OUT] ACL interface patch version + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion); + +/** + * @ingroup AscendCL + * @brief get recent error message + * + * @retval null for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY const char *aclGetRecentErrMsg(); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_H_ diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h new file mode 100644 index 00000000..64d4bd81 --- /dev/null +++ b/inc/external/acl/acl_base.h @@ -0,0 +1,638 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_BASE_H_ +#define INC_EXTERNAL_ACL_ACL_BASE_H_ + +#include +#include +#include "error_codes/rt_error_codes.h" +#include "error_codes/ge_error_codes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define ACL_FUNC_VISIBILITY _declspec(dllexport) +#else +#define ACL_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define ACL_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define ACL_FUNC_VISIBILITY +#endif +#endif + +#ifdef __GNUC__ +#define ACL_DEPRECATED __attribute__((deprecated)) +#define ACL_DEPRECATED_MESSAGE(message) __attribute__((deprecated(message))) +#elif defined(_MSC_VER) +#define ACL_DEPRECATED __declspec(deprecated) +#define ACL_DEPRECATED_MESSAGE(message) __declspec(deprecated(message)) +#else +#define ACL_DEPRECATED +#define ACL_DEPRECATED_MESSAGE(message) +#endif + +typedef void *aclrtStream; +typedef void *aclrtEvent; +typedef void *aclrtContext; +typedef int aclError; +typedef uint16_t aclFloat16; +typedef struct aclDataBuffer aclDataBuffer; +typedef struct aclTensorDesc aclTensorDesc; + +static const int ACL_ERROR_NONE = 0; +static const int ACL_SUCCESS = 0; + +static const int ACL_ERROR_INVALID_PARAM = 100000; +static const int ACL_ERROR_UNINITIALIZE = 100001; +static const int ACL_ERROR_REPEAT_INITIALIZE = 100002; +static const int ACL_ERROR_INVALID_FILE = 100003; +static const int ACL_ERROR_WRITE_FILE = 100004; +static const int ACL_ERROR_INVALID_FILE_SIZE = 100005; +static const int ACL_ERROR_PARSE_FILE = 100006; +static const int ACL_ERROR_FILE_MISSING_ATTR = 100007; +static const int ACL_ERROR_FILE_ATTR_INVALID = 100008; +static const int ACL_ERROR_INVALID_DUMP_CONFIG = 100009; +static const int ACL_ERROR_INVALID_PROFILING_CONFIG = 100010; +static const int ACL_ERROR_INVALID_MODEL_ID = 100011; +static const int ACL_ERROR_DESERIALIZE_MODEL = 100012; +static const int ACL_ERROR_PARSE_MODEL = 100013; +static const int ACL_ERROR_READ_MODEL_FAILURE = 100014; +static const int ACL_ERROR_MODEL_SIZE_INVALID = 100015; +static const int ACL_ERROR_MODEL_MISSING_ATTR = 100016; +static const int ACL_ERROR_MODEL_INPUT_NOT_MATCH = 100017; +static const int ACL_ERROR_MODEL_OUTPUT_NOT_MATCH = 100018; +static const int ACL_ERROR_MODEL_NOT_DYNAMIC = 100019; +static const int ACL_ERROR_OP_TYPE_NOT_MATCH = 100020; +static const int ACL_ERROR_OP_INPUT_NOT_MATCH = 100021; +static const int ACL_ERROR_OP_OUTPUT_NOT_MATCH = 100022; +static const int ACL_ERROR_OP_ATTR_NOT_MATCH = 100023; +static const int ACL_ERROR_OP_NOT_FOUND = 100024; +static const int ACL_ERROR_OP_LOAD_FAILED = 100025; +static const int ACL_ERROR_UNSUPPORTED_DATA_TYPE = 100026; +static const int ACL_ERROR_FORMAT_NOT_MATCH = 100027; +static const int ACL_ERROR_BIN_SELECTOR_NOT_REGISTERED = 100028; +static const int ACL_ERROR_KERNEL_NOT_FOUND = 100029; +static const int ACL_ERROR_BIN_SELECTOR_ALREADY_REGISTERED = 100030; +static const int ACL_ERROR_KERNEL_ALREADY_REGISTERED = 100031; +static const int ACL_ERROR_INVALID_QUEUE_ID = 100032; +static const int ACL_ERROR_REPEAT_SUBSCRIBE = 100033; +static const int ACL_ERROR_STREAM_NOT_SUBSCRIBE = 100034; +static const int ACL_ERROR_THREAD_NOT_SUBSCRIBE = 100035; +static const int ACL_ERROR_WAIT_CALLBACK_TIMEOUT = 100036; +static const int ACL_ERROR_REPEAT_FINALIZE = 100037; +static const int ACL_ERROR_NOT_STATIC_AIPP = 100038; +static const int ACL_ERROR_COMPILING_STUB_MODE = 100039; +static const int ACL_ERROR_GROUP_NOT_SET = 100040; +static const int ACL_ERROR_GROUP_NOT_CREATE = 100041; +static const int ACL_ERROR_PROF_ALREADY_RUN = 100042; +static const int ACL_ERROR_PROF_NOT_RUN = 100043; +static const int ACL_ERROR_DUMP_ALREADY_RUN = 100044; +static const int ACL_ERROR_DUMP_NOT_RUN = 100045; +static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046; +static const int ACL_ERROR_PROF_API_CONFLICT = 148047; +static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048; +static const int ACL_ERROR_INVALID_OPP_PATH = 148049; +static const int ACL_ERROR_OP_UNSUPPORTED_DYNAMIC = 148050; + +static const int ACL_ERROR_BAD_ALLOC = 200000; +static const int ACL_ERROR_API_NOT_SUPPORT = 200001; +static const int ACL_ERROR_INVALID_DEVICE = 200002; +static const int ACL_ERROR_MEMORY_ADDRESS_UNALIGNED = 200003; +static const int ACL_ERROR_RESOURCE_NOT_MATCH = 200004; +static const int ACL_ERROR_INVALID_RESOURCE_HANDLE = 200005; +static const int ACL_ERROR_FEATURE_UNSUPPORTED = 200006; +static const int ACL_ERROR_PROF_MODULES_UNSUPPORTED = 200007; + +static const int ACL_ERROR_STORAGE_OVER_LIMIT = 300000; + +static const int ACL_ERROR_INTERNAL_ERROR = 500000; +static const int ACL_ERROR_FAILURE = 500001; +static const int ACL_ERROR_GE_FAILURE = 500002; +static const int ACL_ERROR_RT_FAILURE = 500003; +static const int ACL_ERROR_DRV_FAILURE = 500004; +static const int ACL_ERROR_PROFILING_FAILURE = 500005; + +#define ACL_TENSOR_SHAPE_RANGE_NUM 2 +#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE + +typedef enum { + ACL_DT_UNDEFINED = -1, + ACL_FLOAT = 0, + ACL_FLOAT16 = 1, + ACL_INT8 = 2, + ACL_INT32 = 3, + ACL_UINT8 = 4, + ACL_INT16 = 6, + ACL_UINT16 = 7, + ACL_UINT32 = 8, + ACL_INT64 = 9, + ACL_UINT64 = 10, + ACL_DOUBLE = 11, + ACL_BOOL = 12, + ACL_STRING = 13, +} aclDataType; + +typedef enum { + ACL_FORMAT_UNDEFINED = -1, + ACL_FORMAT_NCHW = 0, + ACL_FORMAT_NHWC = 1, + ACL_FORMAT_ND = 2, + ACL_FORMAT_NC1HWC0 = 3, + ACL_FORMAT_FRACTAL_Z = 4, + ACL_FORMAT_NC1HWC0_C04 = 12, + ACL_FORMAT_NDHWC = 27, + ACL_FORMAT_FRACTAL_NZ = 29, + ACL_FORMAT_NCDHW = 30, + ACL_FORMAT_NDC1HWC0 = 32, + ACL_FRACTAL_Z_3D = 33 +} aclFormat; + +typedef enum { + ACL_DEBUG = 0, + ACL_INFO = 1, + ACL_WARNING = 2, + ACL_ERROR = 3, +} aclLogLevel; + +typedef enum { + ACL_MEMTYPE_DEVICE = 0, + ACL_MEMTYPE_HOST = 1, +} aclMemType; + +/** + * @ingroup AscendCL + * @brief Converts data of type aclFloat16 to data of type float + * + * @param value [IN] Data to be converted + * + * @retval Transformed data + */ +ACL_FUNC_VISIBILITY float aclFloat16ToFloat(aclFloat16 value); + +/** + * @ingroup AscendCL + * @brief Converts data of type float to data of type aclFloat16 + * + * @param value [IN] Data to be converted + * + * @retval Transformed data + */ +ACL_FUNC_VISIBILITY aclFloat16 aclFloatToFloat16(float value); + +/** + * @ingroup AscendCL + * @brief create data of aclDataBuffer + * + * @param data [IN] pointer to data + * @li Need to be managed by the user, + * call aclrtMalloc interface to apply for memory, + * call aclrtFree interface to release memory + * + * @param size [IN] size of data in bytes + * + * @retval pointer to created instance. nullptr if run out of memory + * + * @see aclrtMalloc | aclrtFree + */ +ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size); + +/** + * @ingroup AscendCL + * @brief destroy data of aclDataBuffer + * + * @par Function + * Only the aclDataBuffer type data is destroyed here. + * The memory of the data passed in when the aclDataDataBuffer interface + * is called to create aclDataBuffer type data must be released by the user + * + * @param dataBuffer [IN] pointer to the aclDataBuffer + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclCreateDataBuffer + */ +ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer); + +/** + * @ingroup AscendCL + * @brief update new data of aclDataBuffer + * + * @param dataBuffer [OUT] pointer to aclDataBuffer + * @li The old data need to be released by the user, otherwise it may occur memory leak leakage + * call aclGetDataBufferAddr interface to get old data address + * call aclrtFree interface to release memory + * + * @param data [IN] pointer to new data + * @li Need to be managed by the user, + * call aclrtMalloc interface to apply for memory, + * call aclrtFree interface to release memory + * + * @param size [IN] size of data in bytes + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr + */ +ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size); + +/** + * @ingroup AscendCL + * @brief get data address from aclDataBuffer + * + * @param dataBuffer [IN] pointer to the data of aclDataBuffer + * + * @retval data address + */ +ACL_FUNC_VISIBILITY void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer); + +/** + * @ingroup AscendCL + * @brief get data size of aclDataBuffer + * + * @param dataBuffer [IN] pointer to the data of aclDataBuffer + * + * @retval data size + */ +ACL_DEPRECATED_MESSAGE("aclGetDataBufferSize is deprecated, use aclGetDataBufferSizeV2 instead") +ACL_FUNC_VISIBILITY uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer); + +/** + * @ingroup AscendCL + * @brief get data size of aclDataBuffer to replace aclGetDataBufferSize + * + * @param dataBuffer [IN] pointer to the data of aclDataBuffer + * + * @retval data size + */ +ACL_FUNC_VISIBILITY size_t aclGetDataBufferSizeV2(const aclDataBuffer *dataBuffer); + +/** + * @ingroup AscendCL + * @brief get size of aclDataType + * + * @param dataType [IN] aclDataType data the size to get + * + * @retval size of the aclDataType + */ +ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType); + +// interfaces of tensor desc +/** + * @ingroup AscendCL + * @brief create data aclTensorDesc + * + * @param dataType [IN] Data types described by tensor + * @param numDims [IN] the number of dimensions of the shape + * @param dims [IN] the size of the specified dimension + * @param format [IN] tensor format + * + * @retval aclTensorDesc pointer. + * @retval nullptr if param is invalid or run out of memory + */ +ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, + aclFormat format); + +/** + * @ingroup AscendCL + * @brief destroy data aclTensorDesc + * + * @param desc [IN] pointer to the data of aclTensorDesc to destroy + */ +ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief set tensor shape range for aclTensorDesc + * + * @param desc [OUT] pointer to the data of aclTensorDesc + * @param dimsCount [IN] the number of dimensions of the shape + * @param dimsRange [IN] the range of dimensions of the shape + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount, + int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); + +/** + * @ingroup AscendCL + * @brief get data type specified by the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval data type specified by the tensor description. + * @retval ACL_DT_UNDEFINED if description is null + */ +ACL_FUNC_VISIBILITY aclDataType aclGetTensorDescType(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief get data format specified by the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval data format specified by the tensor description. + * @retval ACL_FORMAT_UNDEFINED if description is null + */ +ACL_FUNC_VISIBILITY aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief get tensor size specified by the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval data size specified by the tensor description. + * @retval 0 if description is null + */ +ACL_FUNC_VISIBILITY size_t aclGetTensorDescSize(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief get element count specified by the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval element count specified by the tensor description. + * @retval 0 if description is null + */ +ACL_FUNC_VISIBILITY size_t aclGetTensorDescElementCount(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief get number of dims specified by the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval number of dims specified by the tensor description. + * @retval 0 if description is null + * @retval ACL_UNKNOWN_RANK if the tensor dim is -2 + */ +ACL_FUNC_VISIBILITY size_t aclGetTensorDescNumDims(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief Get the size of the specified dim in the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * @param index [IN] index of dims, start from 0. + * + * @retval dim specified by the tensor description and index. + * @retval -1 if description or index is invalid + */ +ACL_DEPRECATED_MESSAGE("aclGetTensorDescDim is deprecated, use aclGetTensorDescDimV2 instead") +ACL_FUNC_VISIBILITY int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index); + +/** + * @ingroup AscendCL + * @brief Get the size of the specified dim in the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * @param index [IN] index of dims, start from 0. + * @param dimSize [OUT] size of the specified dim. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, size_t index, int64_t *dimSize); + +/** + * @ingroup AscendCL + * @brief Get the range of the specified dim in the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * @param index [IN] index of dims, start from 0. + * @param dimRangeNum [IN] number of dimRange. + * @param dimRange [OUT] range of the specified dim. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum, + int64_t *dimRange); + +/** + * @ingroup AscendCL + * @brief set tensor description name + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param name [IN] tensor description name + */ +ACL_FUNC_VISIBILITY void aclSetTensorDescName(aclTensorDesc *desc, const char *name); + +/** + * @ingroup AscendCL + * @brief get tensor description name + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval tensor description name. + * @retval empty string if description is null + */ +ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief Convert the format in the source aclTensorDesc according to + * the specified dstFormat to generate a new target aclTensorDesc. + * The format in the source aclTensorDesc remains unchanged. + * + * @param srcDesc [IN] pointer to the source tensor desc + * @param dstFormat [IN] destination format + * @param dstDesc [OUT] pointer to the pointer to the destination tensor desc + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat, + aclTensorDesc **dstDesc); + +/** + * @ingroup AscendCL + * @brief Set the storage format specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param format [IN] the storage format + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_DEPRECATED_MESSAGE("aclSetTensorStorageFormat is deprecated, use aclSetTensorFormat instead") +ACL_FUNC_VISIBILITY aclError aclSetTensorStorageFormat(aclTensorDesc *desc, aclFormat format); + +/** + * @ingroup AscendCL + * @brief Set the storage shape specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param numDims [IN] the number of dimensions of the shape + * @param dims [IN] the size of the specified dimension + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_DEPRECATED_MESSAGE("aclSetTensorStorageShape is deprecated, use aclSetTensorShape instead") +ACL_FUNC_VISIBILITY aclError aclSetTensorStorageShape(aclTensorDesc *desc, int numDims, const int64_t *dims); + +/** + * @ingroup AscendCL + * @brief Set the format specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param format [IN] the storage format + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorFormat(aclTensorDesc *desc, aclFormat format); + +/** + * @ingroup AscendCL + * @brief Set the shape specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param numDims [IN] the number of dimensions of the shape + * @param dims [IN] the size of the specified dimension + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorShape(aclTensorDesc *desc, int numDims, const int64_t *dims); + +/** + * @ingroup AscendCL + * @brief Set the original format specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param format [IN] the storage format + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorOriginFormat(aclTensorDesc *desc, aclFormat format); + +/** + * @ingroup AscendCL + * @brief Set the original shape specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param numDims [IN] the number of dimensions of the shape + * @param dims [IN] the size of the specified dimension + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int numDims, const int64_t *dims); + +/** + * @ingroup AscendCL + * @brief get op description info + * + * @param desc [IN] pointer to tensor description + * @param index [IN] index of tensor + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index); + +/** + * @ingroup AscendCL + * @brief get address of tensor + * + * @param desc [IN] pointer to tensor description + * + * @retval null for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief Set the dynamic input name specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param dynamicInputName [IN] pointer to the dynamic input name + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName); + +/** + * @ingroup AscendCL + * @brief Set const data specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param dataBuffer [IN] pointer to the const databuffer + * @param length [IN] the length of const databuffer + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length); + +/** + * @ingroup AscendCL + * @brief Set tensor memory type specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param memType [IN] ACL_MEMTYPE_DEVICE means device, ACL_MEMTYPE_HOST means host + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemType memType); + +/** + * @ingroup AscendCL + * @brief an interface for users to output APP logs + * + * @param logLevel [IN] the level of current log + * @param func [IN] the function where the log is located + * @param file [IN] the file where the log is located + * @param line [IN] Number of source lines where the log is located + * @param fmt [IN] the format of current log + * @param ... [IN] the value of current log + */ +ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, + const char *fmt, ...); + +/** + * @ingroup AscendCL + * @brief get soc name + * + * @retval null for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY const char *aclrtGetSocName(); + +#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h new file mode 100644 index 00000000..2bf85e29 --- /dev/null +++ b/inc/external/acl/acl_mdl.h @@ -0,0 +1,1225 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_MODEL_H_ +#define INC_EXTERNAL_ACL_ACL_MODEL_H_ + +#include +#include + +#include "acl_base.h" +#include "acl_rt.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_MAX_DIM_CNT 128 +#define ACL_MAX_TENSOR_NAME_LEN 128 +#define ACL_MAX_BATCH_NUM 128 +#define ACL_MAX_HW_NUM 128 +#define ACL_MAX_SHAPE_COUNT 128 +#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF + +#define ACL_MDL_LOAD_FROM_FILE 1 +#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 +#define ACL_MDL_LOAD_FROM_MEM 3 +#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 +#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 +#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 + +#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" +#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" +#define ACL_ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES "_datadump_original_op_names" + +typedef struct aclmdlDataset aclmdlDataset; +typedef struct aclmdlDesc aclmdlDesc; +typedef struct aclmdlAIPP aclmdlAIPP; +typedef struct aclAippExtendInfo aclAippExtendInfo; +typedef struct aclmdlConfigHandle aclmdlConfigHandle; + +typedef enum { + ACL_YUV420SP_U8 = 1, + ACL_XRGB8888_U8, + ACL_RGB888_U8, + ACL_YUV400_U8, + ACL_NC1HWC0DI_FP16, + ACL_NC1HWC0DI_S8, + ACL_ARGB8888_U8, + ACL_YUYV_U8, + ACL_YUV422SP_U8, + ACL_AYUV444_U8, + ACL_RAW10, + ACL_RAW12, + ACL_RAW16, + ACL_RAW24, + ACL_AIPP_RESERVED = 0xffff, +} aclAippInputFormat; + +typedef enum { + ACL_MDL_PRIORITY_INT32 = 0, + ACL_MDL_LOAD_TYPE_SIZET, + ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ + ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ + ACL_MDL_MEM_SIZET, + ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ + ACL_MDL_WEIGHT_SIZET, + ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */ + ACL_MDL_WORKSPACE_SIZET, + ACL_MDL_INPUTQ_NUM_SIZET, + ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ + ACL_MDL_OUTPUTQ_NUM_SIZET, + ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ +} aclmdlConfigAttr; + +typedef enum { + ACL_DATA_WITHOUT_AIPP = 0, + ACL_DATA_WITH_STATIC_AIPP, + ACL_DATA_WITH_DYNAMIC_AIPP, + ACL_DYNAMIC_AIPP_NODE +} aclmdlInputAippType; + +typedef struct aclmdlIODims { + char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ + size_t dimCount; /**< dim array count */ + int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ +} aclmdlIODims; + +typedef struct aclAippDims { + aclmdlIODims srcDims; /**< input dims before model transform */ + size_t srcSize; /**< input size before model transform */ + aclmdlIODims aippOutdims; /**< aipp output dims */ + size_t aippOutSize; /**< aipp output size */ +} aclAippDims; + +typedef struct aclmdlBatch { + size_t batchCount; /**< batch array count */ + uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ +} aclmdlBatch; + +typedef struct aclmdlHW { + size_t hwCount; /**< height&width array count */ + uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ +} aclmdlHW; + +typedef struct aclAippInfo { + aclAippInputFormat inputFormat; + int32_t srcImageSizeW; + int32_t srcImageSizeH; + int8_t cropSwitch; + int32_t loadStartPosW; + int32_t loadStartPosH; + int32_t cropSizeW; + int32_t cropSizeH; + int8_t resizeSwitch; + int32_t resizeOutputW; + int32_t resizeOutputH; + int8_t paddingSwitch; + int32_t leftPaddingSize; + int32_t rightPaddingSize; + int32_t topPaddingSize; + int32_t bottomPaddingSize; + int8_t cscSwitch; + int8_t rbuvSwapSwitch; + int8_t axSwapSwitch; + int8_t singleLineMode; + int32_t matrixR0C0; + int32_t matrixR0C1; + int32_t matrixR0C2; + int32_t matrixR1C0; + int32_t matrixR1C1; + int32_t matrixR1C2; + int32_t matrixR2C0; + int32_t matrixR2C1; + int32_t matrixR2C2; + int32_t outputBias0; + int32_t outputBias1; + int32_t outputBias2; + int32_t inputBias0; + int32_t inputBias1; + int32_t inputBias2; + int32_t meanChn0; + int32_t meanChn1; + int32_t meanChn2; + int32_t meanChn3; + float minChn0; + float minChn1; + float minChn2; + float minChn3; + float varReciChn0; + float varReciChn1; + float varReciChn2; + float varReciChn3; + aclFormat srcFormat; + aclDataType srcDatatype; + size_t srcDimNum; + size_t shapeCount; + aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; + aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ +} aclAippInfo; + +/** + * @ingroup AscendCL + * @brief Create data of type aclmdlDesc + * + * @retval the aclmdlDesc pointer + */ +ACL_FUNC_VISIBILITY aclmdlDesc *aclmdlCreateDesc(); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlDesc + * + * @param modelDesc [IN] Pointer to almdldlDesc to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyDesc(aclmdlDesc *modelDesc); + +/** + * @ingroup AscendCL + * @brief Get aclmdlDesc data of the model according to the model ID + * + * @param modelDesc [OUT] aclmdlDesc pointer + * @param modelId [IN] model id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetDesc(aclmdlDesc *modelDesc, uint32_t modelId); + +/** + * @ingroup AscendCL + * @brief Get the number of the inputs of + * the model according to data of aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * + * @retval input size with aclmdlDesc + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetNumInputs(aclmdlDesc *modelDesc); + +/** + * @ingroup AscendCL + * @brief Get the number of the output of + * the model according to data of aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * + * @retval output size with aclmdlDesc + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetNumOutputs(aclmdlDesc *modelDesc); + +/** + * @ingroup AscendCL + * @brief Get the size of the specified input according to + * the data of type aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * @param index [IN] the size of the number of inputs to be obtained, + * the index value starts from 0 + * + * @retval Specify the size of the input + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetInputSizeByIndex(aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief Get the size of the specified output according to + * the data of type aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * @param index [IN] the size of the number of outputs to be obtained, + * the index value starts from 0 + * + * @retval Specify the size of the output + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetOutputSizeByIndex(aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief Create data of type aclmdlDataset + * + * @retval the aclmdlDataset pointer + */ +ACL_FUNC_VISIBILITY aclmdlDataset *aclmdlCreateDataset(); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlDataset + * + * @param dataset [IN] Pointer to aclmdlDataset to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyDataset(const aclmdlDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Add aclDataBuffer to aclmdlDataset + * + * @param dataset [OUT] aclmdlDataset address of aclDataBuffer to be added + * @param dataBuffer [IN] aclDataBuffer address to be added + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclDataBuffer *dataBuffer); + +/** + * @ingroup AscendCL + * @brief Set aclTensorDesc to aclmdlDataset + * + * @param dataset [OUT] aclmdlDataset address of aclDataBuffer to be added + * @param tensorDesc [IN] aclTensorDesc address to be added + * @param index [IN] index of tensorDesc which to be added + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, aclTensorDesc *tensorDesc, + size_t index); + +/** + * @ingroup AscendCL + * @brief Get the number of aclDataBuffer in aclmdlDataset + * + * @param dataset [IN] aclmdlDataset poiter + * + * @retval the number of aclDataBuffer + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Get the aclDataBuffer in aclmdlDataset by index + * + * @param dataset [IN] aclmdlDataset poiter + * @param index [IN] the index of aclDataBuffer + * + * @retval Get successfully, return the address of aclDataBuffer + * @retval Failure return NULL + */ +ACL_FUNC_VISIBILITY aclDataBuffer *aclmdlGetDatasetBuffer(const aclmdlDataset *dataset, size_t index); + +/** + * @ingroup AscendCL + * @brief Load offline model data from files + * and manage memory internally by the system + * + * @par Function + * After the system finishes loading the model, + * the model ID returned is used as a mark to identify the model + * during subsequent operations + * + * @param modelPath [IN] Storage path for offline model files + * @param modelId [OUT] Model ID generated after + * the system finishes loading the model + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t *modelId); + +/** + * @ingroup AscendCL + * @brief Load offline model data from memory and manage the memory of + * model running internally by the system + * + * @par Function + * After the system finishes loading the model, + * the model ID returned is used as a mark to identify the model + * during subsequent operations + * + * @param model [IN] Model data stored in memory + * @param modelSize [IN] model data size + * @param modelId [OUT] Model ID generated after + * the system finishes loading the model + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId); + +/** + * @ingroup AscendCL + * @brief Load offline model data from a file, + * and the user manages the memory of the model run by itself + * + * @par Function + * After the system finishes loading the model, + * the model ID returned is used as a mark to identify the model + * during subsequent operations. + * @param modelPath [IN] Storage path for offline model files + * @param modelId [OUT] Model ID generated after finishes loading the model + * @param workPtr [IN] A pointer to the working memory + * required by the model on the Device,can be null + * @param workSize [IN] The amount of working memory required by the model + * @param weightPtr [IN] Pointer to model weight memory on Device + * @param weightSize [IN] The amount of weight memory required by the model + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr, + size_t workSize, void *weightPtr, size_t weightSize); + +/** + * @ingroup AscendCL + * @brief Load offline model data from memory, + * and the user can manage the memory of model running + * + * @par Function + * After the system finishes loading the model, + * the model ID returned is used as a mark to identify the model + * during subsequent operations + * @param model [IN] Model data stored in memory + * @param modelSize [IN] model data size + * @param modelId [OUT] Model ID generated after finishes loading the model + * @param workPtr [IN] A pointer to the working memory + * required by the model on the Device,can be null + * @param workSize [IN] work memory size + * @param weightPtr [IN] Pointer to model weight memory on Device,can be null + * @param weightSize [IN] The amount of weight memory required by the model + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId, + void *workPtr, size_t workSize, void *weightPtr, + size_t weightSize); + +/** + * @ingroup AscendCL + * @brief load model from file with async queue + * + * @param modelPath [IN] model path + * @param modelId [OUT] return model id if load success + * @param inputQ [IN] input queue pointer + * @param inputQNum [IN] input queue num + * @param outputQ [IN] output queue pointer + * @param outputQNum [IN] output queue num + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint32_t *modelId, const uint32_t *inputQ, + size_t inputQNum, const uint32_t *outputQ, size_t outputQNum); + +/** + * @ingroup AscendCL + * @brief load model from memory with async queue + * + * @param model [IN] model memory which user manages + * @param modelSize [IN] model size + * @param modelId [OUT] return model id if load success + * @param inputQ [IN] input queue pointer + * @param inputQNum [IN] input queue num + * @param outputQ [IN] output queue pointer + * @param outputQNum [IN] output queue num + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, + const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ, + size_t outputQNum); + +/** + * @ingroup AscendCL + * @brief Execute model synchronous inference until the inference result is returned + * + * @param modelId [IN] ID of the model to perform inference + * @param input [IN] Input data for model inference + * @param output [OUT] Output data for model inference + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output); + +/** + * @ingroup AscendCL + * @brief Execute model asynchronous inference until the inference result is returned + * + * @param modelId [IN] ID of the model to perform inference + * @param input [IN] Input data for model inference + * @param output [OUT] Output data for model inference + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem + */ +ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief unload model with model id + * + * @param modelId [IN] model id to be unloaded + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlUnload(uint32_t modelId); + +/** + * @ingroup AscendCL + * @brief Get the weight memory size and working memory size + * required for model execution according to the model file + * + * @param fileName [IN] Model path to get memory information + * @param workSize [OUT] The amount of working memory for model executed + * @param weightSize [OUT] The amount of weight memory for model executed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlQuerySize(const char *fileName, size_t *workSize, size_t *weightSize); + +/** + * @ingroup AscendCL + * @brief Obtain the weights required for + * model execution according to the model data in memory + * + * @par Restriction + * The execution and weight memory is Device memory, + * and requires user application and release. + * @param model [IN] model memory which user manages + * @param modelSize [IN] model data size + * @param workSize [OUT] The amount of working memory for model executed + * @param weightSize [OUT] The amount of weight memory for model executed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlQuerySizeFromMem(const void *model, size_t modelSize, size_t *workSize, + size_t *weightSize); + +/** + * @ingroup AscendCL + * @brief In dynamic batch scenarios, + * it is used to set the number of images processed + * at one time during model inference + * + * @param modelId [IN] model id + * @param dataset [IN|OUT] data for model inference + * @param index [IN] index of dynamic tensor + * @param batchSize [IN] Number of images processed at a time during model + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicBatchSize(uint32_t modelId, aclmdlDataset *dataset, size_t index, + uint64_t batchSize); + +/** + * @ingroup AscendCL + * @brief Sets the H and W of the specified input of the model + * + * @param modelId [IN] model id + * @param dataset [IN|OUT] data for model inference + * @param index [IN] index of dynamic tensor + * @param height [IN] model height + * @param width [IN] model width + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicHWSize(uint32_t modelId, aclmdlDataset *dataset, size_t index, + uint64_t height, uint64_t width); + +/** + * @ingroup AscendCL + * @brief Sets the dynamic dims of the specified input of the model + * + * @param modelId [IN] model id + * @param dataset [IN|OUT] data for model inference + * @param index [IN] index of dynamic dims + * @param dims [IN] value of dynamic dims + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetInputDynamicDims(uint32_t modelId, aclmdlDataset *dataset, size_t index, + const aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get input dims info + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlGetInputDimsV2 + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get input dims info(version 2), especially for static aipp + * it is the same with aclmdlGetInputDims while model without static aipp + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlGetInputDims + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDimsV2(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get output dims info + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get current output dims info + * + * @par Function + * The following use cases are supported: + * @li Get current output shape when model is dynamic and + * dynamic shape info is set + * @li Get max output shape when model is dynamic and + * dynamic shape info is not set + * @li Get actual output shape when model is static + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get attr value by op name + * + * @param modelDesc [IN] model description + * @param opName [IN] op name + * @param attr [IN] attr name + * + * @retval the attr value + */ +ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr); + +/** + * @ingroup AscendCL + * @brief get input name by index + * + * @param modelDesc [IN] model description + * @param index [IN] intput tensor index + * + * @retval input tensor name,the same life cycle with modelDesc + */ +ACL_FUNC_VISIBILITY const char *aclmdlGetInputNameByIndex(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get output name by index + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * + * @retval output tensor name,the same life cycle with modelDesc + */ +ACL_FUNC_VISIBILITY const char *aclmdlGetOutputNameByIndex(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get input format by index + * + * @param modelDesc [IN] model description + * @param index [IN] intput tensor index + * + * @retval input tensor format + */ +ACL_FUNC_VISIBILITY aclFormat aclmdlGetInputFormat(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get output format by index + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * + * @retval output tensor format + */ +ACL_FUNC_VISIBILITY aclFormat aclmdlGetOutputFormat(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get input data type by index + * + * @param modelDesc [IN] model description + * @param index [IN] intput tensor index + * + * @retval input tensor data type + */ +ACL_FUNC_VISIBILITY aclDataType aclmdlGetInputDataType(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get output data type by index + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * + * @retval output tensor data type + */ +ACL_FUNC_VISIBILITY aclDataType aclmdlGetOutputDataType(const aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief get input tensor index by name + * + * @param modelDesc [IN] model description + * @param name [IN] intput tensor name + * @param index [OUT] intput tensor index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index); + +/** + * @ingroup AscendCL + * @brief get output tensor index by name + * + * @param modelDesc [IN] model description + * @param name [IN] output tensor name + * @param index [OUT] output tensor index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetOutputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index); + +/** + * @ingroup AscendCL + * @brief get dynamic batch info + * + * @param modelDesc [IN] model description + * @param batch [OUT] dynamic batch info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicBatch(const aclmdlDesc *modelDesc, aclmdlBatch *batch); + +/** + * @ingroup AscendCL + * @brief get dynamic height&width info + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * @param hw [OUT] dynamic height&width info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicHW(const aclmdlDesc *modelDesc, size_t index, aclmdlHW *hw); + +/** + * @ingroup AscendCL + * @brief get dynamic gear count + * + * @param modelDesc [IN] model description + * @param index [IN] unused, must be -1 + * @param gearCount [OUT] dynamic gear count + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicGearCount(const aclmdlDesc *modelDesc, size_t index, + size_t *gearCount); + +/** + * @ingroup AscendCL + * @brief get dynamic dims info + * + * @param modelDesc [IN] model description + * @param index [IN] unused, must be -1 + * @param dims [OUT] value of dynamic dims + * @param gearCount [IN] dynamic gear count + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims, + size_t gearCount); + +/** + * @ingroup AscendCL + * @brief Create data of type aclmdlAIPP + * + * @param batchSize [IN] batchsizes of model + * + * @retval the aclmdlAIPP pointer + */ +ACL_FUNC_VISIBILITY aclmdlAIPP *aclmdlCreateAIPP(uint64_t batchSize); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlAIPP + * + * @param aippParmsSet [IN] Pointer for aclmdlAIPP to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyAIPP(const aclmdlAIPP *aippParmsSet); + +/** + * @ingroup AscendCL + * @brief set InputFormat of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param inputFormat [IN] The inputFormat of aipp + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, aclAippInputFormat inputFormat); + +/** + * @ingroup AscendCL + * @brief set cscParms of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param csc_switch [IN] Csc switch + * @param cscMatrixR0C0 [IN] Csc_matrix_r0_c0 + * @param cscMatrixR0C1 [IN] Csc_matrix_r0_c1 + * @param cscMatrixR0C2 [IN] Csc_matrix_r0_c2 + * @param cscMatrixR1C0 [IN] Csc_matrix_r1_c0 + * @param cscMatrixR1C1 [IN] Csc_matrix_r1_c1 + * @param cscMatrixR1C2 [IN] Csc_matrix_r1_c2 + * @param cscMatrixR2C0 [IN] Csc_matrix_r2_c0 + * @param cscMatrixR2C1 [IN] Csc_matrix_r2_c1 + * @param cscMatrixR2C2 [IN] Csc_matrix_r2_c2 + * @param cscOutputBiasR0 [IN] Output Bias for RGB to YUV, element of row 0, unsigned number + * @param cscOutputBiasR1 [IN] Output Bias for RGB to YUV, element of row 1, unsigned number + * @param cscOutputBiasR2 [IN] Output Bias for RGB to YUV, element of row 2, unsigned number + * @param cscInputBiasR0 [IN] Input Bias for YUV to RGB, element of row 0, unsigned number + * @param cscInputBiasR1 [IN] Input Bias for YUV to RGB, element of row 1, unsigned number + * @param cscInputBiasR2 [IN] Input Bias for YUV to RGB, element of row 2, unsigned number + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, + int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, + int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, + int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, + uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, + uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, + uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); + +/** + * @ingroup AscendCL + * @brief set rb/ub swap switch of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param rbuvSwapSwitch [IN] rb/ub swap switch + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); + +/** + * @ingroup AscendCL + * @brief set RGBA->ARGB, YUVA->AYUV swap switch of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param axSwapSwitch [IN] RGBA->ARGB, YUVA->AYUV swap switch + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); + +/** + * @ingroup AscendCL + * @brief set source image of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param srcImageSizeW [IN] Source image width + * @param srcImageSizeH [IN] Source image height + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, + int32_t srcImageSizeH); + +/** + * @ingroup AscendCL + * @brief set resize switch of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param scfSwitch [IN] Resize switch + * @param scfInputSizeW [IN] Input width of scf + * @param scfInputSizeH [IN] Input height of scf + * @param scfOutputSizeW [IN] Output width of scf + * @param scfOutputSizeH [IN] Output height of scf + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW, + int32_t scfInputSizeH, int32_t scfOutputSizeW, + int32_t scfOutputSizeH, uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set cropParams of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param cropSwitch [IN] Crop switch + * @param cropStartPosW [IN] The start horizontal position of cropping + * @param cropStartPosH [IN] The start vertical position of cropping + * @param cropSizeW [IN] Crop width + * @param cropSizeH [IN] Crop height + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW, + int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH, + uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set paddingParams of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param paddingSwitch [IN] Padding switch + * @param paddingSizeTop [IN] Top padding size + * @param paddingSizeBottom [IN] Bottom padding size + * @param paddingSizeLeft [IN] Left padding size + * @param paddingSizeRight [IN] Right padding size + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, + int32_t paddingSizeTop, int32_t paddingSizeBottom, + int32_t paddingSizeLeft, int32_t paddingSizeRight, + uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set DtcPixelMean of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param dtcPixelMeanChn0 [IN] Mean value of channel 0 + * @param dtcPixelMeanChn1 [IN] Mean value of channel 1 + * @param dtcPixelMeanChn2 [IN] Mean value of channel 2 + * @param dtcPixelMeanChn3 [IN] Mean value of channel 3 + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0, + int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2, + int16_t dtcPixelMeanChn3, uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set DtcPixelMin of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param dtcPixelMinChn0 [IN] Min value of channel 0 + * @param dtcPixelMinChn1 [IN] Min value of channel 1 + * @param dtcPixelMinChn2 [IN] Min value of channel 2 + * @param dtcPixelMinChn3 [IN] Min value of channel 3 + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0, + float dtcPixelMinChn1, float dtcPixelMinChn2, + float dtcPixelMinChn3, uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set PixelVarReci of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param dtcPixelVarReciChn0 [IN] sfr_dtc_pixel_variance_reci_ch0 + * @param dtcPixelVarReciChn1 [IN] sfr_dtc_pixel_variance_reci_ch1 + * @param dtcPixelVarReciChn2 [IN] sfr_dtc_pixel_variance_reci_ch2 + * @param dtcPixelVarReciChn3 [IN] sfr_dtc_pixel_variance_reci_ch3 + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0, + float dtcPixelVarReciChn1, float dtcPixelVarReciChn2, + float dtcPixelVarReciChn3, uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set aipp parameters to model + * + * @param modelId [IN] model id + * @param dataset [IN] Pointer of dataset + * @param index [IN] index of input for aipp data(ACL_DYNAMIC_AIPP_NODE) + * @param aippParmsSet [IN] Pointer for aclmdlAIPP + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index, + const aclmdlAIPP *aippParmsSet); + +/** + * @ingroup AscendCL + * @brief set aipp parameters to model + * + * @param modelId [IN] model id + * @param dataset [IN] Pointer of dataset + * @param index [IN] index of input for data which linked dynamic aipp(ACL_DATA_WITH_DYNAMIC_AIPP) + * @param aippParmsSet [IN] Pointer for aclmdlAIPP + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index, + const aclmdlAIPP *aippParmsSet); + +/** + * @ingroup AscendCL + * @brief get input aipp type + * + * @param modelId [IN] model id + * @param index [IN] index of input + * @param type [OUT] aipp type for input.refrer to aclmdlInputAippType(enum) + * @param dynamicAttachedDataIndex [OUT] index for dynamic attached data(ACL_DYNAMIC_AIPP_NODE) + * valid when type is ACL_DATA_WITH_DYNAMIC_AIPP, invalid value is ACL_INVALID_NODE_INDEX + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type, + size_t *dynamicAttachedDataIndex); + +/** + * @ingroup AscendCL + * @brief get static aipp parameters from model + * + * @param modelId [IN] model id + * @param index [IN] index of tensor + * @param aippinfo [OUT] Pointer for static aipp info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); + +/** + * @ingroup AscendCL + * @brief get op description info + * + * @param deviceId [IN] device id + * @param streamId [IN] stream id + * @param taskId [IN] task id + * @param opName [OUT] pointer to op name + * @param opNameLen [IN] the length of op name + * @param inputDesc [OUT] pointer to input description + * @param numInputs [OUT] the number of input tensor + * @param outputDesc [OUT] pointer to output description + * @param numOutputs [OUT] the number of output tensor + * + * @retval ACL_SUCCESS The function is successfully executed + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId, + char *opName, size_t opNameLen, aclTensorDesc **inputDesc, + size_t *numInputs, aclTensorDesc **outputDesc, + size_t *numOutputs); + +/** + * @ingroup AscendCL + * @brief init dump + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); + +/** + * @ingroup AscendCL + * @brief set param of dump + * + * @param dumpCfgPath [IN] the path of dump config + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); + +/** + * @ingroup AscendCL + * @brief finalize dump. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); + +/** + * @ingroup AscendCL + * @brief load model with config + * + * @param handle [IN] pointer to model config handle + * @param modelId [OUT] pointer to model id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId); + +/** + * @ingroup AscendCL + * @brief create model config handle of type aclmdlConfigHandle + * + * @retval the aclmdlConfigHandle pointer + * + * @see aclmdlDestroyConfigHandle + */ +ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle(); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlConfigHandle + * + * @param handle [IN] pointer to model config handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateConfigHandle + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handle); + +/** + * @ingroup AscendCL + * @brief set config for model load + * + * @param handle [OUT] pointer to model config handle + * @param attr [IN] config attr in model config handle to be set + * @param attrValue [IN] pointer to model config value + * @param valueSize [IN] memory size of attrValue + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr, + const void *attrValue, size_t valueSize); + +/** + * @ingroup AscendCL + * @brief get real tensor name from modelDesc + * + * @param modelDesc [IN] pointer to modelDesc + * @param name [IN] tensor name + * + * @retval the pointer of real tensor name + * @retval Failure return NULL + */ +ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelDesc, const char *name); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ diff --git a/inc/external/acl/acl_op.h b/inc/external/acl/acl_op.h new file mode 100644 index 00000000..d2e59bfb --- /dev/null +++ b/inc/external/acl/acl_op.h @@ -0,0 +1,504 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_OP_H_ +#define INC_EXTERNAL_ACL_ACL_OP_H_ + +#include "acl_base.h" +#include "acl_rt.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct aclopHandle aclopHandle; +typedef struct aclopAttr aclopAttr; +typedef struct aclopKernelDesc aclopKernelDesc; + +typedef void (*aclDataDeallocator)(void *data, size_t length); + +static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1; + +typedef enum aclEngineType { + ACL_ENGINE_SYS, + ACL_ENGINE_AICORE, + ACL_ENGINE_VECTOR, +} aclopEngineType; + +/** + * @ingroup AscendCL + * @brief Set base directory that contains single op models + * + * @par Restriction + * The aclopSetModelDir interface can be called only once in a process. + * @param modelDir [IN] path of the directory + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetModelDir(const char *modelDir); + +/** + * @ingroup AscendCL + * @brief load single op models from memory + * + * @par Restriction + * The aclopLoad interface can be called more than one times in a process. + * @param model [IN] address of single op models + * @param modelSize [IN] size of single op models + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopLoad(const void *model, size_t modelSize); + +/** + * @ingroup AscendCL + * @brief create data of type aclopAttr + * + * @retval pointer to created instance. + * @retval nullptr if run out of memory + */ +ACL_FUNC_VISIBILITY aclopAttr *aclopCreateAttr(); + +/** + * @ingroup AscendCL + * @brief destroy data of typ aclopAttr + * + * @param attr [IN] pointer to the instance of aclopAttr + */ +ACL_FUNC_VISIBILITY void aclopDestroyAttr(const aclopAttr *attr); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is bool + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param attrValue [IN] attribute value + * false if attrValue is 0, true otherwise. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrBool(aclopAttr *attr, const char *attrName, uint8_t attrValue); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is int64_t + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param attrValue [IN] attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrInt(aclopAttr *attr, const char *attrName, int64_t attrValue); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is float + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param attrValue [IN] attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attrName, float attrValue); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is string + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param attrValue [IN] attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of bools + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numValues [IN] number of values. false if attrValue is 0, true otherwise. + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues, + const uint8_t *values); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of ints + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numValues [IN] number of values + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues, + const int64_t *values); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of floats + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numValues [IN] number of values + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues, + const float *values); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of strings + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numValues [IN] number of values + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues, + const char **values); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of list of ints + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numLists [IN] number of lists + * @param numValues [IN] pointer to number of values of each list + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists, + const int *numValues, const int64_t *const values[]); + +/** + * @ingroup AscendCL + * @brief Load and execute the specified operator asynchronously + * + * @par Restriction + * @li The input and output organization of each operator is different, + * and the application needs to organize the operator strictly + * according to the operator input and output parameters when calling. + * @li When the user calls aclopExecute, + * the ACL finds the corresponding task according to the optype, + * the description of the input tesnsor, + * the description of the output tesnsor, and attr, and issues the execution. + * + * @param opType [IN] type of op + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param inputs [IN] pointer to array of input buffers + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param outputs [OUT] pointer to array of output buffers + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") +ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], + const aclDataBuffer *const inputs[], int numOutputs, + const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], + const aclopAttr *attr, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Load and execute the specified operator + * The difference with aclopExecute is that aclopExecuteV2 will refresh outputDesc + * + * @par Restriction + * @li The input and output organization of each operator is different, + * and the application needs to organize the operator strictly + * according to the operator input and output parameters when calling. + * @li When the user calls aclopExecuteV2, + * the ACL finds the corresponding task according to the optype, + * the description of the input tesnsor, + * the description of the output tesnsor, and attr, and issues the execution. + * + * @param opType [IN] type of op + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param inputs [IN] pointer to array of input buffers + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN|OUT] pointer to array of output tensor descriptions + * @param outputs [OUT] pointer to array of output buffers + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[], + aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], + aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a instance of aclopHandle. + * + * @param opType [IN] type of op + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param opAttr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs, + const aclTensorDesc *const inputDesc[], int numOutputs, + const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief destroy aclopHandle instance + * + * @param handle [IN] pointer to the instance of aclopHandle + */ +ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle); + +/** + * @ingroup AscendCL + * @brief execute an op with the handle. + * can save op model matching cost compared with aclopExecute + * + * @param handle [IN] pointer to the instance of aclopHandle. + * The aclopCreateHandle interface has been called + * in advance to create aclopHandle type data. + * @param numInputs [IN] number of inputs + * @param inputs [IN] pointer to array of input buffers. + * The aclCreateDataBuffer interface has been called + * in advance to create aclDataBuffer type data. + * @param numOutputs [IN] number of outputs + * @param outputs [OUT] pointer to array of output buffers + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclopCreateHandle | aclCreateDataBuffer + */ +ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs, + const aclDataBuffer *const inputs[], int numOutputs, + aclDataBuffer *const outputs[], aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief cast data type + * + * @param srcDesc [IN] source tensor desc + * @param srcBuffer [IN] source tensor buffer + * @param dstDesc [IN] destination tensor desc + * @param dstBuffer [OUT] destination tensor buffer + * @param truncate [IN] do not truncate if value is 0, truncate otherwise + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer, + const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for casting datatype + * + * @param srcDesc [IN] source tensor desc + * @param dstDesc [IN] destination tensor desc + * @param truncate [IN] do not truncate if value is 0, truncate otherwise + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief create kernel + * + * @param opType [IN] op type + * @param kernelId [IN] kernel id + * @param kernelName [IN] kernel name + * @param binData [IN] kernel bin data + * @param binSize [IN] kernel bin size + * @param enginetype [IN] enigne type + * @param deallocator [IN] callback function for deallocating bin data, + * null if bin data to be deallocated by caller + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclopCompile + */ +ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName, + void *binData, int binSize, aclopEngineType enginetype, + aclDataDeallocator deallocator); + +/** + * @ingroup AscendCL + * @brief create kernel + * + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param opAttr [IN] pointer to instance of aclopAttr + * @param aclopKernelDesc [IN] pointer to instance of aclopKernelDesc + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs, + const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, + aclopKernelDesc *aclopKernelDesc); + +/** + * @ingroup AscendCL + * @brief register compile function + * + * @param opType [IN] op type + * @param func [IN] compile function + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclopUnregisterCompileFunc + */ +ACL_FUNC_VISIBILITY aclError aclopRegisterCompileFunc(const char *opType, aclopCompileFunc func); + +/** + * @ingroup AscendCL + * @brief unregister compile function + * + * @param opType [IN] op type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType); + +/** + * @ingroup AscendCL + * @brief set kernel args + * + * @param kernelDesc [IN] pointer to instance of aclopKernelDesc + * @param kernelId [IN] kernel id + * @param blockDim [IN] block dim + * @param args [IN] args + * @param argSize [IN] size in bytes of args + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim, + const void *args, uint32_t argSize); + +/** + * @ingroup AscendCL + * @brief set workspace sizes + * + * @param kernelDesc [IN] pointer to instance of aclopKernelDesc + * @param numWorkspaces [IN] number of workspaces + * @param workspaceSizes [IN] pointer to array of sizes of workspaces + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kernelDesc, int numWorkspaces, + size_t *workspaceSizes); + +/** + * @ingroup AscendCL + * @brief compile op with dynamic shape + * + * @param opType [IN] op type + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs, + const aclTensorDesc *const inputDesc[], int numOutputs, + const aclTensorDesc *const outputDesc[], const aclopAttr *attr); + +/** + * @ingroup AscendCL + * @brief inferShape the specified operator synchronously + * + * @param opType [IN] type of op + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param inputs [IN] pointer to array of input buffers + * @param numOutputs [IN] number of outputs + * @param outputDesc [OUT] pointer to array of output tensor descriptions + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[], + aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], + aclopAttr *attr); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_OP_H_ diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h new file mode 100644 index 00000000..d9d1b3da --- /dev/null +++ b/inc/external/acl/acl_op_compiler.h @@ -0,0 +1,121 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ +#define INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ + +#include "acl_base.h" +#include "acl_op.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType; + +typedef enum { + ACL_PRECISION_MODE, + ACL_AICORE_NUM, + ACL_AUTO_TUNE_MODE, + ACL_OP_SELECT_IMPL_MODE, + ACL_OPTYPELIST_FOR_IMPLMODE, + ACL_OP_DEBUG_LEVEL, + ACL_DEBUG_DIR, + ACL_OP_COMPILER_CACHE_MODE, + ACL_OP_COMPILER_CACHE_DIR, + ACL_OP_PERFORMANCE_MODE +} aclCompileOpt; + +typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag; + +/** + * @ingroup AscendCL + * @brief compile op + * + * @param opType [IN] op type + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param engineType [IN] engine type + * @param compileFlag [IN] compile flag + * @param opPath [IN] path of op + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], + int numOutputs, const aclTensorDesc *const outputDesc[], + const aclopAttr *attr, aclopEngineType engineType, + aclopCompileType compileFlag, const char *opPath); + +/** + * @ingroup AscendCL + * @brief compile and execute op + * + * @param opType [IN] op type + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param inputs [IN] pointer to array of input buffers + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param outputs [IN] pointer to array of outputs buffers + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param engineType [IN] engine type + * @param compileFlag [IN] compile flag + * @param opPath [IN] path of op + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( + const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], + int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, + aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief set compile option + * + * @param aclCompileOpt [IN] compile option + * @param value [IN] pointer for the option value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value); + +/** + * @ingroup AscendCL + * @brief set compile flag + * + * @param flag [IN] compile flag, ACL_OP_COMPILE_DEFAULT means compile with default mode + * ACL_OP_COMPILE_FUZZ means compile with fuzz mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h new file mode 100644 index 00000000..3784d8c6 --- /dev/null +++ b/inc/external/acl/acl_prof.h @@ -0,0 +1,329 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_PROF_H_ +#define INC_EXTERNAL_ACL_PROF_H_ + +#include "acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_PROF_ACL_API 0x0001 +#define ACL_PROF_TASK_TIME 0x0002 +#define ACL_PROF_AICORE_METRICS 0x0004 +#define ACL_PROF_AICPU 0x0008 + +/** + * @deprecated please use aclprofGetOpTypeLen and aclprofGetOpTNameLen instead + */ +#define ACL_PROF_MAX_OP_NAME_LEN 257 +#define ACL_PROF_MAX_OP_TYPE_LEN 65 + +typedef enum { + ACL_AICORE_ARITHMETIC_UTILIZATION = 0, + ACL_AICORE_PIPE_UTILIZATION = 1, + ACL_AICORE_MEMORY_BANDWIDTH = 2, + ACL_AICORE_L0B_AND_WIDTH = 3, + ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, + ACL_AICORE_NONE = 0xFF +} aclprofAicoreMetrics; + +typedef struct aclprofConfig aclprofConfig; +typedef struct aclprofStopConfig aclprofStopConfig; +typedef struct aclprofAicoreEvents aclprofAicoreEvents; +typedef struct aclprofSubscribeConfig aclprofSubscribeConfig; + +/** + * @ingroup AscendCL + * @brief profiling initialize + * + * @param profilerResultPath [IN] path of profiling result + * @param length [IN] length of profilerResultPath + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofFinalize + */ +ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length); + +/** + * @ingroup AscendCL + * @brief profiling finalize + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofInit + */ +ACL_FUNC_VISIBILITY aclError aclprofFinalize(); + +/** + * @ingroup AscendCL + * @brief Start profiling modules by profilerConfig + * + * @param profilerConfig [IN] config of profiling + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofStop + */ +ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); + +/** + * @ingroup AscendCL + * @brief Create data of type aclprofConfig + * + * @param deviceIdList [IN] list of device id + * @param deviceNums [IN] number of devices + * @param aicoreMetrics [IN] type of aicore metrics + * @param aicoreEvents [IN] pointer to aicore events, only support NULL now + * @param dataTypeConfig [IN] config modules need profiling + * + * @retval the aclprofConfig pointer + * + * @see aclprofDestroyConfig + */ +ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, + aclprofAicoreMetrics aicoreMetrics, + aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); + +/** + * @ingroup AscendCL + * @brief Destroy data of type aclprofConfig + * + * @param profilerConfig [IN] config of profiling + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofCreateConfig + */ +ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig); + +/** + * @ingroup AscendCL + * @brief stop profiling modules by stopProfilingConfig + * + * @param profilerConfig [IN] pointer to stop config of profiling + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofStart + */ +ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); + +/** + * @ingroup AscendCL + * @brief subscribe profiling data of model + * + * @param modelId [IN] the model id subscribed + * @param profSubscribeConfig [IN] pointer to config of model subscribe + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofModelUnSubscribe + */ +ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig); + +/** + * @ingroup AscendCL + * @brief unsubscribe profiling data of model + * + * @param modelId [IN] the model id unsubscribed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofModelSubscribe + */ +ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); + +/** + * @ingroup AscendCL + * @brief create subscribe config + * + * @param timeInfoSwitch [IN] switch whether get time info from model + * @param aicoreMetrics [IN] aicore metrics + * @param fd [IN] pointer to write pipe + * + * @retval the aclprofSubscribeConfig pointer + * + * @see aclprofDestroySubscribeConfig + */ +ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, + aclprofAicoreMetrics aicoreMetrics, void *fd); + +/** + * @ingroup AscendCL + * @brief destroy subscribe config + * + * @param profSubscribeConfig [IN] subscribe config + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofCreateSubscribeConfig + */ +ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig); + +/** + * @ingroup AscendCL + * @brief create subscribe config + * + * @param opDescSize [OUT] size of op desc + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize); + +/** + * @ingroup AscendCL + * @brief get op number from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param opNumber [OUT] op number of subscription data + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber); + +/** + * @ingroup AscendCL + * @brief get length op type from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * @param opTypeLen [OUT] actual length of op type string + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index, + size_t *opTypeLen); + +/** + * @ingroup AscendCL + * @brief get op type from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * @param opType [OUT] obtained op type string + * @param opTypeLen [IN] obtained length of op type string + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType, + size_t opTypeLen); + +/** + * @ingroup AscendCL + * @brief get length op name from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * @param opNameLen [OUT] actual length of op name string + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index, + size_t *opNameLen); + +/** + * @ingroup AscendCL + * @brief get op type from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * @param opName [OUT] obtained op name string + * @param opNameLen [IN] obtained length of op name string + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName, + size_t opNameLen); + +/** + * @ingroup AscendCL + * @brief get start time of specified op from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval start time(us) of specified op with timestamp + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get end time of specified op from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval end time(us) of specified op with timestamp + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get excution time of specified op from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval execution time(us) of specified op with timestamp + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get model id from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * + * @retval model id of subscription data + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_PROF_H_ diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h new file mode 100644 index 00000000..5ee70724 --- /dev/null +++ b/inc/external/acl/acl_rt.h @@ -0,0 +1,958 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_RT_H_ +#define INC_EXTERNAL_ACL_ACL_RT_H_ + +#include +#include +#include "acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_EVENT_TIME_LINE 0x00000008u + +typedef enum aclrtRunMode { + ACL_DEVICE, + ACL_HOST, +} aclrtRunMode; + +typedef enum aclrtTsId { + ACL_TS_ID_AICORE = 0, + ACL_TS_ID_AIVECTOR = 1, + ACL_TS_ID_RESERVED = 2, +} aclrtTsId; + +typedef enum aclrtEventStatus { + ACL_EVENT_STATUS_COMPLETE = 0, + ACL_EVENT_STATUS_NOT_READY = 1, + ACL_EVENT_STATUS_RESERVED = 2, +} aclrtEventStatus; + +typedef enum aclrtCallbackBlockType { + ACL_CALLBACK_NO_BLOCK, + ACL_CALLBACK_BLOCK, +} aclrtCallbackBlockType; + +typedef enum aclrtMemcpyKind { + ACL_MEMCPY_HOST_TO_HOST, + ACL_MEMCPY_HOST_TO_DEVICE, + ACL_MEMCPY_DEVICE_TO_HOST, + ACL_MEMCPY_DEVICE_TO_DEVICE, +} aclrtMemcpyKind; + +typedef enum aclrtMemMallocPolicy { + ACL_MEM_MALLOC_HUGE_FIRST, + ACL_MEM_MALLOC_HUGE_ONLY, + ACL_MEM_MALLOC_NORMAL_ONLY, + ACL_MEM_MALLOC_HUGE_FIRST_P2P, + ACL_MEM_MALLOC_HUGE_ONLY_P2P, + ACL_MEM_MALLOC_NORMAL_ONLY_P2P, +} aclrtMemMallocPolicy; + +typedef enum aclrtMemAttr { + ACL_DDR_MEM, + ACL_HBM_MEM, + ACL_DDR_MEM_HUGE, + ACL_DDR_MEM_NORMAL, + ACL_HBM_MEM_HUGE, + ACL_HBM_MEM_NORMAL, + ACL_DDR_MEM_P2P_HUGE, + ACL_DDR_MEM_P2P_NORMAL, + ACL_HBM_MEM_P2P_HUGE, + ACL_HBM_MEM_P2P_NORMAL, +} aclrtMemAttr; + +typedef enum aclrtGroupAttr { + ACL_GROUP_AICORE_INT, + ACL_GROUP_AIV_INT, + ACL_GROUP_AIC_INT, + ACL_GROUP_SDMANUM_INT, + ACL_GROUP_ASQNUM_INT, + ACL_GROUP_GROUPID_INT +} aclrtGroupAttr; + +typedef struct tagRtGroupInfo aclrtGroupInfo; + +typedef struct rtExceptionInfo aclrtExceptionInfo; + +typedef void (*aclrtCallback)(void *userData); + +typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo); + +/** + * @ingroup AscendCL + * @brief Set a callback function to handle exception information + * + * @param callback [IN] callback function to handle exception information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback); + +/** + * @ingroup AscendCL + * @brief Get task id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The task id from exception information + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief Get stream id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The stream id from exception information + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief Get thread id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The thread id of fail task + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief Get device id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The thread id of fail task + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief The thread that handles the callback function on the Stream + * + * @param threadId [IN] thread ID + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Add a callback function to be executed on the host + * to the task queue of the Stream + * + * @param fn [IN] Specify the callback function to be added + * The function prototype of the callback function is: + * typedef void (*aclrtCallback)(void *userData); + * @param userData [IN] User data to be passed to the callback function + * @param blockType [IN] callback block type + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief After waiting for a specified time, trigger callback processing + * + * @par Function + * The thread processing callback specified by + * the aclrtSubscribeReport interface + * + * @param timeout [IN] timeout value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSubscribeReport + */ +ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout); + +/** + * @ingroup AscendCL + * @brief Cancel thread registration, + * the callback function on the specified Stream + * is no longer processed by the specified thread + * + * @param threadId [IN] thread ID + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create context and associates it with the calling thread + * + * @par Function + * The following use cases are supported: + * @li If you don't call the aclrtCreateContext interface + * to explicitly create the context, + * the system will use the default context, which is implicitly created + * when the aclrtSetDevice interface is called. + * @li If multiple contexts are created in a process + * (there is no limit on the number of contexts), + * the current thread can only use one of them at the same time. + * It is recommended to explicitly specify the context of the current thread + * through the aclrtSetCurrentContext interface to increase. + * the maintainability of the program. + * + * @param context [OUT] point to the created context + * @param deviceId [IN] device to create context on + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSetDevice | aclrtSetCurrentContext + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId); + +/** + * @ingroup AscendCL + * @brief destroy context instance + * + * @par Function + * Can only destroy context created through aclrtCreateContext interface + * + * @param context [IN] the context to destroy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateContext + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context); + +/** + * @ingroup AscendCL + * @brief set the context of the thread + * + * @par Function + * The following scenarios are supported: + * @li If the aclrtCreateContext interface is called in a thread to explicitly + * create a Context (for example: ctx1), the thread's Context can be specified + * without calling the aclrtSetCurrentContext interface. + * The system uses ctx1 as the context of thread1 by default. + * @li If the aclrtCreateContext interface is not explicitly created, + * the system uses the default context as the context of the thread. + * At this time, the aclrtDestroyContext interface cannot be used to release + * the default context. + * @li If the aclrtSetCurrentContext interface is called multiple times to + * set the thread's Context, the last one prevails. + * + * @par Restriction + * @li If the cevice corresponding to the context set for the thread + * has been reset, you cannot set the context as the context of the thread, + * otherwise a business exception will result. + * @li It is recommended to use the context created in a thread. + * If the aclrtCreateContext interface is called in thread A to create a context, + * and the context is used in thread B, + * the user must guarantee the execution order of tasks in the same stream + * under the same context in two threads. + * + * @param context [IN] the current context of the thread + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateContext | aclrtDestroyContext + */ +ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context); + +/** + * @ingroup AscendCL + * @brief get the context of the thread + * + * @par Function + * If the user calls the aclrtSetCurrentContext interface + * multiple times to set the context of the current thread, + * then the last set context is obtained + * + * @param context [OUT] the current context of the thread + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSetCurrentContext + */ +ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context); + +/** + * @ingroup AscendCL + * @brief Specify the device to use for the operation + * implicitly create the default context and the default stream + * + * @par Function + * The following use cases are supported: + * @li Device can be specified in the process or thread. + * If you call the aclrtSetDevice interface multiple + * times to specify the same device, + * you only need to call the aclrtResetDevice interface to reset the device. + * @li The same device can be specified for operation + * in different processes or threads. + * @li Device is specified in a process, + * and multiple threads in the process can share this device to explicitly + * create a Context (aclrtCreateContext interface). + * @li In multi-device scenarios, you can switch to other devices + * through the aclrtSetDevice interface in the process. + * + * @param deviceId [IN] the device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtResetDevice |aclrtCreateContext + */ +ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId); + +/** + * @ingroup AscendCL + * @brief Reset the current operating Device and free resources on the device, + * including the default context, the default stream, + * and all streams created under the default context, + * and synchronizes the interface. + * If the task under the default context or stream has not been completed, + * the system will wait for the task to complete before releasing it. + * + * @par Restriction + * @li The Context, Stream, and Event that are explicitly created + * on the device to be reset. Before resetting, + * it is recommended to follow the following interface calling sequence, + * otherwise business abnormalities may be caused. + * @li Interface calling sequence: + * call aclrtDestroyEvent interface to release Event or + * call aclrtDestroyStream interface to release explicitly created Stream-> + * call aclrtDestroyContext to release explicitly created Context-> + * call aclrtResetDevice interface + * + * @param deviceId [IN] the device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId); + +/** + * @ingroup AscendCL + * @brief get target device of current thread + * + * @param deviceId [OUT] the device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId); + +/** + * @ingroup AscendCL + * @brief get target side + * + * @param runMode [OUT] the run mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode); + +/** + * @ingroup AscendCL + * @brief Wait for compute device to finish + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void); + +/** + * @ingroup AscendCL + * @brief Set Scheduling TS + * + * @param tsId [IN] the ts id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId); + +/** + * @ingroup AscendCL + * @brief get total device number. + * + * @param count [OUT] the device number + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count); + +/** + * @ingroup AscendCL + * @brief create event instance + * + * @param event [OUT] created event + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event); + +/** + * @ingroup AscendCL + * @brief create event instance with flag + * + * @param event [OUT] created event + * @param flag [IN] event flag + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateEventWithFlag(aclrtEvent *event, uint32_t flag); + +/** + * @ingroup AscendCL + * @brief destroy event instance + * + * @par Function + * Only events created through the aclrtCreateEvent interface can be + * destroyed, synchronous interfaces. When destroying an event, + * the user must ensure that the tasks involved in the aclrtSynchronizeEvent + * interface or the aclrtStreamWaitEvent interface are completed before + * they are destroyed. + * + * @param event [IN] event to destroy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event); + +/** + * @ingroup AscendCL + * @brief Record an Event in the Stream + * + * @param event [IN] event to record + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Reset an event + * + * @par Function + * Users need to make sure to wait for the tasks in the Stream + * to complete before resetting the Event + * + * @param event [IN] event to reset + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Queries an event's status + * + * @param event [IN] event to query + * @param status [OUT] event status + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status); + +/** + * @ingroup AscendCL + * @brief Block Host Running, wait event to be complete + * + * @param event [IN] event to wait + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); + +/** + * @ingroup AscendCL + * @brief computes the elapsed time between events. + * + * @param ms [OUT] time between start and end in ms + * @param start [IN] starting event + * @param end [IN] ending event + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); + +/** + * @ingroup AscendCL + * @brief alloc memory on device + * + * @par Function + * alloc for size linear memory on device + * and return a pointer to allocated memory by *devPtr + * + * @par Restriction + * @li The memory requested by the aclrtMalloc interface needs to be released + * through the aclrtFree interface. + * @li Before calling the media data processing interface, + * if you need to apply memory on the device to store input or output data, + * you need to call acldvppMalloc to apply for memory. + * + * @param devPtr [OUT] pointer to pointer to allocated memory on device + * @param size [IN] alloc memory size + * @param policy [IN] memory alloc policy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtFree | acldvppMalloc | aclrtMallocCached + */ +ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy); + +/** + * @ingroup AscendCL + * @brief allocate memory on device with cache + * + * @par Function + * alloc for size linear memory on device + * and return a pointer to allocated memory by *devPtr + * + * @par Restriction + * @li The memory requested by the aclrtMallocCached interface needs to be released + * through the aclrtFree interface. + * + * @param devPtr [OUT] pointer to pointer to allocated memory on device + * @param size [IN] alloc memory size + * @param policy [IN] memory alloc policy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtFree | aclrtMalloc + */ +ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); + +/** + * @ingroup AscendCL + * @brief flush cache data to ddr + * + * @param devPtr [IN] the pointer that flush data to ddr + * @param size [IN] flush size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief invalidate cache data + * + * @param devPtr [IN] pointer to invalidate cache data + * @param size [IN] invalidate size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free device memory + * + * @par Function + * can only free memory allocated through the aclrtMalloc interface + * + * @param devPtr [IN] Pointer to memory to be freed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtMalloc + */ +ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr); + +/** + * @ingroup AscendCL + * @brief alloc memory on host + * + * @par Restriction + * @li The requested memory cannot be used in the Device + * and needs to be explicitly copied to the Device. + * @li The memory requested by the aclrtMallocHost interface + * needs to be released through the aclrtFreeHost interface. + * + * @param hostPtr [OUT] pointer to pointer to allocated memory on the host + * @param size [IN] alloc memory size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtFreeHost + */ +ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free host memory + * + * @par Function + * can only free memory allocated through the aclrtMallocHost interface + * + * @param hostPtr [IN] free memory pointer + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtMallocHost + */ +ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); + +/** + * @ingroup AscendCL + * @brief synchronous memory replication between host and device + * + * @param dst [IN] destination address pointer + * @param destMax [IN] Max length of the destination address memory + * @param src [IN] source address pointer + * @param count [IN] the length of byte to copy + * @param kind [IN] memcpy type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, + aclrtMemcpyKind kind); + +/** + * @ingroup AscendCL + * @brief Initialize memory and set contents of memory to specified value + * + * @par Function + * The memory to be initialized is on the Host or device side, + * and the system determines whether + * it is host or device according to the address + * + * @param devPtr [IN] Starting address of memory + * @param maxCount [IN] Max length of destination address memory + * @param value [IN] Set value + * @param count [IN] The length of memory + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count); + +/** + * @ingroup AscendCL + * @brief Asynchronous memory replication between Host and Device + * + * @par Function + * After calling this interface, + * be sure to call the aclrtSynchronizeStream interface to ensure that + * the task of memory replication has been completed + * + * @par Restriction + * @li For on-chip Device-to-Device memory copy, + * both the source and destination addresses must be 64-byte aligned + * + * @param dst [IN] destination address pointer + * @param destMax [IN] Max length of destination address memory + * @param src [IN] source address pointer + * @param count [IN] the number of byte to copy + * @param kind [IN] memcpy type + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, + aclrtMemcpyKind kind, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Asynchronous initialize memory + * and set contents of memory to specified value async + * + * @par Function + * The memory to be initialized is on the Host or device side, + * and the system determines whether + * it is host or device according to the address + * + * @param devPtr [IN] destination address pointer + * @param maxCount [IN] Max length of destination address memory + * @param value [IN] set value + * @param count [IN] the number of byte to set + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create stream instance + * + * @param stream [OUT] the created stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream); + +/** + * @ingroup AscendCL + * @brief destroy stream instance + * + * @par Function + * Can only destroy streams created through the aclrtCreateStream interface + * + * @par Restriction + * Before calling the aclrtDestroyStream interface to destroy + * the specified Stream, you need to call the aclrtSynchronizeStream interface + * to ensure that the tasks in the Stream have been completed. + * + * @param stream [IN] the stream to destroy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateStream | aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief block the host until all tasks + * in the specified stream have completed + * + * @param stream [IN] the stream to wait + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Blocks the operation of the specified Stream until + * the specified Event is completed. + * Support for multiple streams waiting for the same event. + * + * @param stream [IN] the wait stream If using thedefault Stream, set NULL + * @param event [IN] the event to wait + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event); + +/** + * @ingroup AscendCL + * @brief set group + * + * @par Function + * set the task to the corresponding group + * + * @param groupId [IN] group id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail + */ +ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId); + +/** + * @ingroup AscendCL + * @brief get the number of group + * + * @par Function + * get the number of group. if the number of group is zero, + * it means that group is not supported or group is not created. + * + * @param count [OUT] the number of group + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + */ +ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count); + +/** + * @ingroup AscendCL + * @brief create group information + * + * @retval null for failed. + * @retval OtherValues success. + * + * @see aclrtDestroyGroupInfo + */ +ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo(); + +/** + * @ingroup AscendCL + * @brief destroy group information + * + * @param groupInfo [IN] pointer to group information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateGroupInfo + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo); + +/** + * @ingroup AscendCL + * @brief get all group information + * + * @param groupInfo [OUT] pointer to group information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtGetGroupCount + */ +ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); + +/** + * @ingroup AscendCL + * @brief get detail information of group + * + * @param groupInfo [IN] pointer to group information + * @param groupIndex [IN] group index value + * @param attr [IN] group attribute + * @param attrValue [OUT] pointer to attribute value + * @param valueLen [IN] length of attribute value + * @param paramRetSize [OUT] pointer to real length of attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtGetGroupCount | aclrtGetAllGroupInfo + */ +ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupIndex, + aclrtGroupAttr attr, void *attrValue, size_t valueLen, + size_t *paramRetSize); + +/** + * @ingroup AscendCL + * @brief checking whether current device and peer device support the p2p feature + * + * @param canAccessPeer [OUT] pointer to save the checking result + * @param deviceId [IN] current device id + * @param peerDeviceId [IN] peer device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess + */ +ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId); + +/** + * @ingroup AscendCL + * @brief enable the peer device to support the p2p feature + * + * @param peerDeviceId [IN] the peer device id + * @param flags [IN] reserved field, now it must be zero + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess + */ +ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags); + +/** + * @ingroup AscendCL + * @brief disable the peer device to support the p2p function + * + * @param peerDeviceId [IN] the peer device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess + */ +ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId); + +/** + * @ingroup AscendCL + * @brief Obtain the free memory and total memory of specified attribute. + * the specified memory include normal memory and huge memory. + * + * @param attr [IN] the memory attribute of specified device + * @param free [OUT] the free memory of specified device + * @param total [OUT] the total memory of specified device. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total); + +/** + * @ingroup AscendCL + * @brief Set the timeout interval for waitting of op + * + * @param timeout [IN] op wait timeout + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_RT_H_ diff --git a/inc/external/acl/acl_tdt.h b/inc/external/acl/acl_tdt.h new file mode 100644 index 00000000..c357518d --- /dev/null +++ b/inc/external/acl/acl_tdt.h @@ -0,0 +1,276 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_TDT_H_ +#define INC_EXTERNAL_ACL_ACL_TDT_H_ + +#include "acl/acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum acltdtTensorType { + ACL_TENSOR_DATA_UNDEFINED = -1, + ACL_TENSOR_DATA_TENSOR, + ACL_TENSOR_DATA_END_OF_SEQUENCE, + ACL_TENSOR_DATA_ABNORMAL +}; + +typedef struct acltdtDataItem acltdtDataItem; +typedef struct acltdtDataset acltdtDataset; +typedef struct acltdtChannelHandle acltdtChannelHandle; + +/** + * @ingroup AscendCL + * @brief Get tensor type from item + * + * @param dataItem [IN] pointer to the data item + * + * @retval Tensor type. + * @retval ACL_DT_UNDEFINED if dataItem is null + */ +ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get data type from item + * + * @param dataItem [IN] pointer to the data item + * + * @retval Data type. + * @retval ACL_DT_UNDEFINED if dataItem is null + */ +ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get data address from item + * + * @param dataItem [IN] pointer to data item + * + * @retval null for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get data size from item + * + * @param dataItem [IN] pointer to data item + * + * @retval 0 for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get dim's number from item + * + * @param dataItem [IN] pointer to data item + * + * @retval 0 for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get dims from item + * + * @param dataItem [IN] the struct of data item + * @param dims [IN|OUT] pointer to the dims of dataTtem + * @param dimNum [IN] the size of the dims + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum); + +/** + * @ingroup AscendCL + * @brief Create the struct of data item + * + * @param tdtType [IN] Tdt tensor type + * @param dims [IN] pointer of tdtDataItem's dims + * @param dimNum [IN] Dim number + * @param dataType [IN] Data type + * @param data [IN] Data pointer + * @param size [IN] Data size + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyDataItem + */ +ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum, + aclDataType dataType, void *data, size_t size); + +/** + * @ingroup AscendCL + * @brief Destroy the struct of data item + * + * @param dataItem [IN] pointer to the data item + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateDataItem + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Create the tdt dataset + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyDataset + */ +ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset(); + +/** + * @ingroup AscendCL + * @brief Destroy the tdt dataset + * + * @param dataset [IN] pointer to the dataset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateDataset + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Get the data item + * + * @param dataset [IN] pointer to the dataset + * @param index [IN] index of the dataset + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtAddDataItem + */ +ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index); + +/** + * @ingroup AscendCL + * @brief Get the data item + * + * @param dataset [OUT] pointer to the dataset + * @param dataItem [IN] pointer to the data item + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtGetDataItem + */ +ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get the size of dataset + * + * @param dataset [IN] pointer to the dataset + * + * @retval 0 for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Stop the channel + * + * @param handle [IN] pointer to the channel handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateChannel | acltdtDestroyChannel + */ +ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle); + +/** + * @ingroup AscendCL + * @brief Create the channel + * + * @param deviceId [IN] the device id + * @param name [IN] the channel's name + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtStopChannel | acltdtDestroyChannel + */ +ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name); + +/** + * @ingroup AscendCL + * @brief Destroy the channel + * + * @param handle [IN] pointer to the channel handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateChannel | acltdtStopChannel + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); + +/** + * @ingroup AscendCL + * @brief Send tensor to device + * + * @param handle [IN] pointer to the channel handle + * @param dataset [IN] pointer to the dataset + * @param timeout [IN] to be reserved, now it must be -1 + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtReceiveTensor + */ +ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, + int32_t timeout); + +/** + * @ingroup AscendCL + * @brief Receive tensor from device + * + * @param handle [IN] pointer to the channel handle + * @param dataset [OUT] pointer to the dataset + * @param timeout [IN] to be reserved, now it must be -1 + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtSendTensor + */ +ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, + int32_t timeout); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_TDT_H_ diff --git a/inc/external/acl/error_codes/ge_error_codes.h b/inc/external/acl/error_codes/ge_error_codes.h new file mode 100644 index 00000000..cafc5a64 --- /dev/null +++ b/inc/external/acl/error_codes/ge_error_codes.h @@ -0,0 +1,75 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ +#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ + +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + +#include + +#ifdef __cplusplus +extern "C" { +#endif +static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000; +static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009; +static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011; +static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012; +static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013; +static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014; +static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; +static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; +static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; +static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; +static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; +static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020; +static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021; +static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022; +static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; +static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; +static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; +static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005; +static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; +static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; +static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; +static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; + +#ifdef __cplusplus +} // namespace ge +#endif +#endif // INC_EXTERNAL_GE_GE_ERROR_CODES_H_ diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h new file mode 100644 index 00000000..a1392cc6 --- /dev/null +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -0,0 +1,109 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ +#define __INC_EXTERNEL_RT_ERROR_CODES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static const int32_t ACL_RT_SUCCESS = 0; // success + +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type +static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout + +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource +static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource + +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout +static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception +static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal + +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error +static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect + +#ifdef __cplusplus +} +#endif + +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/external/acl/ops/acl_cblas.h b/inc/external/acl/ops/acl_cblas.h new file mode 100644 index 00000000..3d81eb2b --- /dev/null +++ b/inc/external/acl/ops/acl_cblas.h @@ -0,0 +1,334 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ +#define INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ + +#include "acl/acl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; + +typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; + +/** + * @ingroup AscendCL + * @brief perform the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param alpha [IN] pointer to scalar used for multiplication. + * of same type as dataTypeC + * @param a [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param dataTypeA [IN] datatype of matrix A + * @param x [IN] pointer to vector x + * @param incx [IN] stride between consecutive elements of vector x + * @param dataTypeX [IN] datatype of vector x + * @param beta [IN] pointer to scalar used for multiplication. + * of same type as dataTypeC If beta == 0, + * then y does not have to be a valid input + * @param y [IN|OUT] pointer to vector y + * @param incy [IN] stride between consecutive elements of vector y + * @param dataTypeY [IN] datatype of vector y + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, + aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, + const void *beta, void *y, int incy, aclDataType dataTypeY, + aclComputeType type, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param dataTypeA [IN] datatype of matrix A + * @param dataTypeX [IN] datatype of vector x + * @param dataTypeY [IN] datatype of vector y + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA, + aclDataType dataTypeX, aclDataType dataTypeY, + aclComputeType type, aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param alpha [IN] pointer to scalar used for multiplication + * @param a [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param x [IN] pointer to vector x + * @param incx [IN] stride between consecutive elements of vector x + * @param beta [IN] pointer to scalar used for multiplication. + * If beta value == 0, + * then y does not have to be a valid input + * @param y [IN|OUT] pointer to vector y + * @param incy [IN] stride between consecutive elements of vector y + * @param type [IN] computation type + * @param stream [IN] stream + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha, + const aclFloat16 *a, int lda, const aclFloat16 *x, int incx, + const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param alpha [IN] pointer to scalar used for multiplication + * @param a [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param x [IN] pointer to vector x + * @param incx [IN] stride between consecutive elements of vector x + * @param beta [IN] pointer to scalar used for multiplication. + * If beta value == 0, + * then y does not have to be a valid input + * @param y [IN|OUT] pointer to vector y + * @param incy [IN] stride between consecutive elements of vector y + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a, + int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y, + int incy, aclComputeType type, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param handle [OUT] pointer to the pointer to the handle + * @param type [IN] computation type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param alpha [IN] pointer to scalar used for multiplication. of same type as dataTypeC + * @param matrixA [IN] pointer to matrix A + * @param lda [IN] leading dimension array used to store matrix A + * @param dataTypeA [IN] datatype of matrix A + * @param matrixB [IN] pointer to matrix B + * @param ldb [IN] leading dimension array used to store matrix B + * @param dataTypeB [IN] datatype of matrix B + * @param beta [IN] pointer to scalar used for multiplication. + * of same type as dataTypeC If beta == 0, + * then matrixC does not have to be a valid input + * @param matrixC [IN|OUT] pointer to matrix C + * @param ldc [IN] leading dimension array used to store matrix C + * @param dataTypeC [IN] datatype of matrix C + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, + int k, const void *alpha, const void *matrixA, int lda, + aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB, + const void *beta, void *matrixC, int ldc, aclDataType dataTypeC, + aclComputeType type, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param dataTypeA [IN] datatype of matrix A + * @param dataTypeB [IN] datatype of matrix B + * @param dataTypeC [IN] datatype of matrix C + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * @param type [IN] computation type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, + int m, int n, int k, aclDataType dataTypeA, + aclDataType dataTypeB, aclDataType dataTypeC, + aclComputeType type, aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param alpha [IN] pointer to scalar used for multiplication + * @param matrixA [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param matrixB [IN] pointer to matrix B + * @param ldb [IN] leading dimension used to store the matrix B + * @param beta [IN] pointer to scalar used for multiplication. + * If beta value == 0, + * then matrixC does not have to be a valid input + * @param matrixC [IN|OUT] pointer to matrix C + * @param ldc [IN] leading dimension used to store the matrix C + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, + int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda, + const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta, + aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC, + int m, int n, int k, aclComputeType type, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param alpha [IN] pointer to scalar used for multiplication + * @param matrixA [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param matrixB [IN] pointer to matrix B + * @param ldb [IN] leading dimension used to store the matrix B + * @param beta [IN] pointer to scalar used for multiplication. + * If beta value == 0, + * then matrixC does not have to be a valid input + * @param matrixC [IN|OUT] pointer to matrix C + * @param ldc [IN] leading dimension used to store the matrix C + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, + int k, const int32_t *alpha, const int8_t *matrixA, int lda, + const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC, + int ldc, aclComputeType type, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, + int m, int n, int k, aclComputeType type, + aclopHandle **handle); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h new file mode 100644 index 00000000..dcaa3936 --- /dev/null +++ b/inc/external/acl/ops/acl_dvpp.h @@ -0,0 +1,2568 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if !defined(ENABLE_DVPP_INTERFACE) +#if defined(_MSC_VER) +#error message("if you want to use dvpp funtions ,please use the macro definition (ENABLE_DVPP_INTERFACE).") +#else +#error "if you want to use dvpp funtions ,please use the macro definition (ENABLE_DVPP_INTERFACE)." +#endif +#endif + +#ifndef INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ +#define INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ + +#include +#include +#include "acl/acl.h" +#include "acl/acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct acldvppPicDesc acldvppPicDesc; +typedef struct acldvppBatchPicDesc acldvppBatchPicDesc; +typedef struct acldvppRoiConfig acldvppRoiConfig; +typedef struct acldvppResizeConfig acldvppResizeConfig; +typedef struct acldvppBorderConfig acldvppBorderConfig; +typedef struct acldvppLutMap acldvppLutMap; +typedef struct acldvppChannelDesc acldvppChannelDesc; +typedef struct acldvppJpegeConfig acldvppJpegeConfig; +typedef struct aclvdecChannelDesc aclvdecChannelDesc; +typedef struct acldvppStreamDesc acldvppStreamDesc; +typedef struct aclvdecFrameConfig aclvdecFrameConfig; +typedef struct aclvencChannelDesc aclvencChannelDesc; +typedef struct aclvencFrameConfig aclvencFrameConfig; +typedef struct acldvppHist acldvppHist; +typedef void (*aclvdecCallback)(acldvppStreamDesc *input, acldvppPicDesc *output, void *userData); +typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output, void *userdata); + +// Supported Pixel Format +enum acldvppPixelFormat { + PIXEL_FORMAT_YUV_400 = 0, // 0 + PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 + PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 + PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 + PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 + PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 + PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 + PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 + PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 + PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 + PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 + PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 + PIXEL_FORMAT_RGB_888 = 12, // 12 + PIXEL_FORMAT_BGR_888 = 13, // 13 + PIXEL_FORMAT_ARGB_8888 = 14, // 14 + PIXEL_FORMAT_ABGR_8888 = 15, // 15 + PIXEL_FORMAT_RGBA_8888 = 16, // 16 + PIXEL_FORMAT_BGRA_8888 = 17, // 17 + PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 + PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 + PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 + PIXEL_FORMAT_YVU_PLANAR_422, + PIXEL_FORMAT_YVU_PLANAR_444, + PIXEL_FORMAT_RGB_444 = 23, + PIXEL_FORMAT_BGR_444, + PIXEL_FORMAT_ARGB_4444, + PIXEL_FORMAT_ABGR_4444, + PIXEL_FORMAT_RGBA_4444, + PIXEL_FORMAT_BGRA_4444, + PIXEL_FORMAT_RGB_555, + PIXEL_FORMAT_BGR_555, + PIXEL_FORMAT_RGB_565, + PIXEL_FORMAT_BGR_565, + PIXEL_FORMAT_ARGB_1555, + PIXEL_FORMAT_ABGR_1555, + PIXEL_FORMAT_RGBA_1555, + PIXEL_FORMAT_BGRA_1555, + PIXEL_FORMAT_ARGB_8565, + PIXEL_FORMAT_ABGR_8565, + PIXEL_FORMAT_RGBA_8565, + PIXEL_FORMAT_BGRA_8565, + PIXEL_FORMAT_RGB_BAYER_8BPP = 50, + PIXEL_FORMAT_RGB_BAYER_10BPP, + PIXEL_FORMAT_RGB_BAYER_12BPP, + PIXEL_FORMAT_RGB_BAYER_14BPP, + PIXEL_FORMAT_RGB_BAYER_16BPP, + PIXEL_FORMAT_BGR_888_PLANAR = 70, + PIXEL_FORMAT_HSV_888_PACKAGE, + PIXEL_FORMAT_HSV_888_PLANAR, + PIXEL_FORMAT_LAB_888_PACKAGE, + PIXEL_FORMAT_LAB_888_PLANAR, + PIXEL_FORMAT_S8C1, + PIXEL_FORMAT_S8C2_PACKAGE, + PIXEL_FORMAT_S8C2_PLANAR, + PIXEL_FORMAT_S16C1, + PIXEL_FORMAT_U8C1, + PIXEL_FORMAT_U16C1, + PIXEL_FORMAT_S32C1, + PIXEL_FORMAT_U32C1, + PIXEL_FORMAT_U64C1, + PIXEL_FORMAT_S64C1, + PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, + PIXEL_FORMAT_YVU_SEMIPLANAR_440, + PIXEL_FORMAT_FLOAT32, + PIXEL_FORMAT_BUTT, + PIXEL_FORMAT_UNKNOWN = 10000 +}; + +// Stream Format +enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; + +// Supported Channel Mode +enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; + +// Supported Border Type +enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; + +// Venc parameter type +enum aclvencChannelDescParamType { + ACL_VENC_THREAD_ID_UINT64 = 0, + ACL_VENC_CALLBACK_PTR, + ACL_VENC_PIXEL_FORMAT_UINT32, + ACL_VENC_ENCODE_TYPE_UINT32, + ACL_VENC_PIC_WIDTH_UINT32, + ACL_VENC_PIC_HEIGHT_UINT32, + ACL_VENC_KEY_FRAME_INTERVAL_UINT32, + ACL_VENC_BUF_ADDR_PTR, + ACL_VENC_BUF_SIZE_UINT32, + ACL_VENC_RC_MODE_UINT32, + ACL_VENC_SRC_RATE_UINT32, + ACL_VENC_MAX_BITRATE_UINT32, + ACL_VENC_MAX_IP_PROP_UINT32 +}; + +// Jpeg picture format +enum acldvppJpegFormat { + ACL_JPEG_CSS_444 = 0, + ACL_JPEG_CSS_422, + ACL_JPEG_CSS_420, + ACL_JPEG_CSS_GRAY, + ACL_JPEG_CSS_440, + ACL_JPEG_CSS_411, + ACL_JPEG_CSS_UNKNOWN = 1000 +}; + +/** + * @ingroup AscendCL + * @brief alloc device memory for dvpp. + * + * @par Function + * @li It's mainly used for allocating memory to device media data processing. + * The requested memory meets the data processing requirements. + * After calling this interface to request memory, + * you must release the memory using the acldvppFree interface. + * @li When calling the acldvppMalloc interface to apply for memory, + * the size entered by the user is aligned upwards to 32 integer multiples, + * and an additional 32 bytes are applied. + * + * @par Restriction + * If the user uses the acldvppMalloc interface to apply for a large block of + * memory and divide and manage the memory by himself, + * when applying for memory, the user needs to align up to 32 integer + * times + 32 bytes (ALIGN_UP [len] +32 words) according to + * the actual data size of each picture Section) to manage memory. + * + * @param devPtr [OUT] memory pointer. + * @param size [IN] memory size. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppFree + */ +ACL_FUNC_VISIBILITY aclError acldvppMalloc(void **devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free device memory for dvpp. + * + * @par Function + * Free the memory requested through the acldvppMalloc interface + * @param devPtr [IN] memory pointer to free. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppMalloc + */ +ACL_FUNC_VISIBILITY aclError acldvppFree(void *devPtr); + +/** + * @ingroup AscendCL + * @brief create DvppChannelDesc. + * + * @par Function + * Create a channel for image data processing. + * The same channel can be reused + * and is no longer available after destruction + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppChannelDesc *acldvppCreateChannelDesc(); + +/** + * @ingroup AscendCL + * @brief destroy dvppChannelDesc. + * + * @par Function + * Can only destroy channels created by the acldvppCreateChannel interface + * @param channelDesc [IN] the channel description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannelDesc | acldvppDestroyChannel + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyChannelDesc(acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp channel Id. + * + * @par Restriction + * Interface calling sequence: + * acldvppCreateChannelDesc --> acldvppCreateChannel --> + * acldvppGetChannelDescChannelId + * + * @param channelDesc [IN] the channel description. + * + * @retval channel id. + * + * @see acldvppCreateChannelDesc | acldvppCreateChannel + */ +ACL_FUNC_VISIBILITY uint64_t acldvppGetChannelDescChannelId(const acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp picture description. + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppCreatePicDesc(); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp picture description. + * + * @par Function + * Can only destroy picture description information created + * through acldvppCreatePicDesc interface. + * @param picDesc [IN] dvpp picture description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyPicDesc(acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's data. + * + * @param picDesc [OUT] dvpp picture description. + * @param dataDev [IN] dvpp picture dataDev.Must be the memory + * requested using the acldvppMalloc interface. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppMalloc + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescData(acldvppPicDesc *picDesc, void *dataDev); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's size. + * + * @param picDesc [OUT] dvpp picture description. + * @param size dvpp [IN] picture size. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescSize(acldvppPicDesc *picDesc, uint32_t size); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's format. + * + * @param picDesc [OUT] dvpp picture description. + * @param format [IN] dvpp picture format. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescFormat(acldvppPicDesc *picDesc, acldvppPixelFormat format); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's width. + * + * @param picDesc [OUT] dvpp picture description. + * @param width [IN] dvpp picture width. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidth(acldvppPicDesc *picDesc, uint32_t width); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's height. + * + * @param picDesc [OUT] dvpp picture description. + * @param height [IN] dvpp picture height. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeight(acldvppPicDesc *picDesc, uint32_t height); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's widthStride. + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * + * @param picDesc [OUT] dvpp picture description. + * @param widthStride [IN] dvpp picture widthStride. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidthStride(acldvppPicDesc *picDesc, uint32_t widthStride); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's heightStride. + * + * @par Restriction + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param picDesc [OUT] dvpp picture description. + * @param heightStride [IN] dvpp picture heightStride. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeightStride(acldvppPicDesc *picDesc, uint32_t heightStride); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's retcode. + * + * @param picDesc [OUT] dvpp picture description. + * @param retCode [IN] dvpp picture retcode. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescRetCode(acldvppPicDesc *picDesc, uint32_t retCode); + +/** + * @ingroup AscendCL + * @brief Get picture data. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval picture data addr. + * @retval default nullptr. + */ +ACL_FUNC_VISIBILITY void *acldvppGetPicDescData(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get picture data size. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval picture data size. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescSize(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's format. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval format + * @retval default PIXEL_FORMAT_YUV_400. + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat acldvppGetPicDescFormat(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's width. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval width. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidth(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's height. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval height. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeight(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's widthStride. + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * + * @param picDesc [IN] dvpp picture description. + * + * @retval stride width. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidthStride(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's heightStride. + * + * @par Restriction + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval stride height. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeightStride(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's retcode. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval ret code. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp roi config. + * + * @param left [IN] the left offset, must be even + * @param right [IN] the right offset, must be odd + * @param top [IN] the top offset, must be even + * @param bottom [IN] the bottom offset, must be odd + * + * @retval null for failed. + * @retval other success + */ +ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top, + uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp roi config. + * + * @par Function + * Destroys data created through the acldvppCreateRoiConfig interface + * @param roiConfig [IN] dvpp roi config. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyRoiConfig(acldvppRoiConfig *roiConfig); + +/** + * @ingroup AscendCL + * @brief Set left of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param left [IN] left offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigLeft(acldvppRoiConfig *config, uint32_t left); + +/** + * @ingroup AscendCL + * @brief Set right of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param right [IN] right offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigRight(acldvppRoiConfig *config, uint32_t right); + +/** + * @ingroup AscendCL + * @brief Set top of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param top [IN] top offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigTop(acldvppRoiConfig *config, uint32_t top); + +/** + * @ingroup AscendCL + * @brief Set bottom of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param bottom [IN] bottom offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Set RoiConfig. + * + * @param config [OUT] RoiConfig + * @param left [IN] left offset + * @param right [IN] right offset + * @param top [IN] top offset + * @param bottom [IN] bottom offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top, + uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Create dvpp resize config. + * The specified scaling algorithm is not supported. + * The default scaling algorithm is "nearest neighbor interpolation". + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY acldvppResizeConfig *acldvppCreateResizeConfig(); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp resize config. + * + * @par Function + * Destroys the scaling configuration data created by + * the acldvppCreateResizeConfig interface + * + * @param resizeConfig [IN] resize config. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyResizeConfig(acldvppResizeConfig *resizeConfig); + +/** + * @ingroup AscendCL + * @brief Create jpege config. + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY acldvppJpegeConfig *acldvppCreateJpegeConfig(); + +/** + * @ingroup AscendCL + * @brief Destroy jpege config. + * + * @par Function + * Destroys the encoding configuration data created by + * the acldvppCreateJpegeConfig interface + * @param jpegeConfig [IN] config pointer to destroy. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateJpegeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyJpegeConfig(acldvppJpegeConfig *jpegeConfig); + +/** + * @ingroup AscendCL + * @brief Set jpege config's level. + * + * @param jpegeConfig [OUT] Call the acldvppCreateJpegeConfig + * interface to create acldvppJpegeConfig data + * @param level [IN] Encoding quality range [0, 100], + * where level 0 encoding quality is similar to level 100, + * and the smaller the value in [1, 100], + * the worse the quality of the output picture. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetJpegeConfigLevel(acldvppJpegeConfig *jpegeConfig, uint32_t level); + +/** + * @ingroup AscendCL + * @brief Get jpege config's level. + * + * @param jpegeConfig [IN] jpege config. + * + * @retval compression level. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetJpegeConfigLevel(const acldvppJpegeConfig *jpegeConfig); + +/** + * @ingroup AscendCL + * @brief create vdecChannelDesc.Channel description information + * when creating a video data processing channel. + * + * @retval null for failed. + * @retval other success + */ +ACL_FUNC_VISIBILITY aclvdecChannelDesc *aclvdecCreateChannelDesc(); + +/** + * @ingroup AscendCL + * @brief destroy vdecChannelDesc. + * + * @par Function + * Can only destroy aclvdecChannelDesc type created + * through aclvdecCreateChannelDesc interface + * @param channelDesc [IN] channel description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + + * @see aclvdecCreateChannelDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannelDesc(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's channel id. + * + * @param channelDesc [OUT] vdec channel description. + * @param channelId [IN] decoding channel id: 0~15. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescChannelId(aclvdecChannelDesc *channelDesc, uint32_t channelId); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's thread id. + * + * @param channelDesc [OUT] vdec channel description. + * @param threadId [IN] thread id. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescThreadId(aclvdecChannelDesc *channelDesc, uint64_t threadId); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's callback function. + * + * @param channelDesc [OUT] vdec channel description. + * @param callback [IN] function callback.Function prototype: + * void (* aclvdecCallback) + * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata) + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCallback + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescCallback(aclvdecChannelDesc *channelDesc, aclvdecCallback callback); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's video encoding type. + * + * @param channelDesc [OUT] vdec channel description. + * @param enType [IN] video encoding type. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescEnType(aclvdecChannelDesc *channelDesc, acldvppStreamFormat enType); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture format. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicFormat [IN] out picture format (acldvppPixelFormat). + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicFormat(aclvdecChannelDesc *channelDesc, + acldvppPixelFormat outPicFormat); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture width. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicWidth [IN] out picture width. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicWidth(aclvdecChannelDesc *channelDesc, uint32_t outPicWidth); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture height. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicHeight [IN] out picture height. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicHeight(aclvdecChannelDesc *channelDesc, uint32_t outPicHeight); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's reference frame num. + * + * @param channelDesc [OUT] vdec channel description. + * @param refFrameNum [IN] reference frame num. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescRefFrameNum(aclvdecChannelDesc *channelDesc, uint32_t refFrameNum); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's bit depth. + * + * @param channelDesc [OUT] vdec channel description. + * @param bitDepth [IN] bit depth. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescBitDepth(aclvdecChannelDesc *channelDesc, uint32_t bitDepth); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's channel id. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval decoding channel id: 0~15. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescChannelId(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's thread id. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval thread id. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint64_t aclvdecGetChannelDescThreadId(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's callback function. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval function callback.Function prototype: + * void (* aclvdecCallback) + * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata) + * @retval default null. + * + * @see aclvdecCallback + */ +ACL_FUNC_VISIBILITY aclvdecCallback aclvdecGetChannelDescCallback(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's video encoding type. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval video encoding type. + * @retval default H265_MAIN_LEVEL. + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat aclvdecGetChannelDescEnType(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture format. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture format. + * @retval default DVPP_OUTPUT_YUV420SP_UV. + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat aclvdecGetChannelDescOutPicFormat(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture width. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture width. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicWidth(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture height. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture height (for vdec malloc memory). + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicHeight(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's bit depth. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval bit depth. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescBitDepth(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's reference frame num. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval reference frame num. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescRefFrameNum(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief create vencChannelDesc. + * + * @retval null for failed, other success + */ +ACL_FUNC_VISIBILITY aclvencChannelDesc *aclvencCreateChannelDesc(); + +/** + * @ingroup AscendCL + * @brief destroy vencChannelDesc. + * + * @param channelDesc [IN] channel desc. + * + * @retval ACL_SUCCESS:success, other:failed + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyChannelDesc(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Set decoding thread id for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param threadId [IN] thread id + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescThreadId(aclvencChannelDesc *channelDesc, uint64_t threadId); + +/** + * @ingroup AscendCL + * @brief Set func callback for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param callback [IN] func callback + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescCallback(aclvencChannelDesc *channelDesc, aclvencCallback callback); + +/** + * @ingroup AscendCL + * @brief Set video encoding type for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param enType [IN] video encoding type + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescEnType(aclvencChannelDesc *channelDesc, acldvppStreamFormat enType); + +/** + * @ingroup AscendCL + * @brief Set pic format for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param picFormat [IN] pic format + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicFormat(aclvencChannelDesc *channelDesc, + acldvppPixelFormat picFormat); + +/** + * @ingroup AscendCL + * @brief Set out pic width for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param picWidth [IN] pic width + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicWidth(aclvencChannelDesc *channelDesc, uint32_t picWidth); + +/** + * @ingroup AscendCL + * @brief Set pic height for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param picHeight [IN] pic height + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicHeight(aclvencChannelDesc *channelDesc, uint32_t picHeight); + +/** + * @ingroup AscendCL + * @brief Set key frame interval for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param keyFrameInterval [IN] Interval of key frame + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescKeyFrameInterval(aclvencChannelDesc *channelDesc, + uint32_t keyFrameInterval); + +/** + * @ingroup AscendCL + * @brief Set output buffer address for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param bufAddr [IN] output buffer address + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufAddr(aclvencChannelDesc *channelDesc, void *bufAddr); + +/** + * @ingroup AscendCL + * @brief Set output buffer size for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param bufSize [IN] output buffer size + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufSize(aclvencChannelDesc *channelDesc, uint32_t bufSize); + +/** + * @ingroup AscendCL + * @brief Set rc model for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param rcMode [IN] venc rc mode(VBR=1, CBR=2) + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescRcMode(aclvencChannelDesc *channelDesc, uint32_t rcMode); + +/** + * @ingroup AscendCL + * @brief Set source rate for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param srcRate [IN] source rate + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescSrcRate(aclvencChannelDesc *channelDesc, uint32_t srcRate); + +/** + * @ingroup AscendCL + * @brief Set max bit rate for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param maxBitRate [IN] max bit rate + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc *channelDesc, uint32_t maxBitRate); + +/** + * @ingroup AscendCL + * @brief Set venc parameter for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param paramType [IN] parameter type + * @param length [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, + aclvencChannelDescParamType paramType, size_t length, + const void *param); + +/** + * @ingroup AscendCL + * @brief Get output buffer address for venc channel desc. + * + * @param channelDesc[IN] venc channel desc + * + * @retval output buffer address + */ +ACL_FUNC_VISIBILITY void *aclvencGetChannelDescBufAddr(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get output buffer size for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval output buffer size + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescBufSize(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get decoding channel id for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval decoding channel id: 0~15, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescChannelId(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get decoding thread id for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval thread id, default 0 + */ +ACL_FUNC_VISIBILITY uint64_t aclvencGetChannelDescThreadId(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get func callback for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval func callback, default null + */ +ACL_FUNC_VISIBILITY aclvencCallback aclvencGetChannelDescCallback(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get video encoding type for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval video encoding type, default H265_MAIN_LEVEL + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat aclvencGetChannelDescEnType(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic format for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic format + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat aclvencGetChannelDescPicFormat(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic width for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic width, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicWidth(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic height for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic height, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicHeight(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get interval of key frame for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval interval of key frame, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescKeyFrameInterval(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get rc mode for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval rc mode, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescRcMode(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get source rate for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval source rate, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescSrcRate(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get max bit rate for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval max bit rate, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get venc parameter for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * @param paramType [IN] parameter type + * @param length [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, + aclvencChannelDescParamType paramType, size_t length, + size_t *paramRetSize, void *param); + +/** + * @ingroup AscendCL + * @brief get forced restart of I-frame interval from config + * + * @param config [IN] venc frame config + * + * @retval 0: Not forced; 1: Forced restart of I-frame -1: error + */ +ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigForceIFrame(const aclvencFrameConfig *config); + +/** + * @ingroup AscendCL + * @brief get forced restart of I-frame interval from config + * + * @param config [IN] venc frame config + * + * @retval Whether it is the end frame: 0: no; 1: end frame + */ +ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigEos(const aclvencFrameConfig *config); + +/** + * @ingroup AscendCL + * @brief set single frame encoding configuration parameters + * + * @param config [OUT] venc frame config + * @param forceFrame [IN] forced restart of I-frame interval: 0: Not forced; 1: Forced restart of I-frame + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigForceIFrame(aclvencFrameConfig *config, uint8_t forceIFrame); + +/** + * @ingroup AscendCL + * @brief set single frame encoding configuration parameters + * + * @param config [OUT] venc frame config + * @param eos [IN] Whether it is the end frame: 0: no; 1: end frame + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigEos(aclvencFrameConfig *config, uint8_t eos); + +/** + * @ingroup AscendCL + * @brief dvpp venc destroy frame config + * + * @param config [IN] venc frame config + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyFrameConfig(aclvencFrameConfig *config); + +/** + * @ingroup AscendCL + * @brief Create dvpp venc frame config. + * + * @retval null for failed, other aclvencFrameConfig ptr + */ +ACL_FUNC_VISIBILITY aclvencFrameConfig *aclvencCreateFrameConfig(); + +/** + * @ingroup AscendCL + * @brief Create dvpp venc channel. + * + * @param channelDesc [IN|OUT] venc channel desc + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencCreateChannel(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp venc channel. + * + * @param channelDesc [IN] venc channel desc + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyChannel(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp venc launch send frame task. + * + * @param channelDesc [IN] venc channel desc + * @param input [IN] input picture desc + * @param reserve [IN] reserve parameter + * @param config [IN] dvpp frame config + * @param userdata [IN] user callback function + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencSendFrame(aclvencChannelDesc *channelDesc, acldvppPicDesc *input, void *reserve, + aclvencFrameConfig *config, void *userdata); + +/** + * @ingroup AscendCL + * @brief Create dvpp stream description. + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY acldvppStreamDesc *acldvppCreateStreamDesc(); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp stream description. + * + * @par Function + * Can only destroy acldvppStreamDesc type created through + * acldvppCreateStreamDesc interface. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateStreamDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyStreamDesc(acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Set stream description's data addr. + * + * @param streamDesc [OUT] dvpp stream description. + * @param dataDev [IN] data addr. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescData(acldvppStreamDesc *streamDesc, void *dataDev); + +/** + * @ingroup AscendCL + * @brief Set stream description's data size. + * + * @param streamDesc [OUT] dvpp stream description. + * @param size [IN] data size. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescSize(acldvppStreamDesc *streamDesc, uint32_t size); + +/** + * @ingroup AscendCL + * @brief Set stream description's format. + * + * @param streamDesc [OUT] dvpp stream description. + * @param format [IN] stream format. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescFormat(acldvppStreamDesc *streamDesc, acldvppStreamFormat format); + +/** + * @ingroup AscendCL + * @brief Set stream description's timestamp. + * + * @param streamDesc [OUT] dvpp stream description. + * @param timestamp [IN] current timestamp. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescTimestamp(acldvppStreamDesc *streamDesc, uint64_t timestamp); + +/** + * @ingroup AscendCL + * @brief Set stream description's ret code. + * + * @param streamDesc [OUT] dvpp stream description. + * @param retCode [IN] result code. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescRetCode(acldvppStreamDesc *streamDesc, uint32_t retCode); + +/** + * @ingroup AscendCL + * @brief Set stream description's eos. + * + * @param streamDesc [OUT] dvpp stream description. + * @param eos [IN] end flag of sequence. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescEos(acldvppStreamDesc *streamDesc, uint8_t eos); + +/** + * @ingroup AscendCL + * @brief Get stream description's data addr. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval data addr. + * @retval deault nullptr. + */ +ACL_FUNC_VISIBILITY void *acldvppGetStreamDescData(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's data size. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval data size. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescSize(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's format. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval stream format. + * @retval default ACL_DVPP_STREAM_H264. + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat acldvppGetStreamDescFormat(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's timestamp. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval current timestamp. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint64_t acldvppGetStreamDescTimestamp(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's retCode. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval result code. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescRetCode(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's eos. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval end flag of sequence. + * @retval default 0(false). + */ +ACL_FUNC_VISIBILITY uint8_t acldvppGetStreamDescEos(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Create vdec frame config. + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY aclvdecFrameConfig *aclvdecCreateFrameConfig(); + +/** + * @ingroup AscendCL + * @brief Destroy vdec frame config. + * + * @par Function + * Can only destroy aclvdecFrameConfig type created through + * aclvdecCreateFrameConfig interface + * + * @param vdecFrameConfig [IN] vdec frame config. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateFrameConfig + */ +ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecFrameConfig); + +/** + * @ingroup AscendCL + * @brief Get image width and height of jpeg. + * + * @param data [IN] image data in host memory + * @param size [IN] the size of image data + * @param width [OUT] the width of image from image header + * @param height [OUT] the height of image from image header + * @param components [OUT] the components of image from image header + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height, + int32_t *components); + +/** + * @ingroup AscendCL + * @brief Get image width and height of jpeg. + * + * @param data [IN] image data in host memory + * @param size [IN] the size of image data + * @param width [OUT] the width of image from image header + * @param height [OUT] the height of image from image header + * @param components [OUT] the components of image from image header + * @param format [OUT] the format of image from image header + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width, + uint32_t *height, int32_t *components, + acldvppJpegFormat *format); + +/** + * @ingroup AscendCL + * @brief Predict encode size of jpeg image. + * + * @param inputDesc [IN] dvpp image desc + * @param config [IN] jpeg encode config + * @param size [OUT] the size predicted of image + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc, + const acldvppJpegeConfig *config, uint32_t *size); + +/** + * @ingroup AscendCL + * @brief Predict decode size of jpeg image. + * + * @param data [IN] origin image data in host memory + * @param dataSize [IN] the size of origin image data + * @param outputPixelFormat [IN] the pixel format jpeg decode + * @param decSize [OUT] the size predicted for decode image + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, uint32_t *decSize); + +/** + * @ingroup AscendCL + * @brief Get image width and height of png. + * + * @param data [IN] image data in host memory + * @param size [IN] the size of image data + * @param width [OUT] the width of image from image header + * @param height [OUT] the height of image from image header + * @param components [OUT] the components of image from image header + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width, + uint32_t *height, int32_t *components); + +/** + * @ingroup AscendCL + * @brief Predict decode size of png image. + * + * @param data [IN] origin image data in host memory + * @param dataSize [IN] the size of origin image data + * @param outputPixelFormat [IN] the pixel format jpeg decode + * @param decSize [OUT] the size predicted for decode image + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, uint32_t *decSize); + +/** + * @ingroup AscendCL + * @brief Create dvpp channel, the same channel can be reused + * and is no longer available after destruction. + * + * @param channelDesc [IN|OUT] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannelDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppCreateChannel(acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp channel. + * + * @par Restriction + * Can only destroy channel created through the acldvppCreateChannel interface + * + * @param channelDesc [IN] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp vpc resize. + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] resize input picture destruction + * @param outputDesc [IN|OUT] resize output picture destruction + * @param resizeConfig [IN] resize config + * @param stream [IN] resize task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + * | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop. + * + * @par Function + * crop the input picture according to the specified area, + * and then store the picture in the output memory as the output picture + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] crop input picture destruction + * @param outputDesc [IN|OUT] crop output picture destruction + * @param cropArea [IN] crop area config + * @param stream [IN] crop task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop and resize config. + * + * @par Function + * crop the input picture with resize config according to the specified area, + * and then store the picture in the output memory as the output picture + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] crop input picture destruction + * @param outputDesc [IN|OUT] crop output picture destruction + * @param cropArea [IN] crop area config + * @param resizeConfig [IN] resize config + * @param stream [IN] crop and resize config task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + acldvppResizeConfig *resizeConfig, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop. + * + * @par Function + * crop the input batch picture according to the specified area + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param stream [IN] crop batch task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, + uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop and resize config. + * + * @par Function + * crop the input batch picture with resize config according to the specified area + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param resizeConfig [IN] resize config + * @param stream [IN] crop batch and resize config task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateDvppConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, + uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppResizeConfig *resizeConfig, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop and paste. + * + * @par Function + * crop the input picture according to the specified area, + * and paste the picture to the specified position of the target picture + * as the output picture + * + * @param channelDesc [IN] thechannel destruction + * @param inputDesc [IN] crop and paste input picture destruction + * @param outputDesc [IN|OUT] crop and paste output picture destruction + * @param cropArea [IN] crop area config + * @param pasteArea [IN] paste area config + * @param stream [IN] crop and paste task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + acldvppRoiConfig *pasteArea, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop, resize config and paste. + * + * @par Function + * crop the input picture with resize config according to the specified area, + * and paste the picture to the specified position of the target picture + * as the output picture + * + * @param channelDesc [IN] thechannel destruction + * @param inputDesc [IN] crop and paste input picture destruction + * @param outputDesc [IN|OUT] crop and paste output picture destruction + * @param cropArea [IN] crop area config + * @param pasteArea [IN] paste area config + * @param resizeConfig [IN] resize config + * @param stream [IN] crop, paste and resize task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + acldvppRoiConfig *pasteArea, + acldvppResizeConfig *resizeConfig, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop and paste. + * + * @par Function + * crop the input batch picture according to the specified area, + * and paste the pictures to the specified position of the target pictures + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param pasteAreas [IN] paste area configs + * @param stream [IN] crop batch task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, + uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppRoiConfig *pasteAreas[], aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop, resize config and paste. + * + * @par Function + * crop the input batch picture with resize config according to the specified area, + * and paste the pictures to the specified position of the target pictures + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param pasteAreas [IN] paste area configs + * @param resizeConfig [IN] resize config + * @param stream [IN] crop batch and resize config task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync( + acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[], + acldvppResizeConfig *resizeConfig, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc jpeg decode. + * + * @par Function + * For different source picture formats, after decoding, + * output pictures in the following format: + * @li jpeg(444) -> YUV444SP:V is front U is back, + * YUV420 SP V is front U is back, YUV420SP U is front V is back; + * @li jpeg(422) -> YUV422SP:V is in front U is behind, + * YUV420SP V is in front U is behind, YUV420SP U is in front V is behind; + * @li jpeg(420) -> YUV420SP: + * V is front U is back, YUV420SP U is front V is back; + * @li jpeg(400) -> YUV420SP:UV data is filled with 0 x 80. + * + * @param channelDesc [IN] the channel destruction + * @param data [IN] decode input picture destruction's data + * @param size [IN] decode input picture destruction's size + * @param outputDesc [IN|OUT] decode output picture destruction + * @param stream [IN] decode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, + acldvppPicDesc *outputDesc, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc jpeg encode. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] encode input picture destruction + * @param data [OUT] encode output picture destruction's data + * @param size [IN|OUT] encode output picture destruction's size + * @param config [IN] jpeg encode config + * @param stream [IN] encode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateJpegeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + const void *data, uint32_t *size, acldvppJpegeConfig *config, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc png decode. + * + * @param channelDesc [IN] the channel destruction + * @param data [IN] decode input picture destruction's data + * @param size [IN] decode input picture destruction's size + * @param outputDesc [IN|OUT] decode output picture destruction + * @param stream [IN] decode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, + acldvppPicDesc *outputDesc, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Create vdec channel. + * + * @par Function + * Create a channel for video data processing, + * the same channel can be reused, + * and is no longer available after destruction + * + * @param channelDesc [IN|OUT] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannelDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecCreateChannel(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Destroy vdec channel. + * + * @par Function + * Can only destroy channels created by the aclvdecCreateChannel interface + * + * @param channelDesc [IN] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannel + */ +ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp vdec send frame. + * + * @par Function + * Pass the input memory to be decoded + * and the decoded output memory to the decoder for decoding + * + * @param channelDesc [IN] vdec channel destruction + * @param input [IN] input stream destruction + * @param output [IN|OUT] output picture destruction + * @param config [IN] vdec frame config + * @param userData [IN] user data for callback function + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, + acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData); + +/** + * @ingroup AscendCL + * @brief dvpp vdec send skipped frame. + * + * @par Function + * Pass video frame to decoder + * + * @param channelDesc [IN] vdec channel destruction + * @param input [IN] input stream destruction + * @param config [IN] vdec frame config + * @param userData [IN] user data for callback function + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame + */ +ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, + aclvdecFrameConfig *config, void *userData); + +/** + * @ingroup AscendCL + * @brief dvpp vpc convert color. + * + * @par Restriction + * @li outputDesc:Width height stride, No changes are allowed. Just configure 0 + * @par Function + * Convert color gamut + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] convert color input picture destruction + * @param outputDesc [IN|OUT] convert color output picture destruction + * @param stream [IN] convert color task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc pyramid down. + * + * @par Restriction + * @li outputDesc:format only supported YUV400 + * @par Function + * Image pyramid down + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] pyr down input picture destruction + * @param outputDesc [IN|OUT] pyr down output picture destruction + * @param reserve [IN] reserved param , must be nullptr + * @param stream [IN] pyr down task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Set dvpp channel mode. + * + * @param channelDesc [OUT] the channel destruction + * @param mode [IN] channel mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode); + +/** + * @ingroup AscendCL + * @brief Set resize config interpolation. + * + * @param resizeConfig [OUT] the resize config + * @param interpolation [IN] interpolation + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetResizeConfigInterpolation(acldvppResizeConfig *resizeConfig, + uint32_t interpolation); + +/** + * @ingroup AscendCL + * @brief Get resize config interpolation. + * + * @param resizeConfig [IN] the resize config + * + * @retval Interpolation of resize config. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppResizeConfig *resizeConfig); + +/** + * @ingroup AscendCL + * @brief Set vdec channel out mode. + * + * @param channelDesc [OUT] the channel destruction + * @param outMode [IN] channel out mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode); + +/** + * @ingroup AscendCL + * @brief Get vdec channel out mode. + * + * @param channelDesc [IN] the channel destruction + * + * @retval Out mode of channel destruction + * @retval default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutMode(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp batch picture description. + * + * @param batchSize [IN] batch size + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppBatchPicDesc *acldvppCreateBatchPicDesc(uint32_t batchSize); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture description. + * + * @param batchPicDesc [IN] dvpp batch picture description. + * @param index [IN] index of batch + * + * @retval null for failed. + * @retval OtherValues Failure + * + * @see acldvppCreateBatchPicDesc + */ +ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppGetPicDesc(acldvppBatchPicDesc *batchPicDesc, uint32_t index); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp batch picture description. + * + * @par Function + * Can only destroy batch picture description information created + * through acldvppCreateBatchPicDesc interface. + * + * @param batchPicDesc [IN] dvpp batch picture description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateBatchPicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyBatchPicDesc(acldvppBatchPicDesc *batchPicDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp lut map. + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppLutMap *acldvppCreateLutMap(); + +/** + * @ingroup AscendCL + * @brief Destroy lut map. + * + * @param lutMap [IN] lut map + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyLutMap(acldvppLutMap *lutMap); + +/** + * @ingroup AscendCL + * @brief Get lut map dims. + * + * @param lutMap [IN] lut map + * + * @retval 0 for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap); + +/** + * @ingroup AscendCL + * @brief Get lut map data. + * + * @param lutMap [IN] lut map + * @param dim [IN] input dim of map + * @param data [OUT] the dim of lut map's data + * @param len [OUT] the dim of lut map's length + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data, + uint32_t *len); +/** + * @ingroup AscendCL + * @brief Vpc equalize hist. + * + * @param channelDesc [IN] channel desc + * @param inputDesc [IN] input desc + * @param outputDesc [IN|OUT] output desc + * @param lutMap [IN] lut map param + * @param stream [IN] runtime stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, + const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, + const acldvppLutMap *lutMap, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Create dvpp border config. + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig(); + +/** + * @ingroup AscendCL + * @brief Set value of border config. + * + * @param borderConfig [OUT] border config + * @param index [IN] index of value array + * @param value [IN] value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index, + double value); + +/** + * @ingroup AscendCL + * @brief Set border type of border config. + * + * @param borderConfig [OUT] border config + * @param borderType [IN] border type + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBorderType(acldvppBorderConfig *borderConfig, + acldvppBorderType borderType); + +/** + * @ingroup AscendCL + * @brief Set top of border config. + * + * @param borderConfig [OUT] border config + * @param top [IN] top of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigTop(acldvppBorderConfig *borderConfig, uint32_t top); + +/** + * @ingroup AscendCL + * @brief Set bottom of border config. + * + * @param borderConfig [OUT] border config + * @param bottom [IN] bottom of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBottom(acldvppBorderConfig *borderConfig, uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Set left of border config. + * + * @param borderConfig [OUT] border config + * @param left [IN] left of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigLeft(acldvppBorderConfig *borderConfig, uint32_t left); + +/** + * @ingroup AscendCL + * @brief Set right of border config. + * + * @param borderConfig [OUT] border config + * @param right [IN] right of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigRight(acldvppBorderConfig *borderConfig, uint32_t right); + +/** + * @ingroup AscendCL + * @brief Get value of border config. + * + * @param borderConfig [IN] border config + * @param index[IN] index of value array + * + * @retval invalid value is < 0, normal Value is >= 0 + */ +ACL_FUNC_VISIBILITY double acldvppGetBorderConfigValue(const acldvppBorderConfig *borderConfig, uint32_t index); + +/** + * @ingroup AscendCL + * @brief Get border type of border config. + * + * @param borderConfig [IN] border config + * @retval border type of border config + */ +ACL_FUNC_VISIBILITY acldvppBorderType acldvppGetBorderConfigBorderType(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get right of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, top value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigTop(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get Bottom of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, top value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigBottom(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get left of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, top value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigLeft(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get right of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, right value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigRight(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Destroy border config. + * + * @param borderConfig [IN] border config + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Vpc make border. + * + * @param channelDesc [IN] channel desc + * @param inputDesc [IN] input desc + * @param outputDesc [IN|OUT] output desc + * @param borderConfig [IN] border config param + * @param stream [IN] runtime stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, + const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, + const acldvppBorderConfig *borderConfig, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Dvpp vpc calc hist. + * + * @param channelDesc [IN] the channel destruction + * @param srcPicDesc [IN] pyr down input picture destruction + * @param hist [IN|OUT] pyr down output picture destruction + * @param reserve [IN] reserved param, must be nullptr + * @param stream [IN] task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc, + acldvppHist *hist, void *reserve, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Create vpc hist description. + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist(); + +/** + * @ingroup AscendCL + * @brief Destroy vpc hist description. + * + * @par Function + * Can only destroy hist description information created + * through acldvppCreateHist interface. + * + * @param hist [IN] vpc hist description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateHist + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyHist(acldvppHist *hist); + +/** + * @ingroup AscendCL + * @brief Get dims of vpc hist description. + * + * @param hist [IN] vpc hist description. + * + * @retval dims of vpc hist description. + * + * @see acldvppCreateHist | acldvppVpcCalcHistAsync + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetHistDims(acldvppHist *hist); + +/** + * @ingroup AscendCL + * @brief Get data from vpc hist description by dim. + * + * @param hist [IN] vpc hist description. + * @param dim [IN] which dim to get data. + * @param data [OUT] address of output hist data. + * @param len [OUT] len of output hist data. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateHist | acldvppVpcCalcHistAsync + */ +ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, uint32_t **data, uint16_t *len); + +/** + * @ingroup AscendCL + * @brief Get dvpp calc hist process return code. + * + * @param hist [IN] vpc hist description. + * + * @retval Dvpp calc hist process return code. + * + * @see acldvppCreateHist | acldvppVpcCalcHistAsync + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); + +/** + * @ingroup AscendCL + * @brief Set vpc hist description to 0. + * + * @par Function + * Can only clear hist description information created + * through acldvppCreateHist interface. + * + * @param hist [IN] vpc hist description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateHist + */ +ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop, resize config and make border. + * + * @par Function + * crop the input batch picture with resize config and border configs according to the specified area + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param borderCfgs [IN] border configs + * @param resizeConfig [IN] resize config + * @param stream [IN] crop batch, resize config and make border task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync( + acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[], + acldvppResizeConfig *resizeConfig, aclrtStream stream); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ diff --git a/inc/external/acl/ops/acl_fv.h b/inc/external/acl/ops/acl_fv.h new file mode 100644 index 00000000..4bd392c9 --- /dev/null +++ b/inc/external/acl/ops/acl_fv.h @@ -0,0 +1,348 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ +#define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ + +#include "acl/acl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct aclfvInitPara aclfvInitPara; +typedef struct aclfvFeatureInfo aclfvFeatureInfo; +typedef struct aclfvRepoRange aclfvRepoRange; +typedef struct aclfvQueryTable aclfvQueryTable; +typedef struct aclfvSearchInput aclfvSearchInput; +typedef struct aclfvSearchResult aclfvSearchResult; + +// search operation type +enum aclfvSearchType { + SEARCH_1_N, // 1:N operation type + SEARCH_N_M // N:M operation type +}; + +/** + * @ingroup AscendCL + * @brief Create fv init param. + * + * @param fsNum [IN] The feature num + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY aclfvInitPara *aclfvCreateInitPara(uint64_t fsNum); + +/** + * @ingroup AscendCL + * @brief Destroy fv init param. + * + * @par Function + * Can only destroy fv init param information created + * through aclfvCreateInitPara interface. + * + * @param initPara [IN] fv init param. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateInitPara + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroyInitPara(aclfvInitPara *initPara); + +/** + * @ingroup AscendCL + * @brief set value for maxTopNumFor1N which in fv init param. + * + * @param initPara [IN|OUT] fv init param. + * @param maxTopNumFor1N [IN] maxTopNumFor1N value for init param. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclfvSet1NTopNum(aclfvInitPara *initPara, uint32_t maxTopNumFor1N); + +/** + * @ingroup AscendCL + * @brief set value for maxTopNumForNM which in fv init param. + * + * @param initPara [IN|OUT] fv init param. + * @param maxTopNumForNM [IN] maxTopNumForNM value for init param. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t maxTopNumForNM); + +/** + * @ingroup AscendCL + * @brief Create fv feature info. + * + * @param id0 [IN] The first level library id0 + * @param id1 [IN] Secondary library id1 + * @param offset [IN] The offset of the first feature in the library + * @param featureLen [IN] Single feature length + * @param featureCount [IN] Single feature count + * @param featureData [IN] Feature value list + * @param featureDataLen [IN] Feature value list length + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, + uint32_t featureLen, uint32_t featureCount, + uint8_t *featureData, uint32_t featureDataLen); + +/** + * @ingroup AscendCL + * @brief Destroy fv feature info. + * + * @par Function + * Can only destroy fv feature info information created + * through aclfvCreateFeatureInfo interface. + * + * @param featureInfo [IN] fv feature info. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateFeatureInfo + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroyFeatureInfo(aclfvFeatureInfo *featureInfo); + +/** + * @ingroup AscendCL + * @brief Create fv repo range. + * + * @param id0Min [IN] id0 start value + * @param id0Min [IN] id0 max + * @param id1Min [IN] id0 start value + * @param id1Max [IN] id1 max + * + * @retval null for failed. OtherValues success + */ +ACL_FUNC_VISIBILITY aclfvRepoRange *aclfvCreateRepoRange(uint32_t id0Min, uint32_t id0Max, uint32_t id1Min, + uint32_t id1Max); + +/** + * @ingroup AscendCL + * @brief Destroy fv repo range. + * + * @par Function + * Can only destroy fv repo range information created + * through aclfvCreateRepoRange interface. + * + * @param repoRange [IN] fv repo range. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateRepoRange + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroyRepoRange(aclfvRepoRange *repoRange); + +/** + * @ingroup AscendCL + * @brief Create query table. + * + * @param queryCnt [IN] Number of tables, the maximum number is 6 + * @param tableLen [IN] Single table length, table length is 32KB + * @param tableData [IN] Feature value list + * @param tableDataLen [IN] The length of memory requested by the featureData pointer + * + * @retval null for failed. OtherValues success + */ +ACL_FUNC_VISIBILITY aclfvQueryTable *aclfvCreateQueryTable(uint32_t queryCnt, uint32_t tableLen, uint8_t *tableData, + uint32_t tableDataLen); + +/** + * @ingroup AscendCL + * @brief Destroy query table. + * + * @par Function + * Can only destroy query table information created + * through aclfvCreateQueryTable interface. + * + * @param queryTable [IN] query table. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateQueryTable + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroyQueryTable(aclfvQueryTable *queryTable); + +/** + * @ingroup AscendCL + * @brief Create search input. + * + * @param queryTable [IN] query table + * @param repoRange [IN] query repo range + * @param topk [IN] query topk + * + * @retval null for failed. OtherValues success + */ +ACL_FUNC_VISIBILITY aclfvSearchInput *aclfvCreateSearchInput(aclfvQueryTable *queryTable, aclfvRepoRange *repoRange, + uint32_t topk); + +/** + * @ingroup AscendCL + * @brief Destroy search input. + * + * @par Function + * Can only destroy search input information created + * through aclfvCreateSearchInput interface. + * + * @param searchInput [IN] search input. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateSearchInput + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInput); + +/** + * @ingroup AscendCL + * @brief Create search result. + * + * @param queryCnt [IN] Retrieve the number of features + * @param resultNum [IN] The number of search results for each feature, the number is queryCnt + * @param resultNumDataLen [IN] resultNum memory length + * @param id0 [IN] Level 1 library id0 + * @param id1 [IN] Secondary library id1 + * @param resultOffset [IN] The offset of the bottom library corresponding + * to each feature retrieval result, total length topK * queryCnt + * @param resultDistance [IN] Distance, total length topK * queryCnt + * @param dataLen [IN] The memory size requested by + * id0\id1\reslutOffset\resultDistance + * + * @retval null for failed. OtherValues success + */ +ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, + uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, + uint32_t *resultOffset, float *resultDistance, + uint32_t dataLen); + +/** + * @ingroup AscendCL + * @brief Destroy search result. + * + * @par Function + * Can only destroy search result information created + * through aclfvCreateSearchResult interface. + * + * @param searchResult [IN] search result. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateSearchResult + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroySearchResult(aclfvSearchResult *searchResult); + +/** + * @ingroup AscendCL + * @brief fv IP initialize. + * + * @param initPara [IN] fv init param. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvInit(aclfvInitPara *initPara); + +/** + * @ingroup AscendCL + * @brief release fv resources. + * + * @par Function + * Can only release fv resources created + * through aclfvInit interface. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + * + * @see aclfvInit + */ +ACL_FUNC_VISIBILITY aclError aclfvRelease(); + +/** + * @ingroup AscendCL + * @brief fv repo add. + * + * @param type [IN] repo add type + * @param featureInfo [IN] add feature information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo); + +/** + * @ingroup AscendCL + * @brief fv repo del. + * + * @param type [IN] repo delete type + * @param repoRange [IN] repo range information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange); + +/** + * @ingroup AscendCL + * @brief fv accurate del. + * + * @param featureInfo [IN] accurate delete feature information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo); + +/** + * @ingroup AscendCL + * @brief fv accurate modify. + * + * @param featureInfo [IN] accurate modify feature information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo); + +/** + * @ingroup AscendCL + * @brief fv search. + * + * @param type [IN] search type + * @param searchInput [IN] search input + * @param searchRst [OUT] search result + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput *searchInput, + aclfvSearchResult *searchRst); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h new file mode 100644 index 00000000..8261adc4 --- /dev/null +++ b/inc/external/hccl/hccl.h @@ -0,0 +1,159 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl.h + * @brief HCCL API + */ + +#ifndef HCCL_H_ +#define HCCL_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief Initialize HCCL. + * + * @param clusterInfo A string identifying the cluster info file path, include file name. + * @param rank A integer identifying the identify for the rank. + * @param comm A pointer identifying the initialized communication resource. + * @return HcclResult + * @see HcclCommDestroy() + */ +extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); + +/** + * @brief Get hccl root info. + * + * @param rootInfo A pointer identifying the hccl root info. + * @return HcclResult + */ +extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); + +/** + * @brief Initialize HCCL with root info. + * + * @param nRanks A integer identifying the rank size of the cluster. + * @param rootInfo A struct identifying the hccl root info. + * @param rank A integer identifying the identify for the rank. + * @param comm A pointer identifying the initialized communication resource. + * @return HcclResult + * @see HcclCommDestroy() + */ +extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); + +/** + * @brief AllReduce operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, + * float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, + HcclComm comm, aclrtStream stream); + +/** + * @brief Broadcast operator. + * + * @param buf A pointer identifying the data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param root An integer(u32) identifying the the root rank in the operator. + * @param comm A pointer identifying the communication resource based on + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, + aclrtStream stream); + +/** + * @brief ReduceScatter operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param recvCount An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, + HcclReduceOp op, HcclComm comm, aclrtStream stream); + +/** + * @brief AllGather operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param sendCount An integer(u64) identifying the number of the input data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, + aclrtStream stream); +/** + * @brief Get the rank size of this comm. + * + * @param comm A pointer identifying the communication resource based on. + * @param rankSize A pointer identifying the rank size. + * @return HcclResult + */ +extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize); + +/** + * @brief Get the rank id of this comm. + * + * @param comm A pointer identifying the communication resource based on. + * @param rankSize A pointer identifying the rank id. + * @return HcclResult + */ +extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank); +/** + * @brief Barrier operator. + * + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream); + +/** + * @brief Destroy HCCL comm + * + * @param comm A pointer identifying the communication resource targetting + * @return HcclResult + * @see HcclCommInitClusterInfo() + */ +extern HcclResult HcclCommDestroy(HcclComm comm); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_H_ diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h new file mode 100644 index 00000000..0e832396 --- /dev/null +++ b/inc/external/hccl/hccl_types.h @@ -0,0 +1,101 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl_types.h + * @brief HCCL data type definition + * + */ + +#ifndef HCCL_TYPES_H_ +#define HCCL_TYPES_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief HCCL functions return value definition + */ +typedef enum { + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ +} HcclResult; + +/** + * @brief handle to HCCL communicator + */ +typedef void *HcclComm; + +/** + * @brief HCCL Reduction opperation + */ +typedef enum { + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN = 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ +} HcclReduceOp; + +/** + * @brief HCCL data type + */ +typedef enum { + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ + HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ +} HcclDataType; + +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length + +/** + * @brief HCCL root info + */ +typedef struct HcclRootInfoDef { + char internal[HCCL_ROOT_INFO_BYTES]; +} HcclRootInfo; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h new file mode 100644 index 00000000..a1392cc6 --- /dev/null +++ b/inc/external/runtime/rt_error_codes.h @@ -0,0 +1,109 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ +#define __INC_EXTERNEL_RT_ERROR_CODES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static const int32_t ACL_RT_SUCCESS = 0; // success + +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type +static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout + +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource +static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource + +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout +static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception +static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal + +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error +static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect + +#ifdef __cplusplus +} +#endif + +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h index f59c6454..4530bff7 100644 --- a/inc/framework/ge_runtime/task_info.h +++ b/inc/framework/ge_runtime/task_info.h @@ -271,13 +271,14 @@ class FusionEndTaskInfo : public TaskInfo { class HcclTaskInfo : public TaskInfo { public: HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, - void *output_data_addr, int64_t workspace_size, int64_t hccl_stream_num, + void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, const std::vector &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag) : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), hccl_type_(hccl_type), input_data_addr_(input_data_addr), output_data_addr_(output_data_addr), + workspace_addr_(workspace_addr), workspace_size_(workspace_size), hccl_stream_num_(hccl_stream_num), private_def_(private_def), @@ -292,6 +293,7 @@ class HcclTaskInfo : public TaskInfo { const std::string &hccl_type() const { return hccl_type_; } void *input_data_addr() const { return input_data_addr_; } void *output_data_addr() const { return output_data_addr_; } + void *workspace_addr() const { return workspace_addr_; } int64_t workspace_size() const { return workspace_size_; } int64_t hccl_stream_num() const { return hccl_stream_num_; } const std::vector &private_def() const { return private_def_; } @@ -306,6 +308,7 @@ class HcclTaskInfo : public TaskInfo { std::string hccl_type_; void *input_data_addr_; void *output_data_addr_; + void *workspace_addr_; int64_t workspace_size_; int64_t hccl_stream_num_; std::vector private_def_; diff --git a/metadef b/metadef index a725349b..21178899 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit a725349b65aef2940555af2ddb7b9461fbe0d5fd +Subproject commit 211788997dcc9aa63527541a44d511388c06bce5 diff --git a/scripts/format_source_code.sh b/scripts/format_source_code.sh new file mode 100755 index 00000000..1fd0b4f6 --- /dev/null +++ b/scripts/format_source_code.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# Copyright 2019-2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +set -e + +CLANG_FORMAT=$(which clang-format) || (echo "Please install 'clang-format' tool first"; exit 1) + +version=$("${CLANG_FORMAT}" --version | sed -n "s/.*\ \([0-9]*\)\.[0-9]*\.[0-9]*.*/\1/p") +if [[ "${version}" -lt "8" ]]; then + echo "clang-format's version must be at least 8.0.0" + exit 1 +fi + +CURRENT_PATH=$(pwd) +SCRIPTS_PATH=$(dirname "$0") + +echo "CURRENT_PATH=${CURRENT_PATH}" +echo "SCRIPTS_PATH=${SCRIPTS_PATH}" + +# print usage message +function usage() +{ + echo "Format the specified source files to conform the code style." + echo "Usage:" + echo "bash $0 [-a] [-c] [-l] [-h]" + echo "e.g. $0 -c" + echo "" + echo "Options:" + echo " -a format of all files" + echo " -c format of the files changed compared to last commit, default case" + echo " -l format of the files changed in last commit" + echo " -h Print usage" +} + +# check and set options +function checkopts() +{ + # init variable + mode="changed" # default format changed files + + # Process the options + while getopts 'aclh' opt + do + case "${opt}" in + a) + mode="all" + ;; + c) + mode="changed" + ;; + l) + mode="lastcommit" + ;; + h) + usage + exit 0 + ;; + *) + echo "Unknown option ${opt}!" + usage + exit 1 + esac + done +} + +# init variable +# check options +checkopts "$@" + +# switch to project root path, which contains clang-format config file '.clang-format' +cd "${SCRIPTS_PATH}/.." || exit 1 + +FMT_FILE_LIST='__format_files_list__' + +if [[ "X${mode}" == "Xall" ]]; then + find src -type f -name "*" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true + find inc -type f -name "*" | grep "\.h$\|\.cc$" >> "${FMT_FILE_LIST}" || true +elif [[ "X${mode}" == "Xchanged" ]]; then + # --diff-filter=ACMRTUXB will ignore deleted files in commit + git diff --diff-filter=ACMRTUXB --name-only | grep "^inc\|^src" | grep "\.h$\|\.cc$" >> "${FMT_FILE_LIST}" || true +else # "X${mode}" == "Xlastcommit" + git diff --diff-filter=ACMRTUXB --name-only HEAD~ HEAD | grep "^inc\|^src" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true +fi + +while read line; do + if [ -f "${line}" ]; then + ${CLANG_FORMAT} -i "${line}" + fi +done < "${FMT_FILE_LIST}" + +rm "${FMT_FILE_LIST}" +cd "${CURRENT_PATH}" || exit 1 + +echo "Specified cpp source files have been format successfully." diff --git a/third_party/fwkacllib/inc/cce/taskdown_common.hpp b/third_party/fwkacllib/inc/cce/taskdown_common.hpp index 3ecea523..7954162e 100644 --- a/third_party/fwkacllib/inc/cce/taskdown_common.hpp +++ b/third_party/fwkacllib/inc/cce/taskdown_common.hpp @@ -27,15 +27,16 @@ namespace cce { #define CC_FUSION_OP_MAX 32 typedef enum tagccKernelType { - CCE_AI_CORE = 0, /* cce aicore */ - CCE_AI_CPU = 1, /* cce aicpu */ - TE = 2, /* te operator*/ - CUSTOMIZED = 3, /* customized operator */ - TE_AI_CORE = 4, /* te aicore operator*/ - TE_AI_CPU = 5, /* te aicpu operator */ - AI_CPU = 6, /* aicpu */ - CUST_AI_CPU = 7, /* custom aicpu*/ - INVALID = 8, /* unknown kernel type */ + CCE_AI_CORE = 0, /* cce aicore */ + CCE_AI_CPU = 1, /* cce aicpu */ + TE = 2, /* te operator*/ + CUSTOMIZED = 3, /* customized operator */ + TE_AI_CORE = 4, /* te aicore operator*/ + TE_AI_CPU = 5, /* te aicpu operator */ + AI_CPU = 6, /* aicpu */ + CUST_AI_CPU = 7, /* custom aicpu*/ + HOST_CPU = 8, /* host cpu */ + INVALID = 10000 /* unknown kernel type */ } ccKernelType; typedef struct tagOpContext { diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index e57563b3..ffbf552b 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -124,27 +124,27 @@ struct HcomRemoteAccessAddrInfo { }; struct HcomAllToAllVParams { - void *sendbuf; - void *sendcounts; - void *sdispls; - HcclDataType sendtype; - void *recvbuf; - void *recvcounts; - void *rdispls; - HcclDataType recvtype; - const char *group; + void *sendbuf; // device mem + void *sendcounts; // device mem; Type: uint_64 + void *sdispls; // device mem; Type: uint_64 + HcclDataType sendtype; + void *recvbuf; // device mem + void *recvcounts; // device mem; Type: uint_64 + void *rdispls; // device mem; Type: uint_64 + HcclDataType recvtype; + const char *group; // not used now }; struct HcomGatherAllToAllVParams { - void *addrInfo; - void *addrInfoCountPerRank; - void *recvbuf; - void *recvcounts; - void *rdispls; - void *gatheredbuf; - s32 addrLength; - HcclDataType recvtype; - const char *group; + void *addrInfo; // device mem; contains host VA[uint_64]: [addr, length, addr, length, addr, length, ...] + void *addrInfoCountPerRank; // device mem; length: ranksize; contains addrInfoCounts for every rank + void *recvbuf; // device mem + void *recvcounts; // device mem; Type: uint_64 + void *rdispls; // device mem; Type: uint_64 + void *gatheredbuf; // device mem + s32 addrLength; + HcclDataType recvtype; + const char *group; // not used now }; #ifdef __cplusplus diff --git a/third_party/fwkacllib/inc/hccl/hccl_types.h b/third_party/fwkacllib/inc/hccl/hccl_types.h deleted file mode 100644 index 50a64795..00000000 --- a/third_party/fwkacllib/inc/hccl/hccl_types.h +++ /dev/null @@ -1,101 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hccl_types.h - * @brief HCCL data type definition - * - */ - -#ifndef HCCL_TYPES_H_ -#define HCCL_TYPES_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -/** - * @brief HCCL functions return value definition - */ -typedef enum { - HCCL_SUCCESS = 0, /**< success */ - HCCL_E_PARA = 1, /**< parameter error */ - HCCL_E_PTR = 2, /**< empty pointer */ - HCCL_E_MEMORY = 3, /**< memory error */ - HCCL_E_INTERNAL = 4, /**< internal error */ - HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ - HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ - HCCL_E_UNAVAIL = 7, /**< resource unavailable */ - HCCL_E_SYSCALL = 8, /**< call system interface error */ - HCCL_E_TIMEOUT = 9, /**< timeout */ - HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ - HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ - HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ - HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ - HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ - HCCL_E_RUNTIME = 15, /**< call runtime api fail */ - HCCL_E_DRV = 16, /**< call driver api fail */ - HCCL_E_PROFILING = 17, /**< call profiling api fail */ - HCCL_E_CCE = 18, /**< call cce api fail */ - HCCL_E_NETWORK = 19, /**< call network api fail */ - HCCL_E_RESERVED /**< reserved */ -} HcclResult; - -/** - * @brief handle to HCCL communicator - */ -typedef void *HcclComm; - -/** - * @brief HCCL Reduction opperation - */ -typedef enum { - HCCL_REDUCE_SUM = 0, /**< sum */ - HCCL_REDUCE_PROD = 1, /**< prod */ - HCCL_REDUCE_MAX = 2, /**< max */ - HCCL_REDUCE_MIN = 3, /**< min */ - HCCL_REDUCE_RESERVED /**< reserved */ -} HcclReduceOp; - -/** - * @brief HCCL data type - */ -typedef enum { - HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ - HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ - HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ - HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ - HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ - HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ - HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ - HCCL_DATA_TYPE_RESERVED /**< reserved */ -} HcclDataType; - -const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length - -/** - * @brief HCCL root info - */ -typedef struct HcclRootInfoDef { - char internal[HCCL_ROOT_INFO_BYTES]; -} HcclRootInfo; - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // HCCL_TYPES_H_ diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index 955764d6..bf1f395b 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -164,8 +164,22 @@ HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, const std::vector& addrInfos, std::function callback); +/** + * @brief Put alltoallv communication operation into hcom executor. + * + * @param params information about alltoallv communication operation. + * @param callback callback after collective communication operation. + * @return HcclResult + */ HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function callback); +/** + * @brief Put agther alltoallv communication operation into hcom executor. + * + * @param params information about agther alltoallv communication operation. + * @param callback callback after collective communication operation. + * @return HcclResult + */ HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params, std::function callback); diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h index 38a689ee..f8d5ccf3 100644 --- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h +++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h @@ -56,6 +56,7 @@ #include #include #include +#include #include #include diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index 993f36ba..3d196e41 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -550,6 +550,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); + +MMPA_FUNC_VISIBILITY mmSize mmGetPageSize(); +MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize); +MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr); #define MMPA_DLL_API #ifdef __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index 49e97a5d..e6b6f71e 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -557,6 +557,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); + +MMPA_FUNC_VISIBILITY mmSize mmGetPageSize(); +MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize); +MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr); #ifdef __cplusplus #if __cplusplus } diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h index bed984bd..86805f72 100644 --- a/third_party/fwkacllib/inc/ops/aipp.h +++ b/third_party/fwkacllib/inc/ops/aipp.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,6 +65,8 @@ in aipp config file, framework will auto add one input node to graph at last. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator AippData. +*@par Restrictions: +*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. */ REG_OP(AippData) .INPUT(data, TensorType::ALL()) diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h index 1ac83783..cc11f5f9 100644 --- a/third_party/fwkacllib/inc/ops/all_ops.h +++ b/third_party/fwkacllib/inc/ops/all_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,6 +39,7 @@ #include "image_ops.h" #include "internal_ops.h" #include "linalg_ops.h" +#include "list_ops.h" #include "logging_ops.h" #include "lookup_ops.h" #include "math_ops.h" diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index e1f64421..fd35b546 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -626,7 +626,7 @@ REG_OP(StopGradient) *x: A tensor. \n *@par Outputs: -*y: A tensor. \n +*y: A tensor with the same shape and contents as input. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator Identity. @@ -666,7 +666,7 @@ REG_OP(IdentityN) *@li axis: The dimension index at which to expand. \n *@par Outputs: -*y: A tensor. \n +*y: A tensor with the same data as input, with an additional dimension inserted at the index specified by axis. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator ExpandDims. @@ -713,7 +713,7 @@ REG_OP(Unsqueeze) *@par Outputs: *y: A tensor. \n -*@par Attention: +*@attention Constraints: *This operator cannot be directly called by the acllopExecute API. \n *@par Third-party framework compatibility @@ -1153,6 +1153,102 @@ REG_OP(EditDistance) .OUTPUT(output, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(EditDistance) +/** +* @brief sort_v2. + +* @par Inputs: +* @li x: An ND tensor of type float16. + +* @par Attributes: + +* @li axis: An optional int. The dimension to sort along. This value defaults to -1. +* @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False. + +* @par Outputs: +* @li y: An ND tensor of type float16. + +* @attention Constraints: +* @li Axis should select the last dim. +* @li When the sorting data is less than 150K, it is recommended to use this tbe ops, + and the descending performance is better than the ascending. +* @li The upper limit of data on Ascend910 is 2000K. +*/ +REG_OP(SortV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(axis, Int, -1) + .ATTR(descending, Bool, false) + .OP_END_FACTORY_REG(SortV2) + +/** +* @brief Expand the input tensor to a compatible shape. \n + +* @par Inputs: +* One inputs, including: +* @li x: A Tensor. Must be one of the following types: +* float16, float32, int32, int8 ,uint8. \n +* @li shape: A Tensor to specify the shape that the input tensor expanded to. \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n + +* @par Third-party framework compatibility +* Compatible with the ONNX operator Expand. +*/ + +REG_OP(Expand) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OP_END_FACTORY_REG(Expand) + +/** +*@Returns a tensor containing the indices of all non-zero elements of input. \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64. + +*@par Attributes: +* transpose: the output tensor will be transposed if true. \n + +*@par Outputs: +* y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +*Compatible with the PyTorch operator NonZero. +*/ + +REG_OP(NonZero) + .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_INT64})) + .ATTR(transpose, Bool, false) + .OP_END_FACTORY_REG(NonZero) + +/** +* @brief Expand the input tensor to a compatible shape. \n + +* @par Inputs: +* One inputs, including: +* @li x: A Tensor. Must be one of the following types: +* float16, float32, int32, int8 ,uint8. \n + +* @par Attributes: +* @li shape: A required listInt to specify the shape that the input tensor expanded to. \n + + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n + +* @par Third-party framework compatibility +* Compatible with the ONNX operator Expand. +*/ + +REG_OP(ExpandD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .REQUIRED_ATTR(shape, ListInt) + .OP_END_FACTORY_REG(ExpandD) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h index d9883253..f05135d1 100644 --- a/third_party/fwkacllib/inc/ops/audio_ops.h +++ b/third_party/fwkacllib/inc/ops/audio_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h new file mode 100644 index 00000000..d0800a08 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h @@ -0,0 +1,58 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file avg_pool_1d_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_ +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief Generate an auxiliary matrix . \n + +*@par Inputs: +* @li x: A tensor. Must be one of the following types:uint8, int8,int16, int32, + int64, float16, float, double.The format must be NHWC NCHW NC1HWC0. + +*@par Attributes: +*@li ksize: Kernel size. Input type is int. +*@li strides: Input type is int. +*@li pads: Input type is listInt . +*@li ceil_mode: Bool, default value is false. +*@li count_include_pad: Bool, default value is false. \n + +*@par Outputs: +*y_tensor: A tensor with the same types as "x" . \n +*@par Third-party framework compatibility + +*Compatible with the TensorFlow operator Unbatch. +*/ +REG_OP(AvgPool1DAvgMatrix) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8, + DT_INT32, DT_INT64, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8, + DT_INT32, DT_INT64, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, Int) + .REQUIRED_ATTR(strides, Int) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, false) + .OP_END_FACTORY_REG(AvgPool1DAvgMatrix) +} +#endif \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h index 8a1c5a7b..ca4fe1db 100644 --- a/third_party/fwkacllib/inc/ops/batch_ops.h +++ b/third_party/fwkacllib/inc/ops/batch_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,10 +64,10 @@ the same types as "x_tensors" . It's a dynamic output. \n REG_OP(Batch) .DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \ DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE})) - .OUTPUT(y_index, TensorType({ DT_INT64 })) - .OUTPUT(y_id, TensorType({ DT_INT64 })) .DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \ DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL})) + .OUTPUT(y_index, TensorType({ DT_INT64 })) + .OUTPUT(y_id, TensorType({ DT_INT64 })) .REQUIRED_ATTR(num_batch_threads, Int) .REQUIRED_ATTR(max_batch_size, Int) .ATTR(max_enqueued_batches, Int, 10) @@ -107,11 +107,13 @@ across multiple sessions . \n REG_OP(Unbatch) .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ - DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) .INPUT(index, TensorType({DT_INT64})) .INPUT(id, TensorType({DT_INT64})) .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ - DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) .REQUIRED_ATTR(timeout_micros, Int) .ATTR(container, String, "") .ATTR(shared_name, String, "") @@ -146,13 +148,16 @@ across multiple sessions . \n REG_OP(UnbatchGrad) .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ - DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) .INPUT(index, TensorType({DT_INT64})) .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ - DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) .INPUT(id, TensorType({DT_INT64})) .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ - DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) .ATTR(container, String, "") .ATTR(shared_name, String, "") .OP_END_FACTORY_REG(UnbatchGrad) diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h index 5c83e161..dac78118 100644 --- a/third_party/fwkacllib/inc/ops/bitwise_ops.h +++ b/third_party/fwkacllib/inc/ops/bitwise_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,35 @@ namespace ge { +/** +*@brief Element-wise computes the bitwise left-shift of x and y . \n + +*@par Inputs: +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" +are 0D scalars. +* @li x: A Tensor. Must be one of the following types: int8, int16, int32, +int64, uint8, uint16, uint32, uint64. +* @li y: A Tensor. Has the same type as "x". \n + +*@par Outputs: +* z: A Tensor. Has the same type as "x". \n + +*@attention Constraints: +*Unique runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator LeftShift. +*/ + +REG_OP(LeftShift) + .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \ + DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64})) + .INPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \ + DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64})) + .OUTPUT(z, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \ + DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64})) + .OP_END_FACTORY_REG(LeftShift) + /** *@brief Element-wise computes the bitwise right-shift of x and y . \n diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h index 550e8b7d..08e54824 100644 --- a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h +++ b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h index e20607bf..890c52ae 100644 --- a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h +++ b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/condtake_ops.h b/third_party/fwkacllib/inc/ops/condtake_ops.h index 5e91eb07..029cffbf 100644 --- a/third_party/fwkacllib/inc/ops/condtake_ops.h +++ b/third_party/fwkacllib/inc/ops/condtake_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h index 7196b14f..e5bd3534 100644 --- a/third_party/fwkacllib/inc/ops/control_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -96,7 +96,7 @@ REG_OP(RefMerge) * Otherwise, the data is forwarded to "output_false" . \n *@par Inputs: - *@li data: The tensor to be forwarded. \ n + *@li data: The tensor to be forwarded. \n * Must be one of the following types: float16, float32, float64, * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool. *@li pred: A boolean scalar. The output port that will receive data . \n @@ -387,12 +387,12 @@ REG_OP(ControlTrigger) *@par Inputs: * Three inputs, including: -*@li x: One dimensional tensore of type int32, specifying queried shape, max size is 8. -*@li data_seq: One dimensional tensore of type int32, specifying the mapped table is queried. -*@li level_index: One dimensional tensore of type int32, specifying secondary index. \n +*@li x: One dimensional tensor of type int32, specifying queried shape, max size is 128. +*@li data_seq: One dimensional tensor of type int32, specifying the mapped table is queried. +*@li level_index: One dimensional tensor of type int32, specifying secondary index. \n *@par Outputs: -*@li y: A Tensor with shape [batch, 8], of type int32, specifying index of shape in the map. +*@li y: A Tensor with shape [8], of type int32, specifying index of shape in the map. *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. */ diff --git a/third_party/fwkacllib/inc/ops/correlation.h b/third_party/fwkacllib/inc/ops/correlation.h new file mode 100644 index 00000000..caebba50 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/correlation.h @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file correlation.h + * \brief + */ +#ifndef GE_OP_CORRELATION_OPS_H +#define GE_OP_CORRELATION_OPS_H + +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief Computes a 2D Correlation given 4D "x" and "filter" tensors. +* +*@par Inputs: +* @li filter: A 4D tensor of filters. +* @li x: A 4D tensor of input images, batch number must equal to batch +* number of "filter", and channel must equal to channel of "filter". +* +*@par Attributes: +* @li groups: set correlation mode, must be 1 or channel. +* +*@par Outputs: +*y: A Tensor. Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with caffe correlation custom operator. +*/ +REG_OP(Correlation) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .ATTR(groups, Int, 1) + .OP_END_FACTORY_REG(Correlation) +} // namespace ge + +#endif // GE_OP_NN_CALCULATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h index 2c75fd09..e907b828 100644 --- a/third_party/fwkacllib/inc/ops/ctc_ops.h +++ b/third_party/fwkacllib/inc/ops/ctc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -137,6 +137,87 @@ REG_OP(CTCBeamSearchDecoder) .OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE})) .OP_END_FACTORY_REG(CTCBeamSearchDecoder) +/** +*@brief The Connectionist Temporal Classification loss. + +*@par Inputs: +*@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size, + and C = number of classes (including blank). + It represent the logarithmized probabilities of the outputs. +*@li targets: Tensor of size (N, S), where S= max target length. + It represent the target sequences. +*@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs. +*@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets. + +*@par Outputs: +*@li neg_log_likelihood: A loss value which is differentiable with respect to each input node. +*@li log_alpha: The probability of possible trace of input to target. + +*@par Attributes: +*@li blank : Blank label. Default 0. +*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'. +*@li zero_infinity : Whether to zero infinite losses and the associated gradients. + +*@par Third-party framework compatibility +* Compatible with Pytorch CTCLoss operator. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(CTCLossV2) + .INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(targets, TensorType({DT_INT32, DT_INT64})) + .INPUT(input_lengths, TensorType({DT_INT32, DT_INT64})) + .INPUT(target_lengths, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(neg_log_likelihood, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(log_alpha, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(blank, Int, 0) + .ATTR(reduction, String, "mean") + .ATTR(zero_infinity, Bool, false) + .OP_END_FACTORY_REG(CTCLossV2) + +/** +*@brief The Connectionist Temporal Classification loss grad. + +*@par Inputs: +*@li grad_out: Gradient renewal coefficient. Tensor of size (N), where N = batch size. +*@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size, + and C = number of classes (including blank). + It represent the logarithmized probabilities of the outputs. +*@li targets: Tensor of size (N, S), where S= max target length. + It represent the target sequences. +*@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs. +*@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets. +*@li neg_log_likelihood: A loss value which is differentiable with respect to each input node. +*@li log_alpha: The probability of possible trace of input to target. + +*@par Outputs: +*@li grad: Tensor of size (T, N, C), The grad of Connectionist Temporal Classification loss. + +*@par Attributes: +*@li blank : Blank label. Default 0. +*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'. +*@li zero_infinity : Whether to zero infinite losses and the associated gradients. + +*@par Third-party framework compatibility +* Compatible with Pytorch CTCLoss operator. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(CTCLossV2Grad) + .INPUT(grad_out, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(targets, TensorType({DT_INT32, DT_INT64})) + .INPUT(input_lengths, TensorType({DT_INT32, DT_INT64})) + .INPUT(target_lengths, TensorType({DT_INT32, DT_INT64})) + .INPUT(neg_log_likelihood, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(log_alpha, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(grad, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(blank, Int, 0) + .ATTR(reduction, String, "mean") + .ATTR(zero_infinity, Bool, false) + .OP_END_FACTORY_REG(CTCLossV2Grad) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index bb937a75..6021f4e3 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -908,7 +908,7 @@ REG_OP(TensorArray) .OUTPUT(handle, TensorType({DT_RESOURCE})) .OUTPUT(flow, TensorType({DT_FLOAT})) .REQUIRED_ATTR(dtype, Type) - .ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE) + .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) .ATTR(dynamic_size, Bool, false) .ATTR(clear_after_read, Bool, true) .ATTR(identical_element_shapes, Bool, false) @@ -963,7 +963,7 @@ REG_OP(TensorArrayConcat) DT_QUINT8, DT_QINT32})) .OUTPUT(lengths, TensorType({DT_INT64})) .REQUIRED_ATTR(dtype, Type) - .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_SHAPE) + .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_RANK) .OP_END_FACTORY_REG(TensorArrayConcat) /** @@ -999,7 +999,7 @@ REG_OP(TensorArrayGather) DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT32})) .REQUIRED_ATTR(dtype, Type) - .ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE) + .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) .OP_END_FACTORY_REG(TensorArrayGather) /** @@ -1430,6 +1430,24 @@ REG_OP(OrderedMapClear) .ATTR(shared_name, String, "") .OP_END_FACTORY_REG(OrderedMapClear) +/** +*@brief FakeQueue, support tf api FixedLengthRecordReader. \n + +*@par Inputs: +*Including: +* @li resource: A Tensor of type DT_RESOURCE. + +*@par Outputs: +*handle: A Tensor of type DT_STRING ref. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator FakeQueue. +*/ +REG_OP(FakeQueue) + .INPUT(resource, TensorType({DT_RESOURCE})) + .OUTPUT(handle, TensorType({DT_STRING})) + .OP_END_FACTORY_REG(FakeQueue) + /** *@brief Returns the number of incomplete elements in the underlying container. \n @@ -2258,6 +2276,7 @@ REG_OP(LruCache) .ATTR(shared_name, String, "LruCache") .ATTR(cache_size, Int, 100000) .ATTR(load_factor, Float, 1) + .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(LruCache) /** @@ -2277,9 +2296,9 @@ REG_OP(CacheAdd) .INPUT(cache, TensorType({DT_RESOURCE})) .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) .OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) - .OUTPUT(swap_in_idx, TensorType({DT_INT64})) + .OUTPUT(swap_in_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) .OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) - .OUTPUT(swap_out_idx, TensorType({DT_INT64})) + .OUTPUT(swap_out_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) .OP_END_FACTORY_REG(CacheAdd) /** @@ -2295,9 +2314,65 @@ REG_OP(CacheAdd) REG_OP(CacheRemoteIndexToLocal) .INPUT(cache, TensorType({DT_RESOURCE})) .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) - .OUTPUT(local_idx, TensorType({DT_INT64})) + .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) .OP_END_FACTORY_REG(CacheRemoteIndexToLocal) +/** +*@brief CacheAllToLocalIndex, get id in cache +*@par Inputs: +*cache: resource data +*local_idx: id in cache. +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(CacheAllIndexToLocal) + .INPUT(cache, TensorType({DT_RESOURCE})) + .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(CacheAllIndexToLocal) + +/** +*@brief DynamicGetNext, dynamic get next data +*@par Inputs: +*x: the iterator, all types are available +*@par Outputs: +*y: the date in iterator, all types are available +*@par Attributes: +*output_types: types of all outputs +*output_shapes: shapes of all outputs +*_dynamic_graph_execute_mode: dynamic graph execution mode, +value is one of lazy_recompile and dynamic_execute +*_getnext_inputs_shape_range: shape ranges of outputs, +it works where _dynamic_graph_execute_mode is dynamic_execute +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicGetNext) + .INPUT(x, TensorType::ALL()) + .DYNAMIC_OUTPUT(y, TensorType::ALL()) + .ATTR(output_types, ListType, {}) + .ATTR(output_shapes, ListListInt, {{}, {}}) + .ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile") + .ATTR(_getnext_inputs_shape_range, String, "") + .OP_END_FACTORY_REG(DynamicGetNext) + +/** +*@brief AdpGetNext +*@par Outputs: +*y: the data in iterator, all types are available +*@par Attributes: +*output_types: types of all outputs +*output_shapes: shapes of all outputs +*queue_name: cdqm queue name +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AdpGetNext) + .DYNAMIC_OUTPUT(y, TensorType::ALL()) + .ATTR(output_types, ListType, {}) + .ATTR(output_shapes, ListListInt, {{}, {}}) + .ATTR(queue_name, String, "") + .OP_END_FACTORY_REG(AdpGetNext) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index c64bc138..f61e2939 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,10 +28,13 @@ namespace ge { *@par Inputs: *Dynamic inputs, including: -* @li x: A list of Tensor objects, each with same shape and type. The supported types are: +*x: A list of Tensor objects, each with same shape and type. The supported types are: * float16, float32, double, int32, uint8, int16, int8, complex64, int64, * qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n +*@par Attributes: +*N: An required attribute of type int32, means nums of inputs. \n + *@par Outputs: *y: A Tensor. Has the same shape and type as the elements of "x". \n @@ -122,7 +125,8 @@ REG_OP(MinimumGrad) *@par Inputs: *One input: *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, - int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n + int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. + For float32 type, the actual calculation on the chip is based on float16. \n *@par Attributes: *dst_type: An required attribute of type int32, specifying the dst data type. \n @@ -142,6 +146,8 @@ REG_OP(Cast) /** *@brief Returns the truth value of (x1 >= x2) element-wise. \n +*when input is int32 and (x2 - x1) > 2**31 or < -2**31 +*aicore accuracy is not guaranteed \n *@par Inputs: *Two inputs, including: @@ -163,6 +169,8 @@ REG_OP(GreaterEqual) /** *@brief Returns the truth value of (x1 < x2) element-wise. \n +*when input is int32 and (x2 - x1) > 2**31 or < -2**31 +*aicore accuracy is not guaranteed \n *@par Inputs: *Two inputs, including: @@ -322,8 +330,8 @@ REG_OP(Sub) *@brief computes the absolute value of a tensor. \n *@par Inputs: -*One inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n +*One input, including: \n +*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n *@par Outputs: *y: A Tensor. Has the same type as "x". \n @@ -563,6 +571,8 @@ REG_OP(InvGrad) /** *@brief: Returns the truth value of (x <= y) element-wise. \n +*when input is int32 and (x2 - x1) > 2**31 or < -2**31 +*aicore accuracy is not guaranteed \n *@par Inputs: * Two inputs, including: @@ -611,6 +621,15 @@ REG_OP(Log1p) *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@attention Constraints: +*@li x2: The input data does not support 0 +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*requirement of double thousandths in the mini form +*@li Due to different architectures, the calculation results of this operator +*on NPU and CPU may be inconsistent +*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 + *@par Third-party framework compatibility *Compatible with the TensorFlow operator Mod. */ @@ -1020,7 +1039,7 @@ REG_OP(BesselI1e) * y = log_base(shift + scale * x), with "base" > 0. \n * @par Inputs: -* @li x: A Tensor of type complex64, complex128, float16, float32 or double. \n +* x: A Tensor of type complex64, complex128, float16, float32 or double. \n * @par Attributes: * @li base: An optional float32, specifying the base "e". Defaults to "-1.0" @@ -1065,7 +1084,7 @@ REG_OP(Log) * uint8, int8, uint16, int16, int32, int64, complex64, complex128. \n * @attention Constraints: -* @li "x1" and "x2" have incompatible shapes or types. \n +* "x1" and "x2" have incompatible shapes or types. \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator Multiply. @@ -1451,6 +1470,8 @@ REG_OP(ReciprocalGrad) /** *@brief Returns the truth value of (x1 > x2) element-wise. \n +*when input is int32 and (x2 - x1) > 2**31 or < -2**31 +*aicore accuracy is not guaranteed \n *@par Inputs: *@li x1: A Tensor of type float16, float32, double, int64, int32, int16, int8, @@ -2042,6 +2063,15 @@ REG_OP(FloorDiv) * *@par Outputs: *y: Result remainder. + +*@attention Constraints: +*@li x2: The input data does not support 0 +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*requirement of double thousandths in the mini form +*@li Due to different architectures, the calculation results of this operator +*on NPU and CPU may be inconsistent +*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 + *@par Third-party framework compatibility * Compatible with the TensorFlow operator FloorMod. */ @@ -2168,6 +2198,14 @@ REG_OP(Tan) *@par Outputs: *y: A Tensor. Has the same type as "x1". \n +*@attention Constraints: +*@li x2: The input data does not support 0 +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*requirement of double thousandths in the mini form +*@li Due to different architectures, the calculation results of this operator +*on NPU and CPU may be inconsistent +*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 + *@par Third-party framework compatibility *@li Compatible with the TensorFlow operator TruncateMod. */ @@ -2424,6 +2462,25 @@ REG_OP(Eltwise) .ATTR(coeff, ListFloat, {}) .OP_END_FACTORY_REG(Eltwise) +/** + *@brief Computes the inverse error function of each element of input. \n + + *@par Inputs: + *One inputs, including: + * @li input_x: A tensor. Must be one of the following types: + * float16, float32. \n + + *@par Outputs: + *y: A Tensor with the same type and shape of input_x's. \n + + *@par Third-party framework compatibility + *Compatible with the Pytorch operator Erfinv. \n + */ +REG_OP(Erfinv) + .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(Erfinv) + /** *@brief Computes element-wise population count. \n @@ -2829,9 +2886,9 @@ REG_OP(AdamApplyOneAssign) *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LambApplyOptimizerAssign) - .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2842,6 +2899,8 @@ REG_OP(LambApplyOptimizerAssign) .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambApplyOptimizerAssign) /** @@ -2873,7 +2932,8 @@ REG_OP(LambApplyWeightAssign) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambApplyWeightAssign) /** @@ -3183,12 +3243,14 @@ REG_OP(Fills) *@brief Add tensor with scale. \n *@par Inputs: -*Five inputs, including: -* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. -* @li x2: A scale. Must be float. \n +*One input, including: \n +*x: A Tensor. Must be one of the following types:int32,int16, float16, float32. \n + +*@par Attributes: +*value: A scale. Must be float. \n *@par Outputs: -*@li y: A Tensor. Has the same type and shape as "x1". \n +*y: A Tensor. Has the same type and shape as "x1". \n *@par Third-party framework compatibility: * Compatible with the Pytorch operator adds. @@ -3329,8 +3391,441 @@ REG_OP(TensorRedirect) .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) .OP_END_FACTORY_REG(TensorRedirect) -} // namespace ge +/** +* @brief Performs the element-wise division of tensor x2 by tensor x3, +* multiply the result by the scalar value and add it to tensor x1 + +* @par Inputs: +* Three inputs, including: +* @li input_data: A mutable input Tensor. Must be one of the following types: +* float16, float32. +* @li x1: A mutable input Tensor of the same type as x1. +* @li x2: A mutable input Tensor of the same type as x1. +* @li value: A mutable input Tensor. Must be one of the following types: +* float16, float32, int32. \n + +* @par Outputs: +* @li y: A mutable Tensor. Has the same type as "x1". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Addcdiv. +*/ +REG_OP(Addcdiv) + .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(Addcdiv) + +/** +* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, +* multiply the result by the scalar value and add it to tensor input_data + + +* @par Inputs: +* Three inputs, including: +* @li input_data: A mutable input Tensor. Must be one of the following types: +* float16, float32, int8, int32, uint8. +* @li x1: A mutable input Tensor of the same type as x1. +* @li x2: A mutable input Tensor of the same type as x1. +* @li value: A tensor which includes only one element of the same type as x1. \n + +* @par Outputs: +* @li y: A mutable output Tensor. Has the same type as "x1". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Addcmul. +*/ +REG_OP(Addcmul) + .INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .OP_END_FACTORY_REG(Addcmul) +/** +* @brief Computes the result of x2 * alpha + x1. + +* @par Inputs: +* @li x1: An ND tensor of type float16, float32, int32. +* @li x2: An ND tensor of type float16, float32, int32. +* @li alpha: A scalar tensor of type float16, float32. \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same shape and type as "x1". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Axpy. +*/ +REG_OP(AxpyV2) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OP_END_FACTORY_REG(AxpyV2) + +/** +* @brief Computes the result of x1 - x2. + +* @par Inputs: +* @li x1: An ND tensor of type float16, float, int32. +* @li x2: An ND tensor of type float16, float, int32. \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same type as "x1". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Sub. +*/ +REG_OP(PtSub) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OP_END_FACTORY_REG(PtSub) + +/** +* @brief Add the partial values of two tensors in format NC1HWC0. + +* @par Inputs: +* @li x1: A Tensor in 5HD, and must be one of the following types: float16, +* float32. \n +* @li x2: A Tensor of the same type as "x1", and the same shape as "x1", +* except for the C1 value. \n + +* @par Attributes: +* @li x1_c1_offset: A required int. Offset value of C1 in "x1". \n +* @li x2_c1_offset: A required int. Offset value of C1 in "x2". \n +* @li c1_len: A required int. C1 len of "y". The value must be less than +* the difference between C1 and offset in "x1" and "x2". \n + +* @par Outputs: +* @li y: A Tensor of the same type as "x1", and the same shape as "x1", +* except for the C1 value. Record the result after adding. \n +*/ +REG_OP(StrideAdd) + .INPUT(x1, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(x2, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .REQUIRED_ATTR(x1_c1_offset, Int) + .REQUIRED_ATTR(x2_c1_offset, Int) + .REQUIRED_ATTR(c1_len, Int) + .OP_END_FACTORY_REG(StrideAdd) + +/** +* @brief Compare two tensors are totally equal or not, only output a bool value" + +* @par Inputs: +* Two inputs, including: +* @li input_x: A Tensor. the first tensor. \n +* @li input_y: A Tensor. the second tensor. \n + +* @par Outputs: +* @li output_z: A Tensor. Bool type, compare result of the two inputs. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch equal operator. \n +*/ +REG_OP(TensorEqual) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OUTPUT(output_z, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(TensorEqual) + +/** + * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). + * All inputs and outputs must have the same data type. This operator supports multidirectional + * (i.e., Numpy-style) broadcasting + * + * @par inputs + * one input including: + * @li x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64 + * + * @par output + * one output including: + * @li y:A Tensor of the same type as x + * + */ +REG_OP(MaxN) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(MaxN) + + +/** + * @brief Calculates x * maske * value. + * + * @par Inputs: + * @li x: An tensor of type float16 or float32, specifying the input to the data layer. + * @li mask: An tensor of type int8 or float16 or float32, be same shape with x. \n + * + * @par Attributes: + * value: A optional float. \n + * + * @par Outputs: + * y: The output tensor of type float16 or float32. + @ li y:A Tensor of the same type and shape as x + * + */ +REG_OP(MaskedScale) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32})) + .INPUT(mask, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) + .REQUIRED_ATTR(value, Float) + .OP_END_FACTORY_REG(MaskedScale) + +/** + * @brief Calculate the lerp function. \n + + * @par Inputs: + * Three inputs, including: + * @li start: A tensor. Must be one of the following types: + * float16, float32. \n + * @li end: A tensor. Must be one of the following types: + * float16, float32. \n + * @li weight: A tensor. Must be one of the following types: + * float16, float32. \n + + * @par Outputs: + * y: A Tensor with the same type and shape of input_x's. \n + + * @par Third-party framework compatibility + * Compatible with the Pytorch operator Lerp. \n + */ +REG_OP(Lerp) + .INPUT(start, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(end, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(Lerp) + +/** +*@brief Returns the num value of abs(x1-x2) > atol+rtol*abs(x2) element-wise. \n + +* +*@par Inputs: +*@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16 +*@li x2: A tensor of the same type as "x1". +* +*@par Attributes: +* atol: Defaults to "1e-05". +* rtol: Defaults to "1e-03". +* +*@par Outputs: +* num: A tensor of type float32. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* +*/ +REG_OP(DataCompare) + .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 })) + .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 })) + .OUTPUT(num, TensorType({DT_FLOAT})) + .ATTR(atol, Float, 1e-5) + .ATTR(rtol, Float, 1e-3) + .OP_END_FACTORY_REG(DataCompare) + +/** +*@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0 +*otherwise The input does not need to explicitly be a 2D vector.The "axis" attribute indicates the dimension along +*which Hardmax will be performed.The output tensor has the same shape and contains the Hardmax values of the +*corresponding input. +* +*@par inputs +*one input including: +*@li x: input A Tensor.Must be one of the following types:float32,float16 +* +*@par Attributes: +*@li axis:A required int attribute that decides which dimension will be used to cal the hard_max +* +*@par output: +*one output including: +*@li y:A Tensor of the same type as x +* +*/ +REG_OP(HardMax) + .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(axis, Int, -1) + .OP_END_FACTORY_REG(HardMax) + +/** +* @brief Computes the dot product (inner product) of two tensors. This function does not broadcast. + +* @par Inputs: +* Two inputs, including: +* @li input_x: A Tensor. the first tensor must be 1d. \n +* @li input_y: A Tensor. the second tensor must be 1d. \n + +* @par Outputs: +* @li output: A Tensor. Result of the two inputs, must be 1d. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch dot operator. \n +*/ +REG_OP(Dot) + .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) + .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) + .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) + .OP_END_FACTORY_REG(Dot) + +/** +*@brief Returns a new tensor with boolean elements representing \n +*if each element of input is “close” to the corresponding element of other \n + +*@par Inputs: +*Two inputs, including: +* @li x1: A tensor. Must be one of the following types: +* float16, float32, int32. \n +* @li x2: A tensor with the same type and shape of x1's. \n + +*@par Attributes: +*@li rtol: An optional float.Defaults to 1e-05. \n +*@li atol: An optional float.Defaults to 1e-08. \n +*@li equal_nan: An optional bool.Defaults to false. \n + +*@par Outputs: +*y: A Tensor bool with the same shape of x1's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator isclose. \n +*/ +REG_OP(IsClose) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_BOOL})) + .ATTR(rtol, Float, 1e-05) + .ATTR(atol, Float, 1e-08) + .ATTR(equal_nan, Bool, false) + .OP_END_FACTORY_REG(IsClose) + +/** +* @brief Returns the reverse tensor of the ArgMax operator of a tensor. \n + +* @par Inputs: +* three input, including: +* var: A Tensor of type float16, float32, int32 or int8. \n +* indices: A Tensor of type int32. \n +* updates: A Tensor of type float16, float32, int32 or int8. \n + +* @par Attributes: +* @li dimension: An integer of type int, specifying the axis information of the index with the maximum value.\n + +* @par Outputs: +* y: A Tensor of type float16, float32, int32 or int8. \n +* +*@attention Constraints: +*@li indices: only support int32,and shape same to "updates" +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". +*@li y:A Tensor, the type and shape is same to "var" \n + +*@par Third-party framework compatibility +* not support all scene like pytorch operator scatter +* exp: +* var.shape=[2,3,4,5], dim=2, the shape of indices and updates should be [2,3,5] +* not support the shape of indices and updates is [2,3,2,5] like pytorch operator scatter. \n +*/ +REG_OP(ArgMaxGrad) + .INPUT(var, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .REQUIRED_ATTR(dimension, Int) + .OP_END_FACTORY_REG(ArgMaxGrad) + +/** +* @brief Returns the reverse tensor of the ArgMax operator of a tensor. \n + +* @par Inputs: +* three input, including: +* var: A Tensor of type float16, float32, int32 or int8. \n +* indices: A Tensor of type int32. \n +* updates: A Tensor of type float16, float32, int32 or int8. \n +* assist: A Tensor of int32,also a assist matrix and it's shape must match the shape of var \n + +* @par Attributes: +* @li dimension: An integer of type int, specifying the axis information of the index with the maximum value.\n + +* @par Outputs: +* y: A Tensor of type float16, float32, int32 or int8. \n + +*@attention Constraints: +*@li indices: only support int32,and shape same to "updates" +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". +*@li y:A Tensor, the type and shape is same to "var" \n + +*@par Third-party framework compatibility +* not support all scene like pytorch operator scatter +* exp: +* var.shape=[2,3,4,5], dim=2, the shape of indices and updates should be [2,3,5] +* not support the shape of indices and updates is [2,3,2,5] like pytorch operator scatter. \n +*/ +REG_OP(ArgMaxGradD) + .INPUT(var, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .INPUT(assist, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .REQUIRED_ATTR(dimension, Int) + .OP_END_FACTORY_REG(ArgMaxGradD) + +/** +*@brief Calculates the reversed outputs of the function "AddMatMatElements" +* c = c * beta + alpha * a * b + +*@par Inputs: +*Three inputs, including: +* @li c: A mutable Tensor. Must be one of the following types: +* float16, float32. +* @li a: A mutable Tensor of the same type as "c". +* @li b: A mutable Tensor of the same type as "c". +* @li beta: A mutable scalar of the same type as "c". +* @li alpha: A mutable scalar of the same type as "c". \n + +*@par Outputs: +* @li c: A mutable Tensor. Has the same type as "c". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AddMatMatElements. +*/ +REG_OP(AddMatMatElements) + .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(a, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(b, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(c, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(AddMatMatElements) + +/** +*@brief Returns cosine similarity between x1 and x2,computed along dim. \n + +*@par Inputs: +*Two inputs, including: +* @li input_x1: A tensor. Must be the following types: +* float32. \n + +*@par Inputs: +*@li input_x2: A tensor. Must of the following types: +* float32. \n + +*@par Outputs: +*@li output_y: A Tensor with the same type of input_x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator CosineSimilarity. \n +*/ +REG_OP(CosineSimilarity) + .INPUT(input_x1, TensorType({DT_FLOAT})) /* "First operand." */ + .INPUT(input_x2, TensorType({DT_FLOAT})) /* "Second operand." */ + .OUTPUT(output_y, TensorType({DT_FLOAT})) /* "Result, has same element type as two inputs" */ + .ATTR(dim, Int, 1) + .ATTR(eps, Float, 1e-8) + .OP_END_FACTORY_REG(CosineSimilarity) + +} // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index 598d3ad3..b09ac058 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/get_data_ops.h b/third_party/fwkacllib/inc/ops/get_data_ops.h index 33dc4f14..e5518ef8 100644 --- a/third_party/fwkacllib/inc/ops/get_data_ops.h +++ b/third_party/fwkacllib/inc/ops/get_data_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/globalavgpool.h b/third_party/fwkacllib/inc/ops/globalavgpool.h new file mode 100644 index 00000000..06f03d30 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/globalavgpool.h @@ -0,0 +1,49 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file globalavgpool.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_GLOBALAVERAGEPOOL_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_GLOBALAVERAGEPOOL_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief GlobalAveragePool consumes an input tensor X and applies average pooling across the values in the same channel. +This is equivalent to AveragePool with kernel size equal to the spatial dimension of input tensor \n + +*@par Inputs: +*@li x: Input data tensor from the previous operator; dimensions for image case are (N x C x H x W), +where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. +For non image case, the dimensions are in the form of (N x C x D1 x D2 ... Dn), where N is the batch size. + +*@par Outputs: +*y: Output data tensor from pooling across the input tensor. The output tensor has the same rank as the input. +The first two dimensions of output shape are the same as the input (N x C), while the other dimensions are all 1 + +*@par Restrictions: +*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. +*/ +REG_OP(GlobalAveragePool) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(GlobalAveragePool) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_GLOBALAVGPOOL_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index b90b225e..497f6a68 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,8 +45,6 @@ REG_OP(HcomAllGather) .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) .REQUIRED_ATTR(rank_size, Int) .REQUIRED_ATTR(group, String) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomAllGather) /** @@ -77,8 +75,6 @@ REG_OP(HcomAllReduce) .REQUIRED_ATTR(group, String) .ATTR(fusion, Int, 1) .ATTR(fusion_id, Int, -1) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomAllReduce) /** @@ -91,7 +87,7 @@ REG_OP(HcomAllReduce) input of this rank will be broadcast to other ranks. * @li fusion: A required integer identifying if the op need to fusion,the default value is none fusion - * @li fusion: A required integer identifying the fusion id if para fusion + * @li fusion_id: A required integer identifying the fusion id if para fusion is set. * @li group: A required string identifying the group name of ranks participating in the op. @@ -109,10 +105,39 @@ REG_OP(HcomBroadcast) .REQUIRED_ATTR(group, String) .ATTR(fusion, Int, 0) .ATTR(fusion_id, Int, -1) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomBroadcast) +/** + * @brief preforms reduction from others rank to rootrank + * @par Inputs: +* @li root_rank: A required integer identifying the root rank in the op + the reduction result will be on this root rank + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. + * @par Attributes: + * @li reduction: A required string identifying the reduction operation to + perform.The supported operation are: "sum", "max", "min", "prod". + * @li group: A required string identifying the group name of ranks + participating in the op. + * @li fusion: An optional integer identifying the fusion flag of the op. + 0: no fusion; 1 (default): fusion; 2: fusion the ops by fusion id. + * @li fusion_id: An optional integer identifying the fusion id of the op. + * The HcomReduce ops with the same fusion id will be fused. + * @par Outputs: + * y: A Tensor. Has the same type as "x". + * @attention Constraints: + *"group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. + */ +REG_OP(HcomReduce) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .REQUIRED_ATTR(root_rank, Int) + .REQUIRED_ATTR(reduction, String) + .REQUIRED_ATTR(group, String) + .ATTR(fusion, Int, 0) + .ATTR(fusion_id, Int, -1) + .OP_END_FACTORY_REG(HcomReduce) /** * @brief Performs reduction across all input tensors, scattering in equal blocks among ranks, each rank getting a chunk of data based on its rank @@ -139,8 +164,6 @@ REG_OP(HcomReduceScatter) .REQUIRED_ATTR(reduction, String) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(rank_size, Int) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomReduceScatter) /** @@ -167,8 +190,6 @@ REG_OP(HcomSend) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(sr_tag, Int) .REQUIRED_ATTR(dest_rank, Int) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomSend) /** @@ -202,8 +223,6 @@ REG_OP(HcomReceive) .REQUIRED_ATTR(src_rank, Int) .REQUIRED_ATTR(shape, ListInt) .REQUIRED_ATTR(dtype, Type) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomReceive) /** @@ -219,6 +238,15 @@ REG_OP(HcomRemoteRead) .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(HcomRemoteRead) +/** + * @brief Performs Remote Ref Read of input tensors + * @par Inputs: + * remote: A tensor. describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length + * cache_var: The local base address + * local_offset: Skip step length + * @par Outputs: + * cache_var: The local base address + */ REG_OP(HcomRemoteRefRead) .INPUT(remote, TensorType({DT_UINT64})) .INPUT(cache_var, TensorType({DT_UINT64})) @@ -239,11 +267,90 @@ REG_OP(HcomRemoteWrite) .INPUT(local, TensorType::ALL()) .OP_END_FACTORY_REG(HcomRemoteWrite) +/** + * @brief Performs Remote Write of input tensors + * @par Inputs: + * remote: A tensor. describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length + * @par Inputs: + * local: A Tensor. whose value is length / size_of(Type) + */ REG_OP(HcomRemoteScatterWrite) .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) .INPUT(local, TensorType::ALL()) .OPTIONAL_INPUT(local_offset, TensorType({DT_UINT64})) .OP_END_FACTORY_REG(HcomRemoteScatterWrite) +/** + * @brief All ranks send different amount of data to, and receive different + amount of data from, all ranks. + * @par Inputs: + * Five inputs, including: + * @li send_data: A tensor. the memory to send. + * @li send_counts: A list, where entry i specifies the number of elements in + send_data to send to rank i. + * @li send_displacements: A list, where entry i specifies the displacement + (offset from sendbuf) from which to send data to rank i. + * @li recv_counts: A list, where entry i specifies the number of + elements to receive from rank i. + * @li recv_displacements: A list, , where entry i specifies the displacement + (offset from recv_data) to which data from rank i should be written. + * @par Outputs: + * recv_data: A Tensor has same element type as send_data. + * @par Attributes: + * @li group: A string identifying the group name of ranks participating in + the op. +* @attention all ranks participating in the op should be full-mesh networking + using the RDMA. + */ +REG_OP(HcomAllToAllV) + .INPUT(send_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .INPUT(send_counts, TensorType({DT_INT64})) + .INPUT(send_displacements, TensorType({DT_INT64})) + .INPUT(recv_counts, TensorType({DT_INT64})) + .INPUT(recv_displacements, TensorType({DT_INT64})) + .OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .REQUIRED_ATTR(group, String) + .OP_END_FACTORY_REG(HcomAllToAllV) + +/** + * @brief All ranks send different amount of data to, and receive different + amount of data from, all ranks. And concat all data descripting by addrinfo + togather into output gathered. + * @par Inputs: + * Four inputs, including: + * @li addrinfo: A tensor, descripting the memory info(address, length) to send. + * @li addrinfo_count_per_rank: A list, where entry i specifies the number of + elements in send_data to send to rank i. + * @li recv_counts: A list, where entry i specifies the number of + elements to receive from rank i. + * @li recv_displacements: A list, , where entry i specifies the displacement + (offset from recv_data) to which data from rank i should be written. + * @par Outputs: + * Two outputs, including: + * @li recv_data: A Tensor has same element type as dtype. + * @li gathered: A Tensor has same element type as dtype. + * @par Attributes: + * @li group: A string identifying the group name of ranks participating in + the op. + * @li dtype: Datatype of send buffer elements. + * @li addr_length: descripting the element memory length in the addrinfo. + -2: all element memory length in the addrinfo is the same, but it is unknown. + -1: all element memory length is unknown. + >0: all element memory length in the addrinfo is the same. the attr value is the memory length. + * @attention all ranks participating in the op should be full-mesh networking + using the RDMA. + */ +REG_OP(HcomGatherAllToAllV) + .INPUT(addrinfo, TensorType({DT_UINT64})) + .INPUT(addrinfo_count_per_rank, TensorType({DT_INT64})) + .INPUT(recv_counts, TensorType({DT_INT64})) + .INPUT(recv_displacements, TensorType({DT_INT64})) + .OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .OUTPUT(gathered, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .REQUIRED_ATTR(group, String) + .REQUIRED_ATTR(dtype, Type) + .REQUIRED_ATTR(addr_length, Int) + .OP_END_FACTORY_REG(HcomGatherAllToAllV) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/hvd_ops.h b/third_party/fwkacllib/inc/ops/hvd_ops.h index a49ec5ed..00299ef7 100644 --- a/third_party/fwkacllib/inc/ops/hvd_ops.h +++ b/third_party/fwkacllib/inc/ops/hvd_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index ce3262f9..6909345a 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,22 @@ #include "graph/operator_reg.h" namespace ge { +/** +*@brief Decode the frame(s) of a GIF-encoded image to a uint8 tensor . \n + +*@par Inputs: +*@li contents:A Tensor of type string. 0-D. The GIF-encoded image. \n + +*@par Outputs: +*image:A Tensor of type uint8. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow DecodeGif operator. +*/ +REG_OP(DecodeGif) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(image, TensorType({DT_UINT8})) + .OP_END_FACTORY_REG(DecodeGif) /** *@brief Adjust the hue of one or more images . \n @@ -31,11 +47,12 @@ namespace ge { *@par Inputs: *Input images is a tensor of at least 3 dimensions. The last dimension is interpretted as channels, and must be three. Inputs include: -*@li images:A Tensor of type float. Images to adjust. At least 3-D. +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. *@li delta:A Tensor of type float. A float delta to add to the hue . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images is a tensor of at least 3 dimensions. The last dimension is @@ -57,11 +74,12 @@ REG_OP(AdjustHue) *@par Inputs: *Input images is a tensor of at least 3 dimensions. The last dimension is interpretted as channels, and must be three. Inputs include: -*@li images:A Tensor of type float. Images to adjust. At least 3-D. +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. *@li scale:A Tensor of type float. A float scale to add to the saturation . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images is a tensor of at least 3 dimensions. The last dimension is @@ -83,11 +101,12 @@ REG_OP(AdjustSaturation) *@par Inputs: *Input images is a tensor of at least 3 dimensions. The last 3 dimensions are interpreted as '[height, width, channels]'. Inputs include: -*@li images:A Tensor of type float. Images to adjust. At least 3-D. +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. *@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images is a tensor of at least 3 dimensions. The last dimension is @@ -112,7 +131,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n *Input images must be a 4-D tensor. Inputs include: *@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, int16, int32, int64, float16, float, double. A 4-D tensor of shape -[batch, image_height, image_width, depth]. +[batch, image_height, image_width, depth]. The format must be NHWC. *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch). @@ -127,7 +146,7 @@ extrapolation, when applicable. NearestNeighbor . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images must be a 4-D tensor . \n @@ -193,7 +212,9 @@ boxes tensor . \n *@par Inputs: *Input images and grads must be a 4-D tensor. Inputs include: *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. +The format must be NHWC. *@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. +The format must be NHWC. Both image_height and image_width need to be positive. *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor specifies the coordinates of a box in the box_ind[i] image and is specified in @@ -233,6 +254,7 @@ images tensor . \n *@par Inputs: *Input grads must be a 4-D tensor. Inputs include: *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. +The format must be NHWC. *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor specifies the coordinates of a box in the box_ind[i] image and is specified in normalized coordinates [y1, x1, y2, x2]. @@ -248,7 +270,8 @@ method: A string specifying the interpolation method. Only 'bilinear' is supported for now . \n *@par Outputs: -*y:A 4-D tensor of shape [batch, image_height, image_width, depth] . \n +*y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format +must be NHWC. \n *@attention Constraints: *Input grads must be a 4-D tensor . \n @@ -273,6 +296,7 @@ REG_OP(CropAndResizeGradImage) *@par Inputs: *Input x must be a 4-D tensor. Inputs include: *@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. +The format must be NHWC. *@li size: A 1-D tensor of 2 elements containing the size of the glimpses to extract. The glimpse height must be specified first, following by the glimpse width. @@ -293,7 +317,7 @@ uniform_noise . \n *@par Outputs: *y:A tensor representing the glimpses [batch_size, glimpse_height, -glimpse_width, channels] . \n +glimpse_width, channels]. The format must be NHWC. \n *@attention Constraints: *Input x must be a 4-D tensor . \n @@ -340,7 +364,8 @@ REG_OP(HSVToRGB) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li images: 4-D with shape [batch, height, width, channels]. +*@li images: 4-D with shape [batch, height, width, channels]. The format must +be NHWC. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images. *@li min: A Tensor of type float. @@ -354,6 +379,7 @@ the values at the corner pixels. Defaults to false. *@par Outputs: *@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. +The format must be NHWC. *@li y_min: A Tensor of type float. *@li y_max: A Tensor of type float . \n @@ -381,7 +407,8 @@ REG_OP(QuantizedResizeBilinear) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li images: 4-D with shape [batch, height, width, channels]. +*@li images: 4-D with shape [batch, height, width, channels]. The format must +be NHWC. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images . \n @@ -391,7 +418,8 @@ output tensors are aligned, preserving the values at the corner pixels. Defaults to false . \n *@par Outputs: -*y: 4-D with shape [batch, new_height, new_width, channels] . \n +*y: 4-D with shape [batch, new_height, new_width, channels]. The format must +be NHWC. \n *@attention Constraints: *Input images can be of different types but output images are always float . \n @@ -414,10 +442,10 @@ REG_OP(ResizeArea) *@par Inputs: *Input grads must be a 4-D tensor. Inputs include: *@li grads: A Tensor of type float. 4-D with shape [batch, height, width, -channels]. +channels]. The format must be NHWC. *@li original_image: A Tensor. Must be one of the following types: float, double. 4-D with shape [batch, orig_height, orig_width, channels], The image -tensor that was resized . \n +tensor that was resized. The format must be NHWC. \n *@par Attributes: *@li align_corners: An optional bool. Defaults to False. If true, the centers @@ -426,10 +454,10 @@ false. *@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: -*y: A Tensor. Has the same type as original_image . \n +*y: A Tensor. Has the same type as original_image. The format must be NHWC. \n *@attention Constraints: -*Input images can be of different types but output images are always float . \n +*Input images can be of different types but output images are always float . *@par Third-party framework compatibility *Compatible with tensorflow ResizeBicubicGrad operator. @@ -448,7 +476,8 @@ REG_OP(ResizeBicubicGrad) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li images: 4-D with shape [batch, height, width, channels]. +*@li images: 4-D with shape [batch, height, width, channels]. The format +must be NHWC. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images . \n @@ -459,10 +488,11 @@ Defaults to false. *@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: -*y: 4-D with shape [batch, new_height, new_width, channels] . \n +*y: 4-D with shape [batch, new_height, new_width, channels]. The format +must be NHWC. \n *@attention Constraints: -*Input images can be of different types but output images are always float . \n +*Input images can be of different types but output images are always float . *@par Third-party framework compatibility *Compatible with tensorflow ResizeBicubic operator. @@ -483,7 +513,7 @@ REG_OP(ResizeBicubic) *@par Inputs: *Input grads must be a 4-D tensor. Inputs include: *@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, -float16, float, double. 4-D with shape [batch, height, width, channels]. +float16, float, double. Must set the format, supported format list ["NCHW, NHWC"] *@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. The original input size . \n @@ -550,9 +580,8 @@ REG_OP(ResizeNearestNeighborV2GradD) *@par Inputs: *Input grads must be a 4-D tensor. Inputs include: -*@li grads: A Tensor of type float32. 4-D with shape [batch, height, width, -channels]. -*@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width, +*@li grads: A Tensor of type float32. Must set the format, supported format list ["NCHW, NHWC"] +*@li original_image: A Tensor. 4-D shape. Must set the format, supported format list ["NCHW, NHWC"] channels], The image tensor that was resized . \n *@par Attributes: @@ -583,7 +612,7 @@ REG_OP(ResizeBilinearV2Grad) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li x: 4-D with shape [batch, height, width, channels]. +*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"] *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images . \n @@ -639,6 +668,62 @@ REG_OP(RGBToHSV) /** *@brief Generate a single randomly distorted bounding box for an image . \n +*@par Inputs: +*Input images must be a 4-D tensor. Inputs include: +*@li image_size: 1-D, containing [height, width, channels]. +*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding +boxes associated with the image. \n + +*@par Attributes: +*@li seed: If either seed or seed2 are set to non-zero, the random number +generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: A second seed to avoid seed collision. +*@li min_object_covered: The cropped area of the image must contain at least +this fraction of any bounding box supplied. The value of this parameter should +be non-negative. In the case of 0, the cropped area does not need to overlap +any of the bounding boxes supplied . +*@li aspect_ratio_range: The cropped area of the image must have an aspect +ratio = width / height within this range. +*@li max_attempts: Number of attempts at generating a cropped region of the +image of the specified constraints. After max_attempts failures, return the +entire image. +*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes +supplied. If true, assume an implicit bounding box covering the whole input. +If false, raise an error . \n + +*@par Outputs: +*@li begin: 1-D, containing [offset_height, offset_width, 0]. +*@li size: 1-D, containing [target_height, target_width, -1]. +*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n + +*@attention Constraints: +*Input images can be of different types but output images are always float . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SampleDistortedBoundingBox operator. +*/ + +REG_OP(SampleDistortedBoundingBox) + .INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .INPUT(bounding_boxes, TensorType({ DT_FLOAT })) + .OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .OUTPUT(bboxes, TensorType({ DT_FLOAT })) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .ATTR(min_object_covered, Float, 0.1f) + .ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) + .ATTR(area_range, ListFloat, { 0.05f, 1.0f }) + .ATTR(max_attempts, Int, 100) + .ATTR(use_image_if_no_bounding_boxes, Bool, false) + .OP_END_FACTORY_REG(SampleDistortedBoundingBox) + +/** +*@brief Generate a single randomly distorted bounding box for an image . \n + *@par Inputs: *Input images must be a 4-D tensor. Inputs include: *@li image_size: 1-D, containing [height, width, channels]. @@ -697,7 +782,7 @@ REG_OP(SampleDistortedBoundingBoxExt2) *@par Inputs: *Input x must be a 4-D tensor. Inputs include: -*@li x: 4-D with shape [batch, height, width, channels]. +*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"]. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images . \n @@ -729,12 +814,12 @@ REG_OP(ResizeNearestNeighborV2) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: *@li images: A Tensor. Must be one of the following types: float. 4-D with -shape [batch, height, width, depth]. A batch of images. +shape [batch, height, width, depth]. A batch of images. The format must be NHWC. *@li boxes: A Tensor of type float32. 3-D with shape [batch, num_bounding_boxes, 4] containing bounding boxes . \n *@par Outputs: -*A Tensor. Has the same type as images . \n +*A Tensor. Has the same type as images. The format must be NHWC. \n *@attention Constraints: *Input images must be a 4-D tensor . \n @@ -1002,6 +1087,88 @@ REG_OP(EncodePng) .ATTR(compression, Int, -1) .OP_END_FACTORY_REG(EncodePng) + +/** +*@brief PNG-decode an image. +*@par Inputs: +*contents: 0-D. PNG-decoded image . + +*@par Attributes: +*channels: graph channels \n +*dtype: type of image + +*@par Outputs: +*image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] +where channels is: 1: for grayscale; 2: for grayscale + alpha; 3: for RGB; +4: for RGBA . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow DecodePng operator. +*/ +REG_OP(DecodePng) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(image, TensorType({DT_UINT8, DT_UINT16})) + .ATTR(dtype, Type, DT_UINT8) + .ATTR(channels, Int, 0) + .OP_END_FACTORY_REG(DecodePng) + +/** +*@brief Bmp-decode an image. \n + +*@par Inputs: +*@li contents: A Tensor of type string. 0-D. The BMP-encoded image. \n + +*@par Attributes: +*@li channels: Decode the desired number of color channels of the image. \n + +*@par Outputs: +*image: A Tensor dtype of uint8. + +* @par Third-party framework compatibility +* Compatible with tensorflow DecodeBmp operator. +*/ + +REG_OP(DecodeBmp) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(image, TensorType({DT_UINT8})) + .ATTR(channels, Int, 0) + .OP_END_FACTORY_REG(DecodeBmp) + +/** +*@brief Function parse image from string to int. \n + +*@par Inputs: +*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n +*@li crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. \n + +*@par Attributes: +*@li channels: An optional int. Defaults to 0. Number of color channels for the +*decoded image. +*@li ratio: An optional int. Defaults to 1. Downscaling ratio. +*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower +*but nicer upscaling of the chroma planes +*@li try_recover_truncated: An optional bool. Defaults to False. If true try to +*recover an image from truncated input. +*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required +fraction of lines before a truncated input is accepted. +*@li dct_method: An optional string. Defaults to "". string specifying a hint +*about the algorithm used for decompression. \n + +*@par Outputs: +*image: A Tensor dtype of uint8. +*/ +REG_OP(DecodeAndCropJpeg) + .INPUT(contents, TensorType({DT_STRING})) + .INPUT(crop_window, TensorType({DT_INT32})) + .OUTPUT(image, TensorType({DT_UINT8})) + .ATTR(channels, Int, 0) + .ATTR(ratio, Int, 1) + .ATTR(fancy_upscaling, Bool, true) + .ATTR(try_recover_truncated, Bool, false) + .ATTR(acceptable_fraction, Float, 1.0) + .ATTR(dct_method, String, "") + .OP_END_FACTORY_REG(DecodeAndCropJpeg) + /** *@brief Resizes "images" to "size" using bilinear interpolation . \n @@ -1316,6 +1483,55 @@ REG_OP(CombinedNonMaxSuppression) .ATTR(clip_boxes, Bool, true) .OP_END_FACTORY_REG(CombinedNonMaxSuppression) +/** +*@brief Resizes "images" with "offset" using bilinear interpolation. \n + +*@par Inputs: +*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`. +*@li warp_offset: the resize offset A 4-D float tensor of shape `[n, h, w, 2]`, 2 means (x, y) for offset point. + +*@par Outputs: +*warp_img: A Tensor after resize. \n +*/ +REG_OP(IMGWarp) + .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .INPUT(warp_offset, TensorType({DT_FLOAT32})) + .OUTPUT(warp_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .OP_END_FACTORY_REG(IMGWarp) + +/** +*@brief Resizes "images" with "offset" using bilinear interpolation. \n + +*@par Inputs: +*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`. +*@li map_offset: the resize offset A 4-D float tensor of shape `[n, h, w, 2]`, 2 means (x, y) for resize point. + +*@par Outputs: +*map_img: A Tensor after resize. \n +*/ +REG_OP(Remap) + .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .INPUT(map_offset, TensorType({DT_FLOAT32})) + .OUTPUT(map_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) + .OP_END_FACTORY_REG(Remap) + +/** +*@brief Resizes "images" with "offset" using bilinear interpolation. \n + +*@par Inputs: +*@li img: input image, A 5-D tensor of shape `[n, 4, c, h, w]`, +and 4 mean input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left), (h_bottom, w_right)]. +*@li warp_index: the resize offset A 4-D float tensor of shape `[n, 2, h, w]`, 2 means (x, y) for resize point. + +*@par Outputs: +*remap_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n +*/ +REG_OP(IMGWarpResize) + .INPUT(img, TensorType({DT_FLOAT32})) + .INPUT(warp_index, TensorType({DT_FLOAT32})) + .OUTPUT(warp_img, TensorType({DT_FLOAT32})) + .OP_END_FACTORY_REG(IMGWarpResize) + /** *@brief Function spatial transformer . \n @@ -1342,6 +1558,383 @@ REG_OP(SpatialTransformerD) .ATTR(use_default_theta, ListBool, {}) .OP_END_FACTORY_REG(SpatialTransformerD) -} // namespace ge +/** +* @brief Resize the input tensor. \n +currently, only support resize image tensor using nearest neighbor and linear interpolation. + +* @par Inputs: +* Input x must be a 4-D tensor. Inputs include: \n +* @li x: A Tensor. Must be one of the following types: uint8, int8, int16, \n +int32, int64, float16, float, double. 4-D with shape [batch, height, width, channels] \n +or shape [batch, channels, height, width]. +* @li roi: A 1-D float Tensor. only takes effect when attr coordinate_transformation_mode \n +is "tf_crop_and_resize" +* @li scales: A 1-D float Tensor, the scale array along each dimension, Only one of \n +'scales' and 'sizes' can be specified. +* @li sizes: A 1-D int64 Tensor, The size of the output tensor. nly one of \n +'scales' and 'sizes' can be specified. If 'size' is specified, then set scales \n +to empty data (zero shape) in this operator's input list. + +* @par Attributes: +* @li coordinate_transformation_mode: String. Defaults to half_pixel. how to transform \n +the coordinate in the resized tensor to the coordinate in the original tensor. \n +other optional: pytorch_half_pixel, align_corners, asymmetric, tf_half_pixel_for_nn, \n +tf_crop_and_resize. +* @li cubic_coeff_a: Float. Defaults to -0.75, only used in cubic interpolation. \n +other optional: -0.5 +* @li exclude_outside: Int. Defaults to 0, If set to 1, the weight of sampling \n +locations outside the tensor will be set to 0 and the weight will be renormalized \n +so that their sum is 1.0. +* @li extrapolation_value: Float. Defaults to 0.0f. When coordinate_transformation_mode \n +is "tf_crop_and_resize" and x_original is outside the range [0, length_original - 1], \n +this value is used as the corresponding output value. +* @li mode: String. Defaults to nearest. Three interpolation modes: nearest (default), \n +linear and cubic. +* @li nearest_mode: String. Defaults to round_prefer_floor. Four modes: round_prefer_floor, \n +round_prefer_ceil, floor, ceil. Only used by nearest interpolation. + +* @par Outputs: +* y: A Tensor. Has the same type as x. + +* @attention Constraints: \n +* Input x must be a 4-D tensor. + +* @par Third-party framework compatibility +* Compatible with tensorflow ResizeNearestNeighborV2 operator. +*/ + +REG_OP(Resize) + .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(scales, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(sizes, TensorType({DT_INT64})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(coordinate_transformation_mode, String, "half_pixel") + .ATTR(cubic_coeff_a, Float, -0.75) + .ATTR(exclude_outside, Int, 0) + .ATTR(extrapolation_value, Float, 0) + .ATTR(mode, String, "nearest") + .ATTR(nearest_mode, String, "round_prefer_floor") + .OP_END_FACTORY_REG(Resize) + +/** +*@brief Function parse image from string to int. \n + +*@par Inputs: +*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n + +*@par Attributes: +*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. +*@li ratio: An optional int. Defaults to 1. Downscaling ratio. +*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower but nicer upscaling of the chroma planes +*@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input. +*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted. +*@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n + +*@par Outputs: +*image: A Tensor dtype of uint8. +*/ +REG_OP(DecodeJpeg) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(image, TensorType({DT_UINT8})) + .ATTR(channels, Int, 0) + .ATTR(ratio, Int, 1) + .ATTR(fancy_upscaling, Bool, true) + .ATTR(try_recover_truncated, Bool, false) + .ATTR(acceptable_fraction, Float, 1.0) + .ATTR(dct_method, String, "") + .OP_END_FACTORY_REG(DecodeJpeg) + +/** +*@brief Image warping using per-pixel flow vectors. \n + +*@par Inputs: +*@li image: 4-D Tensor with shape `[batch, height, width, channels]`. +*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n + +*@par Outputs: +*y: Returns 4-D with the same shape and dtype as `image`. \n +*/ +REG_OP(DenseImageWarp) + .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(DenseImageWarp) + +/** +*@brief Calculate the resize_d function. \n + +*@par Inputs: +*One inputs, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32. \n + +*@par Attributes: +*@li sizes: An optional listInt. \n +*@li scales: An optional listFloat. + Defaults to none. \n +*@li roi: An optional listInt. + Defaults to none. \n +*@li coordinate_transformation_mode: An optional String. + Defaults to "half_pixel". \n +*@li cubic_coeff_a: An optional float. + Defaults to -0.75. \n +*@li exclude_outside: An optional int. + Defaults to 0. \n +*@li extrapolation_value: An optional float. + Defaults to 0.0. \n +*@li mode: An optional String. + Defaults to "nearest". \n +*@li nearest_mode: An optional String. + Defaults to "round_prefer_floor". \n + +*@par Outputs: +*y: A Tensor with the same type of x's, + shape depends on x and sizes. \n +*/ +REG_OP(ResizeD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(sizes, ListInt) + .ATTR(scales, ListFloat, {}) + .ATTR(roi, ListInt, {}) + .ATTR(coordinate_transformation_mode, String, "half_pixel") + .ATTR(cubic_coeff_a, Float, -0.75) + .ATTR(exclude_outside, Int, 0) + .ATTR(extrapolation_value, Float, 0.0) + .ATTR(mode, String, "nearest") + .ATTR(nearest_mode, String, "round_prefer_floor") + .OP_END_FACTORY_REG(ResizeD) + +/** +*@brief Calculate the resize_grad_d function. \n + +*@par Inputs: +*One inputs, including: +* @li grads: A tensor. Must be one of the following types: +* float16, float32. \n + +*@par Attributes: +*@li original_size: An optional listInt. \n +*@li roi: An optional listInt. + Defaults to none. \n +*@li scales: An optional listFloat. + Defaults to none. \n +*@li coordinate_transformation_mode: An optional String. + Defaults to "half_pixel". \n +*@li cubic_coeff_a: An optional float. + Defaults to -0.75. \n +*@li exclude_outside: An optional int. + Defaults to 0. \n +*@li extrapolation_value: An optional float. + Defaults to 0.0. \n +*@li mode: An optional String. + Defaults to "nearest". \n +*@li nearest_mode: An optional String. + Defaults to "round_prefer_floor". \n + +*@par Outputs: +*y: A Tensor with the same type of x's, + shape depends on x and sizes. \n +*/ +REG_OP(ResizeGradD) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(original_size, ListInt) + .ATTR(roi, ListInt, {}) + .ATTR(scales, ListFloat, {}) + .ATTR(coordinate_transformation_mode, String, "half_pixel") + .ATTR(cubic_coeff_a, Float, -0.75) + .ATTR(exclude_outside, Int, 0) + .ATTR(extrapolation_value, Float, 0.0) + .ATTR(mode, String, "nearest") + .ATTR(nearest_mode, String, "round_prefer_floor") + .OP_END_FACTORY_REG(ResizeGradD) + +/** +*@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n + +*@par Inputs: +*@li grad: gradients with respect to DenseImageWarp output. +*@li image: 4-D Tensor with shape `[batch, height, width, channels]`. +*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n + +*@par Outputs: +*grad_image: Returns 4-D with the same shape and dtype as `image`. +*grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n +*/ +REG_OP(DenseImageWarpGrad) + .INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(DenseImageWarpGrad) + +/** +*@brief This operation samples input X by using interpolation based on flow field grid, + which is usually gennerated by affine_grid. The grid of shape [N, H, W, 2] is the concatenation of + (x, y) coordinates with shape [N, H, W] each, where x is indexing the 4th dimension (in width dimension) of + input data x and y is indexng the 3rd dimention (in height dimension), finally results is + the interpolation value of 4 nearest corner points. The output tensor shape will be [N, C, H, W]. + +*@par Inputs: +*@li x: 4-D Tensor with shape `[batch, channels, height, width]`. +*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`. + +*@par Attributes: +*@li interpolation_mode: An optional string specifying the interpolation method. Only 'bilinear' is + supported for now . +*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . +*@li align_corners: An optional bool. If "true", the centers of the corner + pixels of the input and output tensors are aligned. Defaults to "false" . + +*@par Outputs: +*y: Returns 4-D Tensor with the same dtype as `X`. + +*@par Third-party framework compatibility +*Compatible with pytorch GridSampler2D operator. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(GridSampler2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(interpolation_mode, String, "bilinear") + .ATTR(padding_mode, String, "zeros") + .ATTR(align_corners, Bool, false) + .OP_END_FACTORY_REG(GridSampler2D) + +/** +*@brief This operation unnormalize input Grid, which is usually gennerated by affine_grid. + +*@par Inputs: +*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`. +*@li assist: Assist matrix, a 4-D tensor of type float16. + +*@par Attributes: +*@li align_corners: An optional bool. If "true", the centers of the corner + pixels of the input and output tensors are aligned. Defaults to "false" . + +*@par Outputs: +*diff: Returns 4-D Tensor with the same shape and dtype as `grid`. +*position: Returns 4-D Tensor with the same shape as `grid`. +*/ +REG_OP(GridUnnormal) + .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(assist, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(diff, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(position, TensorType({DT_INT32})) + .ATTR(align_corners, Bool, false) + .OP_END_FACTORY_REG(GridUnnormal) + +/** +*@brief This operation unfold input X based on unnormalized grid, which is gennerated by GridUnnormal. + +*@par Inputs: +*@li x: 4-D Tensor with shape `[batch, channels, height, width]`. +*@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`. + +*@par Attributes: +*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . + +*@par Outputs: +*y: Returns 4-D Tensor with the same dtype as `x`. +*/ +REG_OP(ImageUnfold) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(position, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(padding_mode, String, "zeros") + .OP_END_FACTORY_REG(ImageUnfold) + +/** +*@brief This operation select images to warp_images according to offsets. + +*@par Inputs: +*@li images: 4-D Tensor with shape `[batch, height, width, 3]`. +*@li offsets: 4-D Tensor with shape `[batch, 4, new_height, new_width]`. + +*@par Outputs: +*warp_images: Returns 5-D Tensor with shape +`[batch, 4, new_height, new_width, 3]` and the same dtype as `images`. +*/ +REG_OP(IMGWarpOffsets) + .INPUT(images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT})) + .INPUT(offsets, TensorType({DT_FLOAT, DT_INT32})) + .OUTPUT(warp_images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(IMGWarpOffsets) + +/** +*@brief This operation samples 3d input x by using interpolation based on flow field grid, + which is usually gennerated by affine_grid. + +*@par Inputs: +*@li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`. +*@li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 2]`. + +*@par Attributes: +*@li interpolation_mode: An optional string specifying the interpolation method. +*@li padding_mode: An optional string specifying the pad method. +*@li align_corners: An optional bool. If "true", the centers of the corner + pixels of the input and output tensors are aligned. Defaults to "false" . + +*@par Outputs: +*y: Returns 5-D Tensor with the same dtype as `x`. + +*@par Third-party framework compatibility +*Compatible with pytorch GridSampler3D operator. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(GridSampler3D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(interpolation_mode, String, "bilinear") + .ATTR(padding_mode, String, "zeros") + .ATTR(align_corners, Bool, false) + .OP_END_FACTORY_REG(GridSampler3D) +/** +*@brief Computes the gradients of GridSampler3D. + +*@par Inputs: +*@li grad: 5-D Tensor with shape `[batch, channels, depth, height, width]`. +*@li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`. +*@li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 2]`. + +*@par Attributes: +*@li interpolation_mode: An optional string specifying the interpolation method. +*@li padding_mode: An optional string specifying the pad method. +*@li align_corners: An optional bool. If "true", the centers of the corner + pixels of the input and output tensors are aligned. Defaults to "false" . + +*@par Outputs: +*dx: Returns 5-D Tensor with the same dtype and shape as `x`. +*dgrid: Returns 5-D Tensor with the same dtype and shape as `grid`. + +*@par Third-party framework compatibility +*Compatible with pytorch GridSampler3DGrad operator. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(GridSampler3DGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(dgrid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(interpolation_mode, String, "bilinear") + .ATTR(padding_mode, String, "zeros") + .ATTR(align_corners, Bool, false) + .OP_END_FACTORY_REG(GridSampler3DGrad) + +} // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h index 9dde14a5..bcc3f1c3 100644 --- a/third_party/fwkacllib/inc/ops/internal_ops.h +++ b/third_party/fwkacllib/inc/ops/internal_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index 7a6fbc59..69c77bf6 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,8 +61,8 @@ REG_OP(CholeskyGrad) *@par Inputs: *The input x has to be symmetric and positive definite.Inputs include: -*x:A Tensor. Must be one of the following types: double, float32. Shape -is [..., M, M] . \n +*x:A Tensor. Must be one of the following types: double, float32, float16, +complex64, complex128. Shape is [..., M, M] . \n *@par Outputs: *y:A Tensor. Has the same type as x . \n @@ -76,10 +76,31 @@ form square matrices. */ REG_OP(Cholesky) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \ + DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \ + DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(Cholesky) +/** +*@brief Computes the outer product of two 1D vectors . \n + +*@par Inputs: +*The input x1 and x2 has to be a 1D vector.Inputs include: +*@li x1:A Tensor. Must be one of the following types: float16, float32. +Shape is [N] . \n +*@li x2:A Tensor. Must have the same type as x. Shape is [M] . \n + +*@par Outputs: +*y:A Tensor. Has the same type as x . \n +*/ + +REG_OP(Ger) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(Ger) + /** *@brief Computes the sign and the log of the absolute value of the determinant of one or more square matrices . \n @@ -87,8 +108,8 @@ of one or more square matrices . \n *@par Inputs: *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions form square matrices. Inputs include: -*x:A Tensor. Must be one of the following types: double, float32. Shape is -[..., M, M] . \n +*x:A Tensor. Must be one of the following types: double, float32, +complex64, complex128. Shape is [..., M, M] . \n *@par Outputs: *@li y:A Tensor. Has the same type as x. @@ -103,9 +124,9 @@ form square matrices. \n */ REG_OP(LogMatrixDeterminant) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(LogMatrixDeterminant) /** @@ -114,8 +135,8 @@ REG_OP(LogMatrixDeterminant) *@par Inputs: *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions form square matrices. Inputs include: -*x:A Tensor. Must be one of the following types: double, float32. Shape is -[..., M, M] . \n +*x:A Tensor. Must be one of the following types: double, float32, complex64, +complex128. Shape is [..., M, M] . \n *@par Outputs: *y:A Tensor. Has the same type as x . \n @@ -129,8 +150,8 @@ form square matrices. */ REG_OP(MatrixDeterminant) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(MatrixDeterminant) /** @@ -140,8 +161,7 @@ their adjoints (conjugate transposes) . \n *@par Inputs: *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions form square matrices. Inputs include: -*x:A Tensor. Must be one of the following types: double, float. Shape is -[..., M, M] . \n +*x:A Tensor of input. Shape is [..., M, M] . \n *@par Attributes: *adjoint:An optional bool. Defaults to False.Boolean indicating whether to @@ -159,8 +179,8 @@ form square matrices. \n */ REG_OP(MatrixInverse) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .ATTR(adjoint, Bool, false) .OP_END_FACTORY_REG(MatrixInverse) @@ -169,8 +189,7 @@ REG_OP(MatrixInverse) *@par Inputs: *The input rhs must have the same type as matrix. Inputs include: -*@li matrix:A Tensor. Must be one of the following types: double, float. -Shape is [..., M, M]. +*@li matrix:A Tensor of input. Shape is [..., M, M]. *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n *@par Attributes: @@ -189,9 +208,9 @@ dimensions form square matrices. \n */ REG_OP(MatrixSolve) - .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .ATTR(adjoint, Bool, false) .OP_END_FACTORY_REG(MatrixSolve) @@ -221,8 +240,8 @@ dimensions form square matrices. \n */ REG_OP(MatrixSolveLs) - .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .INPUT(l2, TensorType({DT_DOUBLE})) .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) .ATTR(fast, Bool, true) @@ -234,8 +253,7 @@ matrices by backsubstitution . \n *@par Inputs: *The input rhs must have the same type as matrix. Inputs include: -*@li matrix: A Tensor. Must be one of the following types: double, float. -Shape is [..., M, M]. +*@li matrix: A Tensor. Shape is [..., M, M]. *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n *@par Attributes: @@ -256,9 +274,9 @@ dimensions form square matrices. \n */ REG_OP(MatrixTriangularSolve) - .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .ATTR(lower, Bool, true) .ATTR(adjoint, Bool, false) .OP_END_FACTORY_REG(MatrixTriangularSolve) @@ -268,8 +286,7 @@ REG_OP(MatrixTriangularSolve) *@par Inputs: *The input shape of x must be [..., M, N]. Inputs include: -*x:A Tensor whose shape is [..., M, N]. Must be one of the following types: -double, float . \n +*x:A Tensor whose shape is [..., M, N]. \n *@par Attributes: *full_matrices: An optional bool. Defaults to False. If true, compute @@ -289,9 +306,12 @@ dimensions form matrices of size [M, N]. \n */ REG_OP(Qr) - .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) - .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) - .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) + .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) .ATTR(full_matrices, Bool, false) .OP_END_FACTORY_REG(Qr) @@ -320,12 +340,40 @@ form square matrices. \n */ REG_OP(SelfAdjointEig) - .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(eigen_value, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(eigen_vector, TensorType({ DT_DOUBLE, DT_FLOAT })) + .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(eigen_value, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(eigen_vector, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) .ATTR(compute_v, Bool, true) .OP_END_FACTORY_REG(SelfAdjointEig) +/** +*@brief Computes the sign and the log of the absolute value of the determinant +of one or more square matrices . \n + +*@par Inputs: +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. Inputs include: +*x:A Tensor. Must be one of the following types: double, float32, float16 +Shape is [..., M, M] . \n + +*@par Outputs: +*@li y:A Tensor. Has the same type as x. +*@li sign:A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow LogMatrixDeterminant operator. +*/ + +REG_OP(Slogdet) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(sign, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(Slogdet) + /** *@brief Computes the singular value decompositions of one or more matrices . \n @@ -384,8 +432,8 @@ of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n */ REG_OP(Lu) - .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OUTPUT(p, TensorType({DT_INT32, DT_INT64})) .REQUIRED_ATTR(output_idx_type, Type) .OP_END_FACTORY_REG(Lu) @@ -404,8 +452,8 @@ y: Shape is `[..., M, M]` . \n */ REG_OP(MatrixSquareRoot) - .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(MatrixSquareRoot) /** @@ -424,9 +472,9 @@ y: Tensor of shape `[..., M, K]` containing the solutions \n */ REG_OP(TridiagonalSolve) - .INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .ATTR(partial_pivoting, Bool, true) .OP_END_FACTORY_REG(TridiagonalSolve) diff --git a/third_party/fwkacllib/inc/ops/list_ops.h b/third_party/fwkacllib/inc/ops/list_ops.h new file mode 100644 index 00000000..a1b622e9 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/list_ops.h @@ -0,0 +1,504 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file list_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ + +#include +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief Creates and returns an empty tensor list. \n + +*@par Inputs: +*@li element_shape: A shape compatible with that of elements in the list. +*@li max_num_elements: The maximum number of elements. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li handle: An empty tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow EmptyTensorList operator. +*/ +REG_OP(EmptyTensorList) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(max_num_elements, TensorType({DT_INT32})) + .OUTPUT(handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(EmptyTensorList) + +/** +*@brief Returns a list which has the passed-in `Tensor` as last element +and the other elements of the given list in `input_handle`. \n + +*@par Inputs: +*@li input_handle: The old list. +*@li tensor: The tensor to put on the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle:A list with the elements of old list followed by tensor. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListPushBack operator. +*/ +REG_OP(TensorListPushBack) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListPushBack) + +/** +*@brief The last element of the input list as well as a +list with all but that element. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li element_shape: A shape compatible with that of elements in the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle:A list with the elements of the old list followed by tensor. +*@li tensor:The withdrawn last element of the list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListPopBack operator. +*/ +REG_OP(TensorListPopBack) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListPopBack) + +/** +*@brief The number of tensors in the input tensor list. \n + +*@par Inputs: +*@li input_handle: The input list. \n + +*@par Outputs: +*@li length:The number of tensors in the list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListLength operator. +*/ +REG_OP(TensorListLength) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .OUTPUT(length, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(TensorListLength) + +/** +*@brief The shape of elements in the input tensor list. \n + +*@par Inputs: +*@li input_handle: The input list. \n + +*@par Attributes: +*@li shape_type: The type of shape in the list. \n + +*@par Outputs: +*@li element_shape:A shape compatible with that of elements in the list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListElementShape operator. +*/ +REG_OP(TensorListElementShape) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .OUTPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .ATTR(shape_type, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListElementShape) + +/** +*@brief List of the given size with empty elements. \n + +*@par Inputs: +*@li element_shape: A shape compatible with that of elements in the list. +*@li num_elements: The number of elements to reserve. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. +*@li shape_type: The type of shape in the list. \n + +*@par Outputs: +*@li handle: An output tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListReserve operator. +*/ +REG_OP(TensorListReserve) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(num_elements, TensorType({DT_INT32})) + .OUTPUT(handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .ATTR(shape_type, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListReserve) + +/** +*@brief Get input tensor list elements of index position. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li index: A tensor of position. +*@li element_shape: A shape compatible with that of elements in the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li item: An output tensor value of index position . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListGetItem operator. +*/ +REG_OP(TensorListGetItem) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(index, TensorType({DT_INT32})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListGetItem) + +/** +*@brief Sets the index-th position of the list to contain the given tensor. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li index: The position in the list to which the tensor will be assigned. +*@li item: The element to be assigned to that position. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle: An output tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListSetItem operator. +*/ +REG_OP(TensorListSetItem) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(index, TensorType({DT_INT32})) + .INPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListSetItem) + +/** +*@brief Push tensor to list. \n + +*@par Inputs: +*@li input_handles: The input tensor lists. +*@li tensor: The tensor push into tensor list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handles: The output tensor lists. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListPushBackBatch operator. +*/ +REG_OP(TensorListPushBackBatch) + .INPUT(input_handles, TensorType({DT_VARIANT})) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(output_handles, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListPushBackBatch) + +/** +*@brief Stacks all tensors in the list. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li element_shape: A shape compatible with that of elements in the tensor. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. +*@li num_elements: The number of elements in the list. \n + +*@par Outputs: +*@li tensor: The tensor of list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListStack operator. +*/ +REG_OP(TensorListStack) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .ATTR(num_elements, Int, -1) + .OP_END_FACTORY_REG(TensorListStack) + +/** +*@brief Concats all tensors in the list along the 0th dimension. +Requires that all tensors have the same shape except the first dimension. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li element_shape: The shape of the uninitialized elements in the list. +If the first dimension is not -1, it is assumed that all list elements have +the same leading dim. +*@li leading_dims: The list of leading dims of uninitialized list elements. Used if +the leading dim of input_handle.element_shape or the element_shape input arg +is not already set. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li tensor: The concated result. +*@li lengths: Output tensor containing sizes of the 0th dimension of tensors +in the list, used for computing the gradient. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListConcatV2 operator. +*/ +REG_OP(TensorListConcatV2) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(leading_dims, TensorType({DT_INT64})) + .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(lengths, TensorType({DT_INT64})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListConcatV2) + +/** +*@brief Splits a tensor into a list. \n + +*@par Inputs: +*@li tensor: The input tensor. +*@li element_shape: A shape compatible with that of elements in the tensor. +*@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle: The list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListSplit operator. +*/ +REG_OP(TensorListSplit) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(lengths, TensorType({DT_INT64})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListSplit) + +/** +*@brief Creates a TensorList which, when stacked, has the value of `tensor`. \n + +*@par Inputs: +*@li tensor: The input tensor. +*@li element_shape: The shape of elements in the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle: An output tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListFromTensor operator. +*/ +REG_OP(TensorListFromTensor) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListFromTensor) + +/** +*@brief Resizes the list. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li size: size of the output list. \n + +*@par Outputs: +*@li output_handle: The output tensor list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListResize operator. +*/ +REG_OP(TensorListResize) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(size, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .OP_END_FACTORY_REG(TensorListResize) + +/** +*@brief Creates a Tensor by indexing into the TensorList. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li indices: The indices used to index into the list. +*@li element_shape: The shape of elements in the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li values: The tensor. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListGather operator. +*/ +REG_OP(TensorListGather) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(values, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListGather) + +/** +*@brief Creates a TensorList by indexing into a Tensor. \n + +*@par Inputs: +*@li tensor: The input tensor. +*@li indices: The indices used to index into the list. +*@li element_shape: The shape of the elements in the list (can be less specified than +the shape of the tensor). +*@li num_elements: The size of the output list. Must be large enough to accommodate +the largest index in indices. If -1, the list is just large enough to include +the largest index in indices. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle: The TensorList. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListScatterV2 operator. +*/ +REG_OP(TensorListScatterV2) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(num_elements, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListScatterV2) + +/** +*@brief Scatters tensor at indices in an input list. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li tensor: The input tensor. +*@li indices: The indices used to index into the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle: The TensorList. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListScatterIntoExistingList operator. +*/ +REG_OP(TensorListScatterIntoExistingList) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListScatterIntoExistingList) + +/** +*@brief Concat two tensor lists to a new tensor list. \n + +*@par Inputs: +*@li input_a: The input tensor list A. +*@li input_b: The input tensor list B. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output: The output list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListConcatLists operator. +*/ +REG_OP(TensorListConcatLists) + .INPUT(input_a, TensorType({DT_VARIANT})) + .INPUT(input_b, TensorType({DT_VARIANT})) + .OUTPUT(output, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListConcatLists) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h index bc8ae2b8..03be7757 100644 --- a/third_party/fwkacllib/inc/ops/logging_ops.h +++ b/third_party/fwkacllib/inc/ops/logging_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h index b37ab048..5d928e5a 100644 --- a/third_party/fwkacllib/inc/ops/lookup_ops.h +++ b/third_party/fwkacllib/inc/ops/lookup_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 149e0e37..319bcf70 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -222,6 +222,24 @@ REG_OP(Bucketize) .REQUIRED_ATTR(boundaries, ListFloat) .OP_END_FACTORY_REG(Bucketize) +/** +*@brief Returns a new tensor with the truncated integer values of the elements of input. \n + +*@par Inputs: +*One inputs, including: +* @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n + +*@par Outputs: +*y: A tensor with the same type and shape of input_x \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Trunc. \n +*/ +REG_OP(Trunc) + .INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) + .OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) + .OP_END_FACTORY_REG(Trunc) + /** *@brief Computes the sum along sparse segments of a tensor . \n @@ -365,6 +383,27 @@ REG_OP(GetNext) .ATTR(channel_name, String, "") .OP_END_FACTORY_REG(GetNext) +/** +*@brief Get dynamic dims after GetNext. \n + +*@par Inputs: +*input: A nested structure of Tensor objects, from GetNext's output. \n + +*@par Attributes: +*@li shape_info: GE shape_info for each inputs, -1 means unknow dim. +*@li N: Inputs number. \n + +*@par Outputs: +*dims: GE unknow dims, a vector of int64. \n +*/ + +REG_OP(GetDynamicDims) + .DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(dims, TensorType({DT_INT32, DT_INT64})) + .REQUIRED_ATTR(shape_info, ListInt) + .REQUIRED_ATTR(N, Int) + .OP_END_FACTORY_REG(GetDynamicDims) + /** *@brief End of sequence . \n @@ -494,6 +533,29 @@ REG_OP(NextAfter) .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) .OP_END_FACTORY_REG(NextAfter) +/** +*@brief Calculate the P-norm distance between vectors function. \n + +*@par Inputs: +*One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +*@par Attributes: +*@li p: An optional float.Defaults to 2. \n + +*@par Outputs: +*y: A Tensor with the same type and shape of input_x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Pdist. \n +*/ +REG_OP(Pdist) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Float, 2.0) + .OP_END_FACTORY_REG(Pdist) + /** *@brief Compute element-wise finiteness, return a boolean tensor. @@ -624,6 +686,7 @@ REG_OP(NLLLoss) .OUTPUT(y, TensorType({DT_FLOAT})) .OUTPUT(total_weight, TensorType({DT_FLOAT})) .ATTR(reduction, String, "mean") + .ATTR(ignore_index, Int, -100) .OP_END_FACTORY_REG(NLLLoss) /** @@ -653,6 +716,7 @@ REG_OP(NLLLossGrad) .INPUT(total_weight, TensorType({DT_FLOAT})) .OUTPUT(x_grad, TensorType({DT_FLOAT})) .ATTR(reduction, String, "mean") + .ATTR(ignore_index, Int, -100) .OP_END_FACTORY_REG(NLLLossGrad) /** @@ -710,6 +774,9 @@ REG_OP(IFMR) *@par Third-party framework compatibility *Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(WtsARQ) @@ -741,6 +808,9 @@ REG_OP(WtsARQ) *@par Third-party framework compatibility *Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ActsULQ) @@ -748,8 +818,8 @@ REG_OP(ActsULQ) .INPUT(clamp_min, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(clamp_max, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(clamp_min_mask, TensorType({DT_BOOL})) - .OUTPUT(clamp_max_mask, TensorType({DT_BOOL})) + .OUTPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) + .OUTPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) .OUTPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(fixed_min, Bool, false) .ATTR(num_bits, Int, 8) @@ -768,12 +838,15 @@ REG_OP(ActsULQ) *@par Third-party framework compatibility *Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ActsULQInputGrad) .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(clamp_min_mask, TensorType({DT_BOOL})) - .INPUT(clamp_max_mask, TensorType({DT_BOOL})) + .INPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) + .INPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) .OUTPUT(x_grad, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(ActsULQInputGrad) @@ -790,11 +863,14 @@ REG_OP(ActsULQInputGrad) *@par Third-party framework compatibility *Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ActULQClampMaxGrad) .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(clamp_max_mask, TensorType({DT_BOOL})) + .INPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) .INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(clamp_max_grad, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(ActULQClampMaxGrad) @@ -812,15 +888,208 @@ REG_OP(ActULQClampMaxGrad) *@par Third-party framework compatibility *Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ActULQClampMinGrad) .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(clamp_min_mask, TensorType({DT_BOOL})) + .INPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) .INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(ActULQClampMinGrad) +/** +* @brief Computes Lp norm. + +* @par Inputs: +* @li x: An ND tensor of type float16, float32. \n +* +* @par Attributes: +* @li p: Int, "inf" or "-inf", default value is 2. +* @li axes: ListInt, {} means all axes will be computed. +* @li keepdim: Bool, default is false. +* @li epsilon: Float, default is 1e-12. \n + +* @par Outputs: +* @li y: An ND tensor of type float16, float32. The shape of y is depending +* on axes and keepdim. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpNorm. +*/ +REG_OP(LpNorm) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Int, 2) + .ATTR(axes, ListInt, {}) + .ATTR(keepdim, Bool, false) + .ATTR(epsilon, Float, 1e-12) + .OP_END_FACTORY_REG(LpNorm) + +/** +* @brief get complex. + +* @par Inputs: +* @li real: An ND tensor of type float32. double +* @li imag: An ND tensor of type float32. double \n +* +* @par Outputs: +* @li out: An ND tensor of type complex64, complex128 \n +*/ +REG_OP(Complex) + .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(Tout, Type, DT_COMPLEX64) + .OP_END_FACTORY_REG(Complex) + +/** +* @brief deal complex. + +* @par Inputs: +* @li input: An ND tensor of type complex64, complex128 \n +* +* @par Outputs: +* @li output: An ND tensor of type float32. double \n +*/ +REG_OP(Imag) + .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(Tout, Type, DT_FLOAT) + .OP_END_FACTORY_REG(Imag) + +/** +* @brief deal complex. + +* @par Inputs: +* @li input: An ND tensor of type complex64, complex128 \n +* +* @par Outputs: +* @li output: An ND tensor of type float32. double \n +*/ +REG_OP(Angle) + .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(Tout, Type, DT_FLOAT) + .OP_END_FACTORY_REG(Angle) + +/** +*@brief Computes the gradient of SoftMarginLossGrad. \n + +*@par Inputs: +*Three inputs, including: +* @li predict: A tensor. Must be one of the following types: +* float16, float32. \n +* @li label: A tensor with same shape of predict. Must be one of the following types: +* float16, float32. \n +* @li dout: A tensor with same shpae of predcit. Must be one of the following types: +* float16, float32. \n + +*@par Attributes: +* @li reduction: Specifies the reduction to apply to the output: +* 'none' | 'mean' | 'sum'. Default: 'mean'. \n + +*@par Outputs: +* gradient: A Tensor with the same type of predict. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator SoftMarginLoss Backward. \n +*/ +REG_OP(SoftMarginLossGrad) + .INPUT(predict, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(label, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(dout, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(gradient, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SoftMarginLossGrad) + +/** +*@brief Calculate the cross product of two tensors. \n + +*@par Inputs: +*One inputs, including: +* @li x1: A tensor. Must be one of the following types: +* float16, float32, int32, int8, uint8, int16. \n +* @li x2: A tensor. Must be one of the following types: +* float16, float32, int32, int8, uint8, int16. \n + +*@par Attributes: +*@li dim: the dimination of compute.Defaults to -65530. \n + +*@par Outputs: +*y: A Tensor with the same type and shape of x1's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator cross. \n +*/ +REG_OP(Cross) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16})) + .ATTR(dim, Int, -65530) + .OP_END_FACTORY_REG(Cross) + +/** + *@brief Computes batched the p-norm distance between each pair of + *the two collections of row vectors. \n + + *@par Inputs: + *Two inputs, including: + * @li x1: A tensor with shpae: BxPXM. Must be one of the following types: + * float16, float32. \n + * @li x2: A tensor with shpae: BxRxM. Must be one of the following types: + * float16, float32. \n + + *@par Attributes: + * @li p: An optional float >= 0 or inf. Defaults to 2.0. \n + + *@par Outputs: + * y: A Tensor with the same type of x1's and with shape BxPxR. \n + + *@par Third-party framework compatibility + *Compatible with the Pytorch operator Cdist. \n + */ +REG_OP(Cdist) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Float, 2.0) + .OP_END_FACTORY_REG(Cdist) + +/** +*@brief Computes the grad of x1 in cdist. \n + +*@par Inputs: +*Four inputs, including: + * @li grad: Grad with shape BxPxR. Must be one of the following types: +* float16, float32. \n +* @li x1: A tensor with shpae: BxPXM. Must be one of the following types: +* float16, float32. \n +* @li x2: A tensor with shpae: BxRxM. Must be one of the following types: +* float16, float32. \n +* @li cdist: Output tensor of cdist forward with shpae: BxPXR. +* Must be one of the following types: float16, float32. \n + +*@par Attributes: +* @li p: An optional float >= 0 or inf. Defaults to 2.0. \n + +*@par Outputs: +* y: A Tensor with the same type and shape of x1's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Cdist Backward. \n +*/ +REG_OP(CdistGrad) + .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(cdist, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(p, Float, 2.0) + .OP_END_FACTORY_REG(CdistGrad) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index ed23d3f6..b317be37 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,8 +38,8 @@ namespace ge { * float32, int32. Has format [ND, NHWC] . \n *@par Attributes: -*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, @@ -70,8 +70,8 @@ REG_OP(MatMul) * float32, int32. Has format [ND, NHWC] . \n *@par Attributes: -*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, @@ -91,6 +91,36 @@ REG_OP(MatMulV2) .ATTR(offset_x, Int, 0) .OP_END_FACTORY_REG(MatMulV2) +/** +*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n + +*@par Inputs: +*Two inputs, including: +* @li x1: A matrix Tensor. 2D. Must be one of the following types: int8. +* @li x2: A matrix Tensor. 2D. Must be one of the following types: int8. +* @li compress_index: A compress index matrix of type int8. +* @li bias: A 1D Tensor. Must be one of the following types: int32, float16. + +*@par Attributes: +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n + +*@par Outputs: +*y: The result matrix Tensor. 2D. Must be one of the following types: float16, +* int32. \n + +*/ +REG_OP(MatMulV2Compress) + .INPUT(x1, TensorType({DT_INT8})) + .INPUT(x2, TensorType({DT_INT8})) + .INPUT(compress_index, TensorType({DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_INT32, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_INT32, DT_FLOAT16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .ATTR(transpose_x1, Bool, false) + .ATTR(transpose_x2, Bool, false) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(MatMulV2Compress) /** *@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . \n @@ -149,15 +179,15 @@ REG_OP(GEMM) *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n *@par Inputs: -*Three inputs, including: +*Two inputs, including: * @li x1: A matrix Tensor. Must be one of the following types: float16, * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. * @li x2: A matrix Tensor. Must be one of the following types: float16, * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n *@par Attributes: -*@li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. -*@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n +*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. +*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n *@par Outputs: *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, @@ -175,6 +205,42 @@ REG_OP(BatchMatMul) .ATTR(adj_x2, Bool, false) .OP_END_FACTORY_REG(BatchMatMul) + +/** +* @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n + +* @par Inputs: +* Three inputs, including: +* @li x1: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. +* @li x2: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n +* @li bias: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n + +* @par Attributes: +* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. +* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n + +* @par Outputs: +* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchMatmul. +*/ + +REG_OP(BatchMatMulV2) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .ATTR(adj_x1, Bool, false) + .ATTR(adj_x2, Bool, false) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(BatchMatMulV2) + /** *@brief Computes half the L2 norm of a tensor without the sqrt . \n @@ -334,7 +400,7 @@ REG_OP(MatrixSetDiagD) * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, * uint64 *@li indices: An ND Tensor. -*Must be one of the following types: int32, int64 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float32, int8, uint8, double, * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, @@ -378,6 +444,9 @@ REG_OP(ScatterNdUpdate) *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterUpdate. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterUpdate) .INPUT(x, TensorType::BasicType()) @@ -386,6 +455,34 @@ REG_OP(TensorScatterUpdate) .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(TensorScatterUpdate) +/** +*@brief Uses "updates" to update tensor "data" by "indices". \n + +*@par Inputs: +* Three inputs, including: +*@li data: An ND Tensor . \n +*Must be one of the following types: float16, float32, int32, int8, uint8 +*@li indices: An ND Tensor of type int32 or int64 +*@li updates: An Tensor. Same shape as indices. format:NCHW, NHWC . \n +*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@par Attributes: +*@li axis: An optional attribute. Defaults to 0. + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "data" . \n + +*@par Third-party framework compatibility +* Compatible with the ONNX operator ScatterElements. +*/ +REG_OP(ScatterElements) + .INPUT(data, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .ATTR(axis, Int, 0) + .OP_END_FACTORY_REG(ScatterElements) + /** *@brief Adds sparse "updates" to a variable reference . \n @@ -394,7 +491,7 @@ REG_OP(TensorScatterUpdate) *@li var: An ND Tensor . \n *Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor of type int32 or int64. +*@li indices: An ND Tensor of type int32 or int64 *@li updates: An Tensor. format:NCHW, NHWC . \n @@ -412,10 +509,10 @@ REG_OP(TensorScatterUpdate) * Compatible with the TensorFlow operator ScatterAdd. */ REG_OP(ScatterAdd) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterAdd) @@ -428,7 +525,7 @@ REG_OP(ScatterAdd) *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 @@ -443,10 +540,10 @@ REG_OP(ScatterAdd) * Compatible with the TensorFlow operator ScatterDiv. */ REG_OP(ScatterDiv) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterDiv) @@ -458,7 +555,7 @@ REG_OP(ScatterDiv) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: @@ -472,10 +569,10 @@ REG_OP(ScatterDiv) * Compatible with the TensorFlow operator ScatterNdAdd. */ REG_OP(ScatterNdAdd) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterNdAdd) @@ -499,6 +596,9 @@ REG_OP(ScatterNdAdd) *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterAdd. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterAdd) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -515,7 +615,7 @@ REG_OP(TensorScatterAdd) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32, int64 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 @@ -530,10 +630,10 @@ REG_OP(TensorScatterAdd) * Compatible with the TensorFlow operator ScatterNdSub. */ REG_OP(ScatterNdSub) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterNdSub) @@ -557,6 +657,9 @@ REG_OP(ScatterNdSub) *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterSub. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorScatterSub) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -573,7 +676,7 @@ REG_OP(TensorScatterSub) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32, int64 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: @@ -587,10 +690,10 @@ REG_OP(TensorScatterSub) * Compatible with the TensorFlow operator ScatterSub. */ REG_OP(ScatterSub) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterSub) @@ -761,7 +864,7 @@ REG_OP(ConfusionMatrix) *@li var: An ND Tensor. *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor . \n *Must be one of the following types: float16, float, int32, int8, uint8 @@ -778,7 +881,7 @@ REG_OP(ConfusionMatrix) */ REG_OP(ScatterMul) .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) - .INPUT(indices, TensorType({DT_INT32})) + .INPUT(indices, TensorType::IndexNumberType()) .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) @@ -791,13 +894,13 @@ REG_OP(ScatterMul) *@par Inputs: * Three inputs, including: *@li var: An ND Tensor. -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor. -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor. -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", the operation @@ -810,10 +913,10 @@ REG_OP(ScatterMul) * Compatible with the TensorFlow operator ScatterMin. */ REG_OP(ScatterMin) - .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterMin) @@ -824,13 +927,13 @@ REG_OP(ScatterMin) * Three inputs, including: *@li var: An ND Tensor . \n -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An NCHW, NHWC, or ND Tensor . \n -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An NCHW, NHWC, or ND Tensor . \n -*Must be one of the following types: float16, float, int32 +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". @@ -843,10 +946,10 @@ REG_OP(ScatterMin) * Compatible with the TensorFlow operator ScatterMax. */ REG_OP(ScatterMax) - .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) - .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterMax) @@ -860,7 +963,7 @@ REG_OP(ScatterMax) *Must be one of the following types: float16, float, int32, int8, uint8 *@li indices: An ND Tensor . \n -*Must be one of the following types: int32 +*Must be one of the following types: int32 or int64 *@li updates: An ND Tensor . \n *Must be one of the following types: float16, float, int32, int8, uint8 @@ -876,10 +979,10 @@ REG_OP(ScatterMax) * Compatible with the TensorFlow operator ScatterUpdate. */ REG_OP(ScatterUpdate) - .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) - .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterUpdate) @@ -979,6 +1082,137 @@ REG_OP(MatrixDiagV2) .OUTPUT(output, TensorType::BasicType()) .OP_END_FACTORY_REG(MatrixDiagV2) +/** +* @brief Add updates to var_out according to axis and indices. + +* @par Inputs: +* Three inputs, including: +* @li var: A Tensor. Must be one of the following types: +* float16, float32, int32, int8, uint8. +* @li indices: A Tensor of the indices, type should be int32. +* @li updates: A Tensor of the same type as "var". + +* @par Attributes: +* @li axis: An required int to specify the axis to perform indices add. + +* @par Outputs: +* @li var_out: A Tensor. Same as input "var". + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator index_add. + +* @par Restrictions: +* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(IndexAdd) + .INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .OUTPUT(var_out, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .ATTR(axis, Int, 0) + .OP_END_FACTORY_REG(IndexAdd) + +/** +*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n + +*@par Inputs: +* Two inputs, including: +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +*@li diagonal:(int, optional) – the diagonal to consider。\n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the Pytorch operator Triu. +*/ +REG_OP(Triu) + .INPUT(x, TensorType::BasicType()) + .ATTR(diagonal, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Triu) + +/** +*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n + +*@par Inputs: +* Two inputs, including: +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +*@li diagonal:(int, optional) – the diagonal to consider。\n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the Pytorch operator Tril. +*/ +REG_OP(Tril) + .INPUT(x, TensorType::BasicType()) + .ATTR(diagonal, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Tril) +/** +*@brief Concatenates a list of N tensors along the first dimension. +*@par Inputs: +* Two inputs, including: +* @li values: A list of Tensors. Must be one of the following types: int32, float16, float32. +* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. +* It's a dynamic input. +* @li shape: A Tensor of the same type as "x". +* The final shape of the result. Should be equal to the shapes of any input +* but with the number of input values in the first dimension . \n + +*@par Attributes: +*equation: The subscripts for the Einstein summation. \n +*N: tensor size of input \n + +*@par Outputs: +*@li y: Sums the product of the elements of the input operands along dimensions specified + using a notation based on the Einstein summation convention. \n + +*@attention Constraints: +*Input N must be Int. \n + +*@par Third-party framework compatibility +*Compatible with Pytorch einsum operator. +*/ +REG_OP(Einsum) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(equation, String) + .REQUIRED_ATTR(N, Int) + .OP_END_FACTORY_REG(Einsum) + +/** +*@brief Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. \n + +*@par Inputs: +*No inputs + +*@par Attributes: +*@li num_rows: An required int. \n +*@li num_columns: An optional int.Defaults to 0. \n +*@li batch_shape: An optional ListInt.Defaults to []. \n +*@li dtype: An optional int.Defaults to 0. \n + +*@par Outputs: +*y: A Tensor with targeted type and shape. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Eye. \n +*/ +REG_OP(Eye) + .OUTPUT(y, TensorType::BasicType()) /* "Result, has targeted element type" */ + .REQUIRED_ATTR(num_rows, Int) + .ATTR(num_columns, Int, 0) + .ATTR(batch_shape, ListInt, {}) + .ATTR(dtype, Int, 0) + .OP_END_FACTORY_REG(Eye) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 0c6a5dff..9629976e 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -144,6 +144,64 @@ REG_OP(BatchNorm) /** *@brief Performs batch normalization . \n +*@par Inputs: +* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) +*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 4D or NDC1HWC0 for 6D. +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NDC1HWC0. Specifies the scaling factor. +*@li offset: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NC1HWC0. Specifies the offset. +*@li mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the +operation is used for training. +*@li variance: A Tensor of type float32. Must be 3D if input "x" is with format NHWC or NCHW. Must be +5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" +if the operation is used for training . \n + +*@par Attributes: +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". +*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n + +*@par Outputs: +* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) +*@li y: A 3D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 4D or NDC1HWC0 for 6D. +*@li batch_mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NDC1HWC0. Specifies the mean of "x". +*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. +Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x". +*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. +Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n + +*@attention Constraints: +*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, +then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n + +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator fused_batch_norm. +*@li Compatible with the TensorFlow operator fused_batch_norm_v2. +*/ +REG_OP(BatchNorm3D) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NCDHW") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(BatchNorm3D) +/** +*@brief Performs batch normalization . \n + *@par Inputs: * Five inputs, including: (NHWC or NCHW supported) *@li x: A 4D Tensor of type float16 or float32. @@ -242,6 +300,52 @@ REG_OP(BatchNormGrad) /** *@brief Performs the backpropagation of BatchNorm . \n +*@par Inputs: +* Five inputs, including: +*@li y_backprop: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient. +*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0. +*@li scale: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. +*@li reserve_space_1: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NC1HWC0. It is an output of BatchNorm. +*@li reserve_space_2: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NC1HWC0. It is an output of BatchNorm . \n + +*@par Attributes: +*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". +*@li data_format: An optional string. Defaults to "NCDHW". +*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n + +*@par Outputs: +*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x". +*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale". +*@li *offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset". +*@li *reserve_space_4: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output. +*@li *reserve_space_5: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n + +*@attention Constraints: +* The preceding layer of this operator must be operator BatchNorm . \n + +*@see BatchNorm +*@par Third-party framework compatibility +* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad. +*/ +REG_OP(BatchNorm3DGrad) + .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(reserve_space_1, TensorType({DT_FLOAT})) + .INPUT(reserve_space_2, TensorType({DT_FLOAT})) + .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) + .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_4, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_5, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NCDHW") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(BatchNorm3DGrad) + +/** +*@brief Performs the backpropagation of BatchNorm . \n + *@par Inputs: * Five inputs, including: *@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. @@ -315,35 +419,7 @@ REG_OP(BNInference) .ATTR(use_global_stats, Bool,true) .ATTR(mode, Int,1) .OP_END_FACTORY_REG(BNInference) -/** -*@brief aicpu batch normalization host . \n -*@par Inputs: - -*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. -*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. -*@li momentum: An optional float, mean and variance's Scale factor -*@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". -*@li use_global_stats: mean inference mode , only can be "True". -*@li mode: An optional attr, not use -*@par Outputs: -*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean -*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance -*/ -REG_OP(BnHost) - .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT})) - .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT})) - .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT})) - .ATTR(epsilon, Float, 0.00001) - .ATTR(mode, Int, 1) - .ATTR(use_global_stats, Bool, true) - .OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT})) - .OP_END_FACTORY_REG(BnHost) /** *@brief Performs batch normalization . \n diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 35296870..98473c65 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -365,6 +365,25 @@ REG_OP(BiasAddGrad) * 4-D with shape [batch, out_height, out_width, out_channels] * or [batch, out_channels, out_height, out_width]. * Gradients with respect to the output of the convolution. + *\n + *\n + * The following are the supported data types and data formats: +*@verbatim + | Tensor | out_bckprop | filter | y + ------------|-------------|---------|-------- + | Data Type | float16 | float16 | float16 + | |-------------|---------|-------- + | | float32 | float32 | float32 + | |-------------|---------|-------- + | | float64 | float64 | float64 + ------------|-------------|---------|-------- + | Format | NCHW | NCHW | NCHW + | | NHWC | HWCN | NHWC +@endverbatim + * For float32 and float64 type, the actual calculation on the chip is based on + * float16. + *\n + * *@par Attributes: * Five attributes: * @li strides: A tuple/list of 4 integers. The stride of the sliding window @@ -377,8 +396,53 @@ REG_OP(BiasAddGrad) * channels. * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * "NHWC". Specify the data format of the input and output data. + *\n + *\n + * The following value range restrictions must be met: +*@verbatim + | Name | Field | Scope + -------------------|----------|-------------- + | input_size | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Filter | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | out_backprop | H*strideH| [1, 4096] + | | W*strideW| [1, 4096] + -------------------|----------|-------------- + | y(fmap) | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Stride | H | [1, 63] + | | W | [1, 63] + -------------------|----------|-------------- + | Padding | Top | [0, 255] + | | Bottom | [0, 255] + | | Left | [0, 255] + | | Right | [0, 255] + -------------------|----------|-------------- + | Dilation | H | [1, 255] + | | W | [1, 255] + +@endverbatim + * In Ascend910, fmap or out_backprop's H and W not support 1 when + * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 + *\n + * *@par Outputs: * y: A Tensor. Has the same type as filter,and has same format as input_size. + *\n + * out_backprop_height = (fmap_height + pad_top + pad_bottom - + * (dilation_h * (filter_height - 1) + 1)) + * / stride_h + 1 + *\n + * out_backprop_width = (fmap_width + pad_left + pad_right - + * (dilation_w * (filter_width - 1) + 1)) + * / stride_w + 1 + *\n + * *@par Third-party framework compatibility * Compatible with Tensorflow's conv2d_backprop_input */ @@ -454,6 +518,21 @@ REG_OP(Conv2DBackpropInputD) * @li bias: An optional tensor. Must have the same type as "y". * @li offset_w: An optional 1D tensor for quantized deconvolution. * Type is int8. Reserved.\n + *\n + *\n + * The following are the supported data types and data formats: +*@verbatim + | Tensor | x | filter | bias | y + ------------|---------|---------|---------|-------- + | Data Type | float16 | float16 | float16 | float16 + | |---------|---------|---------|-------- + | | int8 | int8 | int32 | int32 + ------------|---------|---------|---------|-------- + | Format | NCHW | NCHW | ND | NCHW +@endverbatim + * For int8, a dequant or requant operator must be followed. + *\n + * *@par Attributes: * Six attributes: * @li strides: A tuple or list of 2 integers. The stride of the sliding window @@ -467,9 +546,54 @@ REG_OP(Conv2DBackpropInputD) * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n Specify the data format of the input and output data. * @li offset_x: An optional integer for quantized deconvolution. - * Defaults to "0". + * The negative offset added to the input image for int8 type. Ensure offset_x + * within the effective range of int8 [-128, 127]. Defaults to "0". + *\n + *\n + * The following value range restrictions must be met: +*@verbatim + | Name | Field | Scope + -------------------|----------|-------------- + | x (out_backprop) | H*strideH| [1, 4096] + | | W*strideW| [1, 4096] + -------------------|----------|-------------- + | Filter | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | y (fmap) | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Stride | H | [1, 63] + | | W | [1, 63] + -------------------|----------|-------------- + | Padding | Top | [0, 255] + | | Bottom | [0, 255] + | | Left | [0, 255] + | | Right | [0, 255] + -------------------|----------|-------------- + | Dilation | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | Offset_x | | [-128, 127] + +@endverbatim + * In Ascend910, fmap or out_backprop's H and W not support 1 when + * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 + *\n + * *@par Outputs: * y: A Tensor. 4D tensor with shape [batch, channels, height, width]. + *\n + * out_backprop_height = (fmap_height + pad_top + pad_bottom - + * (dilation_h * (filter_height - 1) + 1)) + * / stride_h + 1 + *\n + * out_backprop_width = (fmap_width + pad_left + pad_right - + * (dilation_w * (filter_width - 1) + 1)) + * / stride_w + 1 + *\n + * * When type of x is float16, the type of y must be float16. * When type of x is int8, the type of y must be int32. */ @@ -502,6 +626,25 @@ REG_OP(Deconvolution) * [batch, out_height, out_width, out_channels] or [batch, out_channels, * out_height, out_width]. Gradients with respect to the output of the * convolution. + *\n + *\n + * The following are the supported data types and data formats: +*@verbatim + | Tensor | x | out_backprop | y + ------------|---------|--------------|--------- + | Data Type | float16 | float16 | float16 + | |---------|--------------|--------- + | | float32 | float32 | float32 + | |---------|--------------|--------- + | | float64 | float64 | float64 + |-----------|---------|--------------|--------- + | Format | NCHW | NCHW | NCHW + | | NHWC | NHWC | HWCN +@endverbatim + * For float32 and float64 type of x and outbackprop, the actual calculation on the chip + * is based on float16. + *\n + * *@par Attributes: * Five attributes: * @li strides: A tuple/list of 4 integers. The stride of the sliding window @@ -514,8 +657,52 @@ REG_OP(Deconvolution) * channels. * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * "NHWC". Specify the data format of the input and output data. + *\n +*\n +* The following value range restrictions must be met: +*@verbatim + | Name | Field | Scope + -------------------|----------|-------------- + | x(fmap) | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Filter Size | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | out_backprop | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | y | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Stride | H | [1, 63] + | | W | [1, 63] + -------------------|----------|-------------- + | Padding | Top | [0, 255] + | | Bottom | [0, 255] + | | Left | [0, 255] + | | Right | [0, 255] + -------------------|----------|-------------- + | Dilation | H | [1, 255] + | | W | [1, 255] + +@endverbatim + * In Ascend910, out_backprop's H and W not support 1 when + * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + *\n + * *@par Outputs: * y: A Tensor. Has the same type as x, has the same format as filter_size. + *\n + * out_backprop_height = (in_height + pad_top + pad_bottom - + * (dilation_h * (filter_height - 1) + 1)) + * / stride_h + 1 + *\n + * out_backprop_width = (in_width + pad_left + pad_right - + * (dilation_w * (filter_width - 1) + 1)) + * / stride_w + 1 + *\n + * *@par Third-party framework compatibility * Compatible with Tensorflow's conv2d_backprop_filter */ @@ -597,16 +784,14 @@ REG_OP(Conv2DBackpropFilterD) | Tensor | x | filter | bias | y ------------|---------|---------|---------|-------- | Data Type | float16 | float16 | float16 | float16 - | |---------|---------|---------|-------- | | float32 | float32 | float32 | float32 - | |---------|---------|---------|-------- | | int8 | int8 | int32 | int32 ------------|---------|---------|---------|-------- | Format | NCHW | NCHW | ND | NCHW | | NHWC | HWCN | | NHWC @endverbatim * For float32 type, the actual calculation on the chip is based on -* float16. For int8, a dequant or requant operator must be followed. +* float16. *\n * *@par Attributes: @@ -617,8 +802,7 @@ REG_OP(Conv2DBackpropFilterD) * (top, bottom, left, right) side of the input. *@li dilations: Optional. A list of 4 integers. The dilation factor for each * dimension of input. The dimension order is determined by the data format of -* "x". The N and C dimensions must be set to 1. The H and W dimensions must be -* set to 1 for int8 type. Defaults to [1, 1, 1, 1]. +* "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1]. *@li groups: Optional. An integer of type int32. The number of blocked * connections from input channels to output channels. In_channels and * out_channels must both be divisible by "groups". Defaults to 1. @@ -652,6 +836,8 @@ REG_OP(Conv2DBackpropFilterD) | Offset_x | | [-128, 127] @endverbatim +* The W dimension of the input image supports cases exceeding 4096, but it may +* cause compilation errors. *\n * *@par Outputs: @@ -666,21 +852,6 @@ REG_OP(Conv2DBackpropFilterD) * out_width = (in_width + pad_left + pad_right - * (dilation_w * (filter_width - 1) + 1)) * / stride_w + 1 -* -*@attention Constraints: -*@li The following restrictions on the output must be met: -*@verbatim - | Output | Restrictions - ----------|-------------------------------- - | H == 1 | H * W(input) == H * W(filter) - | W == 1 | - ----------|-------------------------------- - | H != 1 | W(input) == W(filter) - | W == 1 | Only for Ascend310 Hi3796V300CS -@endverbatim -* "H * W (input)" indicates the image size after padding and "H * W (filter)" -* indicates the filter size after dilation."W(input)" and W(filter) indicate -* the same rule on the W dimension. *\n * *@par Quantization supported or not @@ -778,7 +949,7 @@ REG_OP(Conv2DCompress) * With the format "HWCN" , the data is stored in the order of: [filter_height, * filter_width, in_channels / groups, out_channels]. *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format -* "NHWC", the data is stored in the order of: [batch, in_height, in_width, +* "NHWC", the data is stored in the order of: [batch, out_height, out_width, * deformable_groups * filter_height * filter_width * 3]. *@li bias: An optional 1D tensor of additive biases to the filter outputs. * The data is stored in the order of: [out_channels]. @@ -816,31 +987,20 @@ REG_OP(Conv2DCompress) *@li deformable_groups: Optional. An integer of type int32. The number of * deformable group partitions. In_channels must be divisible by * "deformable_groups". Defaults to 1. +*@li modulated: Optional. Specify version of DeformableConv2D, true means v2, +* false means v1, currently only support v2. *\n *\n * The following value range restrictions must be met: *@verbatim | Name | Field | Scope --------------------|--------|---------------------------- - | Input Image Size | H | [1, 100000] - | | W | [1, 4096] - --------------------|--------|---------------------------- - | Filter Size | H | [1, 255] - | | W | [1, 255] + | Input Image Size | H | [1, 100000 / filter_height] + | | W | [1, 4096 / filter_width] --------------------|--------|---------------------------- - | Stride | H | [1, 63] + | Filter Size | H | [1, 63] | | W | [1, 63] - --------------------|--------|---------------------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - ------------ -------|--------|---------------------------- - | Dilation | H | [1, 255] - | | W | [1, 255] @endverbatim -* "W(input)" indicate the image width after padding and W(filter) indicates the -* filter width after dilation. *\n * *@par Outputs: @@ -855,21 +1015,7 @@ REG_OP(Conv2DCompress) * out_width = (in_width + pad_left + pad_right - * (dilation_w * (filter_width - 1) + 1)) * / stride_w + 1 -* -*@attention Constraints: -*@li The following restrictions on the output must be met: -*@verbatim - | Output | Restrictions - ----------|-------------------------------- - | H == 1 | H * W(input) == H * W(filter) - | W == 1 | - ----------|-------------------------------- - | H != 1 | W(input) == W(filter) - | W == 1 | Only for Ascend310 Hi3796V300CS -@endverbatim -* "H * W(input)" indicates the image size after padding and "H * W(filter)" -* indicates the filter size after dilation. "W(input)" and W(filter) indicate -* the same rule on the W dimension. +*\n * *@par Quantization supported or not *@li No @@ -891,6 +1037,7 @@ REG_OP(DeformableConv2D) .ATTR(groups, Int, 1) .ATTR(data_format, String, "NHWC") .ATTR(deformable_groups, Int, 1) + .ATTR(modulated, Bool, true) .OP_END_FACTORY_REG(DeformableConv2D) /** @@ -916,12 +1063,12 @@ REG_OP(DeformableConv2D) *@par Attributes: * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li dilations: A list of 5 integers. Specifies the dilation factor for each - * dimension of "x", now only support [1,1,1,1,1] - * The N and C dimensions must be 1. Has the same format as "x". + * dimension of "x". + * The N, C and D dimensions must be 1. Has the same format as "x". * @li offset_x: An optional int. Input offset, used for quantized inference. * Defaults to 0. Reserved . \n @@ -967,8 +1114,8 @@ REG_OP(Conv3D) *@par Required Attributes: * @li strides: A list of 5 integers. Specifies the stride of the sliding window - * for each dimension of "x". - * The N and C dimensions must be 1. Has the same format as "x". + * for each dimension of "out_backprop". + * The N and C dimensions must be 1. Has the same format as "out_backprop". * @li pads: A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, * tail, top, bottom, left and right . \n @@ -976,14 +1123,15 @@ REG_OP(Conv3D) *@par Attributes: * Three attributes: * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li dilations: A tuple/list of 5 integers, The dilation factor for each - * dimension of the input, now only support [1,1,1,1,1] + * dimension of the input. + * The N, C and D dimensions must be 1. Has the same format as "out_backprop". *@par Outputs: - * y: A Tensor. Has the same type as filter,and has same format as input_size + * y: A Tensor. Has the same type as filter,and has same format as "input_size" *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_input @@ -1011,8 +1159,8 @@ REG_OP(Conv3DBackpropInput) *@par Required Attributes: * @li strides: A list of 5 integers. Specifies the stride of the sliding window - * for each dimension of "x". - * The N and C dimensions must be 1. Has the same format as "x". + * for each dimension of "out_backprop". + * The N and C dimensions must be 1. Has the same format as "out_backprop". * @li pads: A list of 6 integers. Supports only padding along the D, H and W * dimensions in sequence of head, tail, top, bottom, left and right. * @li input_size: A tuple/list of type int32, int64. An integer vector @@ -1023,13 +1171,14 @@ REG_OP(Conv3DBackpropInput) *@par Attributes: * Three attributes: * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li dilations: A tuple/list of 5 integers, The dilation factor for each - * dimension of input, now only support [1,1,1,1,1] + * dimension of input. + * The N, C and D dimensions must be 1. Has the same format as "out_backprop". *@par Outputs: - * y: A Tensor. Has the same type and data format as out_backprop. + * y: A Tensor. Has the same type and data format as "out_backprop". *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_input @@ -1072,9 +1221,7 @@ REG_OP(Conv3DBackpropInputD) * @li c_t: A optinal Tensor dtype of float16, float32. The cell state at time t . \n *@par Third-party framework compatibility: -* Compatible with the Pytorch operator adds. -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* Compatible with the Caffe operator LSTM. */ REG_OP(LSTM) .INPUT(x, TensorType({DT_FLOAT16})) @@ -1121,14 +1268,15 @@ REG_OP(LSTM) *@par Attributes: * Three attributes: * @li dilations: A tuple/list of 5 integers, The dilation factor for each - * dimension of input, now only support [1,1,1,1,1]. + * dimension of input. + * The N, C and D dimensions must be 1. Has the same format as "x". * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. *@par Outputs: - * y: A Tensor that has the same type as x + * y: A Tensor that has the same type as "x" * and the format is NDHWC, NCDHW or DHWCN. *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_filter @@ -1172,9 +1320,10 @@ REG_OP(Conv3DBackpropFilter) *@par Attributes: * Three attributes: * @li dilations: A tuple/list of 5 integers, The dilation factor for each - * dimension of input, now only support [1,1,1,1,1]. + * dimension of input. + * The N, C and D dimensions must be 1. Has the same format as "x". * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. @@ -1224,15 +1373,16 @@ REG_OP(Conv3DBackpropFilterD) *@par Attributes: * Five attributes: * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li dilations: A tuple/list of 5 integers, - * The dilation factor for each dimension of input, now only support [1,1,1,1,1] + * The dilation factor for each dimension of input. + * The N, C and D dimensions must be 1. Has the same format as "x". * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li output_padding: The size will be added in the output shape. * @li offset_x: Input offset_x value. Reserved. *@par Outputs: - * y: A Tensor. Has the same type and format as x. + * y: A Tensor. Has the same type and format as "x". */ REG_OP(Conv3DTranspose) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) @@ -1273,15 +1423,16 @@ REG_OP(Conv3DTranspose) *@par Attributes: * Five attributes: * @li dilations: A tuple/list of 5 integers, The dilation factor for each - * dimension of input, now only support [1,1,1,1,1] + * dimension of input. + * The N, C and D dimensions must be 1. Has the same format as "x". * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li output_padding: The size will be added in the output shape. * @li offset_x: Input offset_x value. Reserved. *@par Outputs: - * y: A Tensor. Has the same type and format as x. + * y: A Tensor. Has the same type and format as "x". *@par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. */ @@ -1316,6 +1467,22 @@ REG_OP(Conv3DTransposeD) * or [out_channels, in_channel, filter_height, filter_width]. * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". * @li offset_w: An optional 1D tensor for quantized inference. Reserved. + *\n + *\n + * The following are the supported data types and data formats: +*@verbatim + | Tensor | x | filter | bias | y + ------------|---------|---------|---------|-------- + | Data Type | float16 | float16 | float16 | float16 + | |---------|---------|---------|-------- + | | int8 | int8 | int32 | int32 + ------------|---------|---------|---------|-------- + | Format | NCHW | NCHW | ND | NCHW + | | NHWC | HWCN | | NHWC +@endverbatim + * For int8, a dequant or requant operator must be followed. + *\n + * *@par Required Attributes: * @li strides: A required tuple/list of 4 integers. The stride of the sliding * window for H/W dimension. The index of H/W is same as data_format. @@ -1333,10 +1500,58 @@ REG_OP(Conv3DTransposeD) * @li output_padding: The size will be added in the output shape. Defaults * to [0, 0, 0, 0]. * @li offset_x: An optional int. Input offset, used for quantized inference. - * Defaults to "0". + * The negative offset added to the input image for int8 type. Ensure offset_x + * within the effective range of int8 [-128, 127]. Defaults to "0". + *\n + *\n + * The following value range restrictions must be met: +*@verbatim + | Name | Field | Scope + -------------------|----------|-------------- + | input_size | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | x (out_backprop) | H*strideH| [1, 4096] + | | W*strideW| [1, 4096] + -------------------|----------|-------------- + | filter | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | y (fmap) | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Stride | H | [1, 63] + | | W | [1, 63] + -------------------|----------|-------------- + | Padding | Top | [0, 255] + | | Bottom | [0, 255] + | | Left | [0, 255] + | | Right | [0, 255] + -------------------|----------|-------------- + | Dilation | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | Offset_x | | [-128, 127] + +@endverbatim + * In Ascend910, fmap or out_backprop's H and W not support 1 when + * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 + *\n + * *@par Outputs: * y: A Tensor. A Tensor of type float16 or int32, and has same format as * input_size. + *\n + * out_backprop_height = (fmap_height + pad_top + pad_bottom - + * (dilation_h * (filter_height - 1) + 1)) + * / stride_h + 1 + *\n + * out_backprop_width = (fmap_width + pad_left + pad_right - + * (dilation_w * (filter_width - 1) + 1)) + * / stride_w + 1 + *\n + * */ REG_OP(Conv2DTranspose) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) @@ -1405,21 +1620,22 @@ REG_OP(Conv2DTransposeD) /** *@brief Computes the deformed convolution output with the expected input *@par Inputs: - * Four inputs: + * Two inputs: * @li x: A Tensor of type float16,float32 * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. *@par Required Attributes: * @li strides: A tuple/list of 4 integers.The stride of the sliding window for * height and width for H/W dimension. - * @li pads: A tuple/list of 4 integers.Padding added to each dimension + * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension * of the input. * @li ksize: A tuple/list of 2 integers.kernel size. *@par Attributes: - * Three attributes: + * Four attributes: * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension * of input. Defaults to [1, 1, 1, 1] * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. * @li deformable_groups: Specify the c-axis grouping number of input x. + * @li modulated: Specify version of DeformableConv2D, true means v2, false means v1 *@par Outputs: * y: A Tensor. A Tensor of type float16, float32. */ @@ -1433,7 +1649,69 @@ REG_OP(DeformableOffsets) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .ATTR(data_format, String, "NCHW") .ATTR(deformable_groups, Int, 1) + .ATTR(modulated, Bool, true) .OP_END_FACTORY_REG(DeformableOffsets) +/** +*@brief Computes the gradients of DeformableOffsets with respect to input and offsets +*@par Inputs: + * Three inputs: + * @li grad: A Tensor of type float16,float32. gradients with respect to DeformableOffsets output + * @li x: A Tensor of type float16,float32. + * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. +*@par Required Attributes: + * @li strides: A tuple/list of 4 integers.The stride of the sliding window for + * height and width for H/W dimension. + * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension + * of the input. + * @li ksize: A tuple/list of 2 integers.kernel size. +*@par Attributes: + * Three attributes: + * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension + * of input. Defaults to [1, 1, 1, 1] + * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. + * @li deformable_groups: Specify the c-axis grouping number of input x. + * @li modulated: Specify version of DeformableConv2D, true means v2, false means v1. +*@par Outputs: + * grad_x: A Tensor of type float16, float32. Gradients with respect to input_x + * grad_offsets: A Tensor of type float16, float32. Gradients with respect to input_offsets +*/ +REG_OP(DeformableOffsetsGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad_offsets, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(data_format, String, "NCHW") + .ATTR(deformable_groups, Int, 1) + .ATTR(modulated, Bool, true) + .OP_END_FACTORY_REG(DeformableOffsetsGrad) + +/** +*@brief Computes the deformed dilation output with the expected input +*@par Inputs: + * One inputs: + * @li x: A Tensor of type int8, float16, float32 +*@par Required Attributes: + * @li dilations: A tuple/list of integers. +*@par Attributes: + * Two attributes: + * @li padding_value: default value filling in blank + * @li pads: A tuple/list of integers. +*@par Outputs: + * y: A Tensor. A Tensor of type int8, float16, float32. +*/ +REG_OP(Dilation) + .INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(dilations, ListInt) + .ATTR(pads, ListInt, {}) + .ATTR(padding_value, Float, 0.0) + .OP_END_FACTORY_REG(Dilation) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index a013fb33..5fa40ad6 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -254,22 +254,22 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. */ - REG_OP(PriorBox) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .REQUIRED_ATTR(min_size, ListFloat) - .REQUIRED_ATTR(max_size, ListFloat) - .REQUIRED_ATTR(aspect_ratio, ListFloat) - .ATTR(img_h, Int, 0) - .ATTR(img_w, Int, 0) - .ATTR(step_h, Float, 0.0) - .ATTR(step_w, Float, 0.0) - .ATTR(flip, Bool, true) - .ATTR(clip, Bool, false) - .ATTR(offset, Float, 0.5) - .ATTR(variance, ListFloat, {0.1}) - .OP_END_FACTORY_REG(PriorBox); +REG_OP(PriorBox) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(min_size, ListFloat) + .REQUIRED_ATTR(max_size, ListFloat) + .REQUIRED_ATTR(aspect_ratio, ListFloat) + .ATTR(img_h, Int, 0) + .ATTR(img_w, Int, 0) + .ATTR(step_h, Float, 0.0) + .ATTR(step_w, Float, 0.0) + .ATTR(flip, Bool, true) + .ATTR(clip, Bool, false) + .ATTR(offset, Float, 0.5) + .ATTR(variance, ListFloat, {0.1}) + .OP_END_FACTORY_REG(PriorBox); /** *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n @@ -306,25 +306,25 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul *@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. */ - REG_OP(PriorBoxD) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .REQUIRED_ATTR(min_size, ListFloat) - .REQUIRED_ATTR(max_size, ListFloat) - .ATTR(img_h, Int, 0) - .ATTR(img_w, Int, 0) - .ATTR(step_h, Float, 0.0) - .ATTR(step_w, Float, 0.0) - .ATTR(flip, Bool, true) - .ATTR(clip, Bool, false) - .ATTR(offset, Float, 0.5) - .ATTR(variance, ListFloat, {0.1}) - .OP_END_FACTORY_REG(PriorBoxD); +REG_OP(PriorBoxD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(min_size, ListFloat) + .REQUIRED_ATTR(max_size, ListFloat) + .ATTR(img_h, Int, 0) + .ATTR(img_w, Int, 0) + .ATTR(step_h, Float, 0.0) + .ATTR(step_w, Float, 0.0) + .ATTR(flip, Bool, true) + .ATTR(clip, Bool, false) + .ATTR(offset, Float, 0.5) + .ATTR(variance, ListFloat, {0.1}) + .OP_END_FACTORY_REG(PriorBoxD); /** *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n @@ -358,22 +358,22 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul *@par Restrictions: *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. */ - REG_OP(PriorBoxDV2) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .REQUIRED_ATTR(min_size, ListFloat) - .REQUIRED_ATTR(max_size, ListFloat) - .ATTR(img_h, Int, 0) - .ATTR(img_w, Int, 0) - .ATTR(step_h, Float, 0.0) - .ATTR(step_w, Float, 0.0) - .ATTR(flip, Bool, true) - .ATTR(clip, Bool, false) - .ATTR(offset, Float, 0.5) - .ATTR(variance, ListFloat, {0.1}) - .OP_END_FACTORY_REG(PriorBoxDV2); +REG_OP(PriorBoxDV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(min_size, ListFloat) + .REQUIRED_ATTR(max_size, ListFloat) + .ATTR(img_h, Int, 0) + .ATTR(img_w, Int, 0) + .ATTR(step_h, Float, 0.0) + .ATTR(step_w, Float, 0.0) + .ATTR(flip, Bool, true) + .ATTR(clip, Bool, false) + .ATTR(offset, Float, 0.5) + .ATTR(variance, ListFloat, {0.1}) + .OP_END_FACTORY_REG(PriorBoxDV2); /** *@brief Performs Position Sensitive ROI Pooling . \n @@ -531,10 +531,10 @@ as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(Yolo) - .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(boxes, Int, 3) .ATTR(coords, Int, 4) .ATTR(classes, Int, 80) @@ -584,10 +584,10 @@ REG_OP(Yolo) * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(YoloV2DetectionOutput) - .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 5) .ATTR(coords, Int, 4) @@ -598,7 +598,7 @@ REG_OP(YoloV2DetectionOutput) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV2DetectionOutput) @@ -647,12 +647,12 @@ REG_OP(YoloV2DetectionOutput) *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead. */ REG_OP(YoloV2DetectionOutputD) - .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 5) .ATTR(coords, Int, 4) @@ -663,7 +663,7 @@ REG_OP(YoloV2DetectionOutputD) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV2DetectionOutputD) @@ -707,16 +707,16 @@ REG_OP(YoloV2DetectionOutputD) * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(YoloV3DetectionOutput) - .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases_low, ListFloat) .REQUIRED_ATTR(biases_mid, ListFloat) .REQUIRED_ATTR(biases_high, ListFloat) @@ -729,7 +729,7 @@ REG_OP(YoloV3DetectionOutput) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutput) @@ -776,22 +776,22 @@ s *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead. */ REG_OP(YoloV3DetectionOutputD) - .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex2, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(windex3, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex2, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hindex3, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases_low, ListFloat) .REQUIRED_ATTR(biases_mid, ListFloat) .REQUIRED_ATTR(biases_high, ListFloat) @@ -804,7 +804,7 @@ REG_OP(YoloV3DetectionOutputD) .ATTR(score_threshold, Float, 0.5) .ATTR(iou_threshold, Float, 0.45) .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutputD) @@ -848,7 +848,7 @@ There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yol * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(YoloV3DetectionOutputV2) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 3) .ATTR(coords, Int, 4) @@ -862,7 +862,7 @@ REG_OP(YoloV3DetectionOutputV2) .ATTR(N, Int, 10) .ATTR(resize_origin_img_to_net, Bool, false) .ATTR(out_box_dim, Int, 3) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutputV2) @@ -910,9 +910,9 @@ REG_OP(YoloV3DetectionOutputV2) * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead. */ REG_OP(YoloV3DetectionOutputV2D) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) - .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) - .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(biases, ListFloat) .ATTR(boxes, Int, 3) .ATTR(coords, Int, 4) @@ -926,7 +926,7 @@ REG_OP(YoloV3DetectionOutputV2D) .ATTR(N, Int, 10) .ATTR(resize_origin_img_to_net, Bool, false) .ATTR(out_box_dim, Int, 3) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(box_out_num, TensorType({DT_INT32})) .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D) @@ -968,8 +968,9 @@ REG_OP(SPP) * Three inputs, including: *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature * map. -*@li rois: A tensor of type float16 or float32, with shape +*@li rois: A tensor of type float16 or float32, with 3D shape * [batch, 5, roi_max_num], describing the RIOs. +* roi_max_num must be less than or equal to 6000 and must be divided by 16. *@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying * the number of ROIs per batch . \n @@ -1201,35 +1202,6 @@ REG_OP(RpnProposalsD) .OUTPUT(sorted_box, TensorType({DT_FLOAT16})) .OP_END_FACTORY_REG(RpnProposalsD) -/** -*@brief Computes Score Filte Pre-Sort function. - -*@par Inputs: -*Inputs include: -* @li rois: A Tensor. Must be float16. N-D with shape [N, 4]. -* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1]. - -*@par Attributes: -* @li score_threshold: required, float, threahold of topk process. -* @li k: required, Int, threahold of topk process. -* @li score_filter: bool, mark of score_filter. Defaults to "true" -* @li core_max_num: int, max number of core. Defaults to "8" -*@par Outputs: -* @li sorted_proposal: A Tensor. Must be float16. -* N-D with shape [8*6002, 8]. -* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8]. -*/ - -REG_OP(ScoreFiltePreSort) - .INPUT(rois, TensorType({DT_FLOAT16})) - .INPUT(cls_bg_prob, TensorType({DT_FLOAT16})) - .OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16})) - .OUTPUT(proposal_num, TensorType({ DT_UINT32})) - .REQUIRED_ATTR(score_threshold, Float) - .REQUIRED_ATTR(k, Int) - .ATTR(score_filter, Bool, true) - .ATTR(core_max_num, Int, 8) - .OP_END_FACTORY_REG(ScoreFiltePreSort) /** *@brief Computes Score Filte Pre-Sort function. @@ -1383,6 +1355,7 @@ REG_OP(DecodeWheelsTarget) *@attention Constraints: * Only computation of float16 data is supported. +* Note: when the class num per image * max_size_per_class is too big, will compile fail with ERROR-insufficient memory */ REG_OP(BatchMultiClassNonMaxSuppression) .INPUT(boxes, TensorType({DT_FLOAT16})) @@ -1464,9 +1437,9 @@ REG_OP(NormalizeBBox) * y: A Tensor. Must have the same type as box_predictions. */ REG_OP(DecodeBboxV2) - .INPUT(boxes, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(anchors, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(anchors, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0}) .ATTR(decode_clip, Float, 0.0) .ATTR(reversed_box, Bool, false) @@ -1477,7 +1450,8 @@ REG_OP(DecodeBboxV2) * *@par Inputs: *Inputs include: -* x: A Tensor. Must be float16 or float32. +* x: A Tensor. Dtype support: flaot16, flaot, int16, int8, + uint8, int32, int64. * *@par Attributes: * @li axis: optional, int. @@ -1485,16 +1459,364 @@ REG_OP(DecodeBboxV2) * *@par Outputs: * @li y1: A Tensor. Must have the same type as x. -* @li y2: A Tensor. Indices of y1 in x.Dtype must be int32. +* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32. +* */ REG_OP(Sort) - .INPUT(x, TensorType({ DT_FLOAT16 })) - .OUTPUT(y1, TensorType({ DT_FLOAT16 })) - .OUTPUT(y2, TensorType({ DT_INT32 })) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8, + DT_UINT8, DT_INT32, DT_INT64})) + .OUTPUT(y1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8, + DT_UINT8, DT_INT32, DT_INT64})) + .OUTPUT(y2, TensorType({DT_INT32})) .ATTR(axis, Int, -1) .ATTR(descending, Bool, false) .OP_END_FACTORY_REG(Sort) +/** +*@brief Computes iou for input bboxes and gtboxes. + +*@par Inputs: +* Two inputs, including: +*@li bboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1), +*@li gtboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1).\n + +*@par Attributes: +*@li mode: A optional attribute of type string, whether judge the mode of iou. \n + +*@par Outputs: +*@li overlap: A 2D Tensor of type float16 with shape [n, m]. \n + +*@attention Constraints: +* Only computation of float16 data is supported. + +*@par Restrictions: +*Warning:THIS FUNCTION IS DEPRECATED. Please use Iou instead. +*/ +REG_OP(PtIou) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(mode, String, "iou") + .OP_END_FACTORY_REG(PtIou) + +/** +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n + +*@par Inputs: +*Input boxes and scores must be float16 type. Inputs include: +*@li boxes: A input tensor with shape [num_batches,spatial_dimension,4]. +The single box data format is indicated by center_point_box. +*@li scores: A input tensor with shape [num_batches,num_classes,spatial_dimension] +*@li max_output_size: A scalar integer tensor representing the maximum number +of boxes to be selected by non max suppression. +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding +whether boxes overlap too much with respect to IOU. +*@li score_threshold: A 0-D float tensor representing the threshold for +deciding when to remove boxes based on score . \n + +*@par Attributes: +*center_point_box:Integer indicate the format of the box data. +The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] +where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair +of box corners and the coordinates can be provided as normalized +(i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models. +1 - the box data is supplied as [x_center, y_center, width, height]. + Mostly used for Pytorch models. \n + +*@par Outputs: +*@li selected_indices: A 2-D integer tensor of shape [M] representing the +selected indices from the boxes tensor, where M <= max_output_size. \n + +*@attention Constraints: +*Input boxes and scores must be float16 type . \n + +*@par Third-party framework compatibility +*Compatible with onnx NonMaxSuppression operator. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(NonMaxSuppressionV6) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) + .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) + .OUTPUT(selected_indices, TensorType({DT_INT32})) + .ATTR(center_point_box, Int, 0) + .ATTR(max_boxes_size, Int, 0) + .OP_END_FACTORY_REG(NonMaxSuppressionV6) + +/** +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n + +*@par Inputs: +*Input boxes and scores must be float16 type. Inputs include: +*@li boxes: A input tensor with shape [num_batches,spatial_dimension,4]. +The single box data format is indicated by center_point_box. +*@li scores: A input tensor with shape [num_batches,num_classes,spatial_dimension] +*@li max_output_size: A scalar integer tensor representing the maximum number +of boxes to be selected by non max suppression. +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding +whether boxes overlap too much with respect to IOU. +*@li score_threshold: A 0-D float tensor representing the threshold for +deciding when to remove boxes based on score . \n +*@li index_id: A input tensor with shape [num_batches,num_classes,spatial_dimension,3] +the last dim representing (batch_id,class_id,index_id) . \n + +*@par Attributes: +*center_point_box:Integer indicate the format of the box data. +The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] +where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair +of box corners and the coordinates can be provided as normalized +(i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models. +1 - the box data is supplied as [x_center, y_center, width, height]. + Mostly used for Pytorch models. \n + +*@par Outputs: +*@li selected_indices: A 2-D integer tensor of shape [M] representing the +selected indices from the boxes tensor, where M <= max_output_size. \n + +*@attention Constraints: +*Input boxes and scores must be float16 type . \n + +*@par Third-party framework compatibility +*Compatible with onnx NonMaxSuppression operator. +*/ + +REG_OP(NonMaxSuppressionV7) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) + .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16})) + .OUTPUT(selected_indices, TensorType({DT_INT32})) + .ATTR(center_point_box, Int, 0) + .ATTR(max_boxes_size, Int, 0) + .OP_END_FACTORY_REG(NonMaxSuppressionV7) + +/** +*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n + +*@par Inputs: +* Three inputs, including: +*@li features: A 5HD Tensor list of type float32 or float16. +*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, +* the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1". + +*@par Attributes: +*@li finest_scale: A optional attribute of type int, specifying the scale of calculate levels of "rois". +*@li roi_scale_factor: A optional attribute of type float32, specifying the rescaling of "rois" coordinates. +*@li spatial_scale: A optional attribute of type list float32, specifying the scaling ratio of "features" +* to the original image. +*@li pooled_height: A optional attribute of type int32, specifying the H dimension. +*@li pooled_width: A optional attribute of type int32, specifying the W dimension. +*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency +* of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", +* which is a floating point number. Defaults to "0". +*@li pool_mode: An optional attribute of type string to indicate pooling mode. Defaults to "avg" . \n +*@li aligned: An optional attribute of type bool, specifying the align to corner. Defaults to true . \n + +*@par Outputs: +* output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16. +* The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height", +* "pooled_width", and "features", respectively. + +*@par Third-party framework compatibility +*Compatible with mmdetection SingleRoIExtractor operator. +*/ +REG_OP(RoiExtractor) + .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(finest_scale, Int, 56) + .ATTR(roi_scale_factor, Float, 0) + .ATTR(spatial_scale, ListFloat, {1.f / 4, 1.f / 8, 1.f / 16, 1.f / 32}) + .ATTR(pooled_height, Int, 7) + .ATTR(pooled_width, Int, 7) + .ATTR(sample_num, Int, 0) + .ATTR(pool_mode, String, "avg") + .ATTR(aligned, Bool, true) + .OP_END_FACTORY_REG(RoiExtractor) + +/** +*@brief Performs Position Sensitive PS ROI Pooling . \n + +*@par Inputs: +* Two inputs, including: +*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature +* map, dimension C1 must be equal to +* (int(output_dim+15)/C0))*group_size*group_size. +*@li rois: A tensor of type float16 or float32, with shape +* [batch, 5, rois_num], describing the ROIs, each ROI consists of five +* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates +* the index of the input feature map, "x1", "y1", "x2", or "y2" must be +* greater than or equal to "0.0" . \n + +*@par Attributes: +*@li output_dim: A required int32, specifying the number of output channels, +* must be greater than 0. +*@li group_size: A required int32, specifying the number of groups to encode +* position-sensitive score maps, must be within the range (0, 128). +*@li spatial_scale: A required float32, scaling factor for mapping the input +* coordinates to the ROI coordinates . \n + +*@par Outputs: +*y: An NC1HWC0 tensor of type float16 or float32, describing the result +* feature map . \n + +*@attention Constraints: +* HC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16 +*/ +REG_OP(PSROIPoolingV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(spatial_scale, Float) + .REQUIRED_ATTR(output_dim, Int) + .REQUIRED_ATTR(group_size, Int) + .OP_END_FACTORY_REG(PSROIPoolingV2) + +/** +*@brief Performs Position Sensitive PS ROI Pooling Grad . \n + +*@par Inputs: +* Two inputs, including: +*@li x: An NC1HWC0 tensor of type float16 or float32, describing the result +* feature map . \n +*@li rois: A tensor of type float16 or float32, with shape +* [batch, 5, rois_num], describing the ROIs, each ROI consists of five +* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates +* the index of the input feature map, "x1", "y1", "x2", or "y2" must be +* greater than or equal to "0.0" . \n + +*@par Attributes: +*@li output_dim: A required int32, specifying the number of output channels, +* must be greater than 0. +*@li group_size: A required int32, specifying the number of groups to encode +* position-sensitive score maps, must be within the range (0, 128). +*@li spatial_scale: A required float32, scaling factor for mapping the input +* coordinates to the ROI coordinates . \n +*@li input_size: A required listInt, mapping the gradinput size: (H, W) + +*@par Outputs: +*y: An NC1HWC0 tensor of type float16 or float32, describing the feature +* map, dimension C1 must be equal to +* (int(output_dim+15)/C0))*group_size*group_size. + +*@attention Constraints: +* HC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16 +*/ +REG_OP(PSROIPoolingGradV2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(spatial_scale, Float) + .REQUIRED_ATTR(output_dim, Int) + .REQUIRED_ATTR(group_size, Int) + .REQUIRED_ATTR(input_size, ListInt) + .OP_END_FACTORY_REG(PSROIPoolingGradV2D) + +/** +*@brief Generate the responsible flags of anchor in a single feature map. + +*@par Inputs: +*@li gt_bboxes: Ground truth box, 2-D Tensor with shape `[batch, 4]`. + +*@par Attributes: +*@li featmap_size: The size of feature maps, listint. +*@li strides: Stride of current level, listint. +*@li num_base_anchors: The number of base anchors. + +*@par Outputs: +*flags: The valid flags of each anchor in a single level. +*/ +REG_OP(AnchorResponseFlags) + .INPUT(gt_bboxes, TensorType({DT_FLOAT})) + .OUTPUT(flags, TensorType({DT_UINT8})) + .REQUIRED_ATTR(featmap_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(num_base_anchors, Int) + .OP_END_FACTORY_REG(AnchorResponseFlags) + +/** +*@brief Generates bounding boxes based on yolo's "anchor" and "ground-truth" boxes. +* It is a customized mmdetection operator . \n + +*@par Inputs: +* Three inputs, including: +*@li anchor_boxes: anchor boxes generated by the yolo training set. +* A 2D Tensor of type float32 or float16 with shape (N, 4). "N" indicates the number +* of ROIs, "N" indicates the number of ROIs, and the value "4" refers to (tx, ty, tw, th). +*@li gt_bboxes: target of the transformation, e.g, ground-truth boxes. +* A 2D Tensor of type float32 or float16 with shape (N, 4). +* "N" indicates the number of ROIs, and 4 indicates "dx", "dy", "dw", and "dh" . +*@li stride: Scale for each box. +* A 1D Tensor of type int32 shape (N,). +* "N" indicates the number of ROIs. \n + +*@par Attributes: +*@li performance_mode: select performance mode, "high_precision" or "high_performance". +* select "high_precision" when input type is float32, the output tensor precision +* will be smaller than 0.0001, select "high_performance" when input type is float32, +* the ops will be best performance, but precision will be only smaller than 0.005. + +*@par Outputs: +*encoded_bboxes: Bboxes generated based on "anchor_boxes" and "gt_bboxes". Have the +* same format and type as "anchor_boxes". +* +*@attention Constraints: +* input anchor boxes only support maximum N=20480. \n +*/ +REG_OP(YoloBoxesEncode) + .INPUT(anchor_boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gt_bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(stride, TensorType({DT_INT32})) + .ATTR(performance_mode, String, "high_precision") + .OUTPUT(encoded_bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(YoloBoxesEncode) + +/** +*@brief Performs Position Sensitive PS ROI Pooling Grad. + +*@par Inputs: +* Eight inputs, including: +*@li assigned_gt_inds: Tensor of type float16 or float32, shape (n, ) +*@li overlaps: A Tensor. Datatype is same as assigned_gt_inds. IOU between gt_bboxes and bboxes. shape(k, n) +*@li box_responsible_flags: A Tensor. Support uint8. Flag to indicate whether box is responsible. +*@li max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=0). +*@li argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=0). +*@li gt_max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=1). +*@li gt_argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=1). +*@li num_gts: A Tensor. Support int32. real k. shape (1, ) + +*@par Attributes: +*@li output_dim: float. IOU threshold for positive bboxes. +*@li group_size: float. minimum iou for a bbox to be considered as a positive bbox +*@li spatial_scale: bool. whether to assign all bboxes with the same highest overlap with some gt to that gt. + +*@par Outputs: +*@li assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ). +*/ +REG_OP(GridAssignPositive) + .INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(box_responsible_flags, TensorType({ DT_UINT8 })) + .INPUT(max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(argmax_overlaps, TensorType({ DT_INT32 })) + .INPUT(gt_max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(gt_argmax_overlaps, TensorType({ DT_INT32 })) + .INPUT(num_gts, TensorType({ DT_INT32 })) + .OUTPUT(assigned_gt_inds_pos, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(pos_iou_thr, Float) + .REQUIRED_ATTR(min_pos_iou, Float) + .REQUIRED_ATTR(gt_max_assign_all, Bool) + .OP_END_FACTORY_REG(GridAssignPositive) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ + diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 35c4c7d4..b44c0780 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -55,7 +55,9 @@ REG_OP(LogSoftmaxGrad) *Two inputs, including: * @li features: A Tensor. Must be one of the following types: half, float32, double. * A "batch_size * num_classes" matrix. -* @li labels: A Tensor of the same type as "features". batch_size vector with values in [0, num_classes). +* @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'. +* batch_size vector with values in [0, num_classes). +* This is the label for the given minibatch entry. *@par Outputs: @@ -105,6 +107,9 @@ REG_OP(SoftmaxCrossEntropyWithLogits) * @li grad_softmax: A Tensor. Has the same shape and type as "softmax". * The format is NC1HWC0 or DN . \n +*@par Attributes: +* axes: An optional list of ints. Defaults to "{-1}" . \n + *@par Outputs: *grad_x: A Tensor. Has the same shape and type as "softmax" . \n @@ -115,6 +120,7 @@ REG_OP(SoftmaxGrad) .INPUT(softmax, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .INPUT(grad_softmax, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) .OUTPUT(grad_x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .ATTR(axes, ListInt, {-1}) .OP_END_FACTORY_REG(SoftmaxGrad) /** @@ -160,20 +166,20 @@ REG_OP(SigmoidCrossEntropyWithLogits) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) /** -*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n +*@brief Computes the sigmoid cross entropy loss of "predict" and "target". *@par Inputs: * four inputs, including: *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. -*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n -*@li weight: An multi-dimensional Tensor, specifying the weight value. \n +*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. +*@li weight: An multi-dimensional Tensor, specifying the weight value. *@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n *@par Attributes: -*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean" . \n +*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean". \n *@par Outputs: -*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n +*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict". \n *@par Third-party framework compatibility * Compatible with PyTorch operator BCEWithLogitsLoss. @@ -330,6 +336,41 @@ REG_OP(SoftmaxV2) .ATTR(axes, ListInt, {-1}) .OP_END_FACTORY_REG(SoftmaxV2) +/** +*@brief Function softmax with dropoutDoMaskV3D + +*@par Inputs: +*Two inputs, including: +* @li x: A mutable Tensor. The type only support float16. +* @li mask: A mutable Tensor. Must met all of the following rules: +* shape of mask should be 1D. +* dtype of mask should be uint8. +* value of shape should met the following algorithm: +* value = (size(x) + 128 - 1) // 128 * 128 + +*@par Attributes: +* @li keep_prob: A mutable Tensor. Must met all of the following rules: +* shape of "keep_prob" should be (1,) or [1,]. +* Has the same type as "x" . \n +* @li axes: A list of int. The dimension softmax would be performed on. Defaults +* to "[-1]" . \n + +*@par Outputs: +*y1: A mutable Tensor. Has the same type as "x". +*y2: A mutable Tensor. Has the same type as "x". \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(SoftmaxV2WithDropOutDoMaskV3D) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(y1, TensorType({DT_FLOAT16})) + .OUTPUT(y2, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(keep_prob, Float) + .ATTR(axes, ListInt, {-1}) + .OP_END_FACTORY_REG(SoftmaxV2WithDropOutDoMaskV3D) + /** *@brief Computes log softmax activations . \n @@ -427,6 +468,33 @@ REG_OP(MVN) .ATTR(eps, Float, 1e-9) .OP_END_FACTORY_REG(MVN) +/** +*@brief Normalizes the input . \n + +*@par Inputs: +* One input: +*x: An NCHW tensor of type float16 or float32 . \n + +*@par Attributes: +*@li eps: An optional float32 epsilon for not dividing by zero. Defaults to "1e-9" . \n +*@li axes: A list of Intefers, along which axis to reduce. Defaults to "[0, 2, 3]" . \n + +*@par Outputs: +*y: An NCHW tensor of type float16 or float32 . \n + +*@attention Constraints: +* The input tensor must have the NCHW format, whose shape length must be 4. +*@par Third-party framework compatibility +* Compatible with the ONNX operator MeanVarianceNormalization. +*/ + +REG_OP(MVNV2) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */ + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as inputs" */ + .ATTR(eps, Float, 1e-9) + .ATTR(axes, ListInt, {0, 2, 3}) + .OP_END_FACTORY_REG(MVNV2) + /** *@brief Normalizes the input "x1" . \n @@ -498,6 +566,31 @@ REG_OP(LayerNorm) .ATTR(epsilon, Float, 0.0000001) .OP_END_FACTORY_REG(LayerNorm) +/** +*@brief Returns a tensor where each sub-tensor of input along dimension +* dim is normalized such that the p-norm of the sub-tensor is lower than the value maxnorm. \n + +*@par Inputs: +*One input, including: +* @li x: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Attributes: +* @li p: Specify L_p norm, the type is float. +* @li dim: The processed dim, the type is int. +* @li maxnorm: Threshold for comparison, the type is float. \n + +*@par Outputs: +*One outputs, including: +* @li y: shape and dtype of output, should be same shape and type as input. +*/ +REG_OP(Renorm) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(p, Float) + .REQUIRED_ATTR(dim, Int) + .REQUIRED_ATTR(maxnorm, Float) + .OP_END_FACTORY_REG(Renorm) + /** *@brief LayerNormGrad operator interface implementation * calculating: dy, x, variance, mean, gamma @@ -586,6 +679,48 @@ REG_OP(LayerNormXBackprop) .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) .OP_END_FACTORY_REG(LayerNormXBackprop) +/** +*@brief LayerNormXBackpropV2 operator interface implementation +* calculating: dy, x, variance, mean, gamma +* pd_xl = data_dy*data_gamma +* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-1.5))), +* reduce_axis, keepdims=True) +* pd_mean = np.sum(((-1.0)*pd_xl +* np.power((data_variance + EPSLON), (-0.5))), +* reduce_axis, keepdims=True) +* + pd_var*(1.0/m) +* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) +* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + +* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) +* res_for_gamma = (data_x - data_mean) * np.power((data_variance + EPSLON), (-0.5)) + +*@par Inputs: +*Five inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Outputs: +*Three outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li res_for_gamma: A Tensor. Must be one of the following types: float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LayerNormXBackpropV2) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(res_for_gamma, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(LayerNormXBackpropV2) + /** *@brief LayerNormBetaGammaBackprop operator interface implementation * calculating: dy, x, variance, mean @@ -629,6 +764,35 @@ REG_OP(LayerNormBetaGammaBackprop) .REQUIRED_ATTR(shape_gamma, ListInt) .OP_END_FACTORY_REG(LayerNormBetaGammaBackprop) +/** +*@brief LayerNormBetaGammaBackpropV2 operator interface implementation +* calculating: dy, x, variance, mean +* pd_gamma = np.sum((data_dy*res_for_gamma), param_axis, keepdims=True) +* pd_beta = np.sum(data_dy, param_axis, keepdims=True) + +*@par Inputs: +*Three inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Outputs: +*Three outputs, including: +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LayerNormBetaGammaBackpropV2) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(res_for_gamma, TensorType({DT_FLOAT})) + .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(shape_gamma, ListInt) + .OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2) + /** *@brief Return "output" according to the algorithm of dropout_do_mask: * scale_x = x *(1 / keep_prob) @@ -656,7 +820,68 @@ REG_OP(DropOutDoMask) .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) .OP_END_FACTORY_REG(DropOutDoMask) - + +/** +*@brief Return "output" according to the algorithm of dropout_do_mask: +* scale_x = x *(1 / keep_prob) +* output = select(mask == 1, scale_x, 0) + +*@par Inputs: +*Three inputs, including: +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32 +* @li mask: A mutable Tensor. Must met all of the following rules: +* shape of mask should be 1D. +* dtype of mask should be uint8. +* value of shape should met the following algorithm: +* value = (size(x) + 128 - 1) // 128 * 128 +* @li keep_prob: A mutable Tensor. Must met all of the following rules: +* shape of "keep_prob" should be (1,) or [1,]. +* Has the same type as "x" . \n + +*@par Output: +*y: A mutable Tensor. Has the same type as "x". +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DropOutDoMaskV3) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mask, TensorType({DT_UINT8})) + .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(DropOutDoMaskV3) + +/** +*@brief Return "output" according to the algorithm of dropout_do_mask: +* scale_x = x *(1 / keep_prob) +* output = select(mask == 1, scale_x, 0) + +*@par Inputs: +*Two inputs, including: +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32 +* @li mask: A mutable Tensor. Must met all of the following rules: +* shape of mask should be 1D. +* dtype of mask should be uint8. +* value of shape should met the following algorithm: +* value = (size(x) + 128 - 1) // 128 * 128 +*@par Attributes: +* @li keep_prob: A mutable Tensor. Must met all of the following rules: +* shape of "keep_prob" should be (1,) or [1,]. +* Has the same type as "x" . \n + +*@par Output: +*y: A mutable Tensor. Has the same type as "x". +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DropOutDoMaskV3D) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(keep_prob, Float) + .OP_END_FACTORY_REG(DropOutDoMaskV3D) + /** *@brief Scales the input . \n @@ -703,7 +928,7 @@ REG_OP(Scale) *@par Inputs: *One input, including: -*@li x: A Tensor. Must be 4-D shape, and only support the following types: float16, float32 . \n +*x: A Tensor. Must be 4-D shape, and only support the following types: float16, float32 . \n *@par Attributes: *@li depth_radius: An optional int32, specifying the half-width of the normalization window. Defaults to "5". @@ -960,24 +1185,532 @@ REG_OP(INInferV2D) .OP_END_FACTORY_REG(INInferV2D) /** -*@brief Performs instance normalization for inference of InHost part. +* @brief InstanceNorm operator interface implementation. -*@par Inputs:\n -* One input, including: (NC1HWC0 supported) -* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. +* @li beta: A Tensor. Must be one of the following types: float16, float32. + +* @par Attributes: +* @li data_format: An attribute of type String \n +* @li epsilon: An attribute of type Float. \n + +* @par Outputs: +*Three outputs, including: +* @li y: A Tensor. Has the same type as "x". \n +* @li mean: A Tensor. Has the same type as "x". \n +* @li variance: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* Can be used by onnx InstanceNormalization +*/ +REG_OP(InstanceNorm) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(data_format, String, "NDHWC") + .ATTR(epsilon, Float, 1e-6) + .OP_END_FACTORY_REG(InstanceNorm) + +/** +*@brief InstanceNormGrad operator interface implementation. + +*@par Inputs: +*Five inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Outputs: +*Three outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(InstanceNormGrad) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(InstanceNormGrad) + +/** +*@brief InstanceNormXBackprop operator interface implementation. + +*@par Inputs: +*Five inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Outputs: +*Two outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li res_for_gamma: A Tensor. Must be one of the following types: float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(InstanceNormXBackprop) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(res_for_gamma, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(InstanceNormXBackprop) + +/** +*@brief InstanceNormBetaGammaBackprop operator interface implementation. + +*@par Inputs: +*Two inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li res_for_gamma: A Tensor. Must be one of the following types: float32.\n + +*@par Outputs: +*Two outputs, including: +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(InstanceNormBetaGammaBackprop) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(res_for_gamma, TensorType({DT_FLOAT})) + .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(InstanceNormBetaGammaBackprop) + +/** +* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n + +* @par Inputs: +* Three inputs, including: +* @li grad: A Tensor. Must be one of the following types: float16, float32. +* Required. +* @li input: A Tensor. Has the same type as "grad". Required. +* @li target: A Tensor. Has the same type as "grad". Required. \n + +* @par Attributes: +* @li reduction: An optional attribute of type String. Defaults to "mean". \n +* @li log_target: An optional attribute of type Bool. Defaults to false. \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "grad". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator KlDivLossGrad. +*/ +REG_OP(KlDivLossGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .ATTR(log_target, Bool, false) + .OP_END_FACTORY_REG(KlDivLossGrad) + +/** +* @brief Computes l1_loss_grad or l1_loss_backward. \n + +* @par Inputs: +* Three inputs, including: +* @li grads: A Tensor. Must be one of the following types: float16, float32. +* Required. +* @li predict: A Tensor. Has the same type as "grads". Required. +* @li label: A Tensor. Has the same type as "grads". Required. \n + +* @par Attributes: +* @li reduction: An optional attribute of type String. Defaults to "mean". \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator L1LossGrad. +*/ +REG_OP(L1LossGrad) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(L1LossGrad) + +/** +* @brief Computes loss of lp, p=1,2,3.... + +* @par Inputs: +* @li predict: An ND tensor of type float16, float32. +* @li label: An ND tensor of type float16, float32. \n + +* @par Attributes: +* @li p: A required int attribute that decides which loss to compute, now the p only can be 1 to compute l1_loss. +* @li reduction: An optional string.Defaults to "mean". \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same shape and type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpLoss. +*/ +REG_OP(LpLoss) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(p, Int) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(LpLoss) + +/** +* @brief Computes gradients of mse loss. + +* @par Inputs: +* @li predict: An ND tensor of type float16, float32. +* @li label: An ND tensor of type float16, float32. +* @li dout: An ND tensor of type float16, float32. \n + +* @par Attributes: +* @li reduction: An optional string.Defaults to "mean". \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same shape and type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator MseLossGrad. +*/ +REG_OP(MseLossGrad) + .INPUT(predict, TensorType({DT_FLOAT32, DT_FLOAT16})) + .INPUT(label, TensorType({DT_FLOAT32, DT_FLOAT16})) + .INPUT(dout, TensorType({DT_FLOAT32, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT32, DT_FLOAT16})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(MseLossGrad) + +/** +* @brief Computes mse loss. +* @par Inputs: +* two inputs, including: +* @li predict: An ND Tensor of dtype float16 or float32. +* @li label: An ND Tensor of dtype float16 or float32.\n +* +* @par Attributes: +* @li reduction:An optional str from sum, none, mean, Defaults to "mean".\n +* +* @par Outputs: +* @li y: when reduction=sum/mean, y is scale. when reduction=none, y has +* same type and shape as "predict".\n +*/ +REG_OP(MseLoss) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(MseLoss) + +/** +* @brief Calculates the reversed outputs of the function "smooth_l1_loss_v2". \n + +* @par Inputs: +* Three Inputs, including: +* @li predict: A Tensor. Must be one of the following types: +* float16, float32. +* @li label: A Tensor. Has the same type as "predict". +* @li dout: A Tensor. Has the same type as "predict". \n + +* @par Attributes: +* Two Attributes, including: +* @li sigma: An optional float. Defaults to 1.0. \n + +* @li reduction: An optional string. Defaults to "mean", +* Must be one of the following: "none", "mean", "sum". \n + +* @par Outputs: +* @li gradient: A Tensor. Has the same type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SmoothL1LossBackward. +*/ +REG_OP(SmoothL1LossGradV2) + .INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(gradient, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(sigma, Float, 1.0) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SmoothL1LossGradV2) + +/** +* @brief Creates a criterion that uses a squared term if the absolute +* element-wise error falls below beta and an L1 term otherwise. It is +* less sensitive to outliers than the MSELoss and in some cases prevents +* exploding gradients. + +* @par Inputs: +* @li predict: A multi-dimensional Tensor of type float16 or float32, +* specifying the predictive value. \n +* @li label: A multi-dimensional Tensor of type float16 or float32, +* specifying the target value. \n + +* @par Attributes: +* @li sigma: An optional int. Specifies the threshold of loss. Defaults +* to "1.0". \n +* @li reduction: An optional str. Specifies the reduction to apply to +* the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, +* 'mean': the sum of the output will be divided by the number of elements in +* the output,'sum': the output will be summed. Default: 'mean'. \n + +* @par Outputs: +* @li loss: Indicates the loss between the predictive value and target value. +* Has the same dimensions as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator smooth_l1_loss. \n +*/ +REG_OP(SmoothL1LossV2) + .INPUT(predict, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(label, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(loss, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(sigma, Float, 1.0) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SmoothL1LossV2) + +/** +* @brief Computes Centralization. result = x - mean(x, axes) + +* @par Inputs: +* @li x: An ND tensor of type float16, float32. +* @par Attributes: +* @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. +* Must be in the range [-rank(x), rank(x)). +* @par Outputs: +* @li y: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* custom operator \n +*/ +REG_OP(Centralization) + .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(axes, ListInt, {-1}) + .OP_END_FACTORY_REG(Centralization) + +/** +*@brief Roll the tensor along the given dimension(s). +* Elements that are shifted beyond the last position are re-introduced at the first position. +* If a dimension is not specified, the tensor will be flattened before rolling and then restored to the original shape. \n + +*@par Inputs: +*One inputs, including: +* @li x: A tensor . Must be one of the following types: +* float16, float32, int32, uint32, int8, uint8. \n *@par Attributes: -* epsilon: An optional float32, specifying the small value added to -variance to avoid dividing by zero. Defaults to "0.00001" . \n +* @li shifts: The number of places by which the elements of the tensor are shifted. \n +* @li dims: Axis along which to roll. \n -*@par Outputs:\n -* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. +*@par Outputs: +* y: A Tensor with the same type and shape of x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Roll. \n */ -REG_OP(InHost) - .INPUT(variance, TensorType({DT_FLOAT})) - .OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) - .ATTR(epsilon, Float, 0.00001) - .OP_END_FACTORY_REG(InHost) +REG_OP(Roll) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8})) + .REQUIRED_ATTR(shifts, ListInt) + .ATTR(dims, ListInt, {}) + .OP_END_FACTORY_REG(Roll) + +/** + *@brief Calculate the loss. Creates a criterion that optimizes a two-class classification + logistic loss between input_x and input_y (containing 1 or -1). \n + + *@par Inputs: + *One inputs, including: + * @li input_x: A tensor. Must be one of the following types: + * float16, float32. \n + * @li input_y: A tensor. Must be one of the following types: + * float16, float32. \n + + *@par Attributes: + *@li lambd: An optional string.Defaults to "mean". \n + + *@par Outputs: + *output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n + * while reduction == "sum" or "mean", A Tensor with the same type of input_x , shape of which is (1,) + + *@par Third-party framework compatibility + *Compatible with the Pytorch operator SoftMarginLoss. \n + */ +REG_OP(SoftMarginLoss) + .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(reduction, String, "mean") + .OUTPUT(output_z, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(SoftMarginLoss) + +/** +* @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2. + +* @par Inputs: +* @li predict: An ND tensor of type float16, float32. +* @li target: An ND tensor of type float16, float32. +* @li dout: An ND tensor of type float16, float32. +* @li weight: An optional ND tensor of type float16, float32. +* @li pos_weight: An optional ND tensor of type float16, float32. \n + +* @par Attributes: +* @li reduction: An optional string.Defaults to "mean". \n + +* @par Outputs: +* @li gradient: An ND tensor tensor with the same shape and type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad. +*/ +REG_OP(SigmoidCrossEntropyWithLogitsGradV2) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) +/** + * @brief Calculate the PoissonNllLoss function. + * target∼Poisson(input)loss(input,target)=input−target∗log(input)+log(target!) \n + + * @par Inputs: + * Two inputs, including: + * @li input_x: A tensor. Must be one of the following types: + * float16, float32. \n + * + * @par Inputs: + * @li target: A tensor. Must be one of the following types: + * float16, float32. \n + + * @par Attributes: + * four Attributes, including: + * @li log_input: An optional bool. Defaults to "True" \n + * + * @par Attributes: + * @li full: An optional bool. Defaults to "False" \n + * + * @par Attributes: + * @li eps: An optional float. Defaults to "1e-8" \n + * + * @par Attributes: + * @li reduction: An optional string. Defaults to "mean" \n + + * @par Outputs: + * loss: A Tensor has same element type as two inputs. \n + + * @par Third-party framework compatibility + * Compatible with the Pytorch operator PoissonNllLoss. \n + */ +REG_OP(PoissonNllLoss) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(log_input, Bool, true) + .ATTR(full, Bool, false) + .ATTR(eps, Float, 1e-8) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(PoissonNllLoss) +/** + *@brief rnn_gen_mask + * @par Inputs: + * @li seq_length: A ND Tensor of type int32. Recoed the current length of each batch.\n + * + * @par Attributes: + * @li num_step: A required int.\n + * @li hidden_size: A required int. \n + * + * + * @par Output: + * y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n + * + */ +REG_OP(RnnGenMask) + .INPUT(seq_length, TensorType({DT_INT32})) + .OUTPUT(seq_mask, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(num_step, Int) + .REQUIRED_ATTR(hidden_size, Int) + .OP_END_FACTORY_REG(RnnGenMask) + +/** +* @brief Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss) +* between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices) \n + +* @par Inputs: +* Two inputs, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32. \n +* +* @par Inputs: +* @li target: A tensor. Must be the following types: +* int32. \n + +* @par Attributes: +* @li reduction: An optional string. Defaults to "mean" \n + +* @par Outputs: +* y: A Tensor has same element type as input x. \n +* is_target: A Tensor has same element type as input target. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator MultiLabelMarginLoss. \n +*/ +REG_OP(MultilabelMarginLoss) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(target, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(is_target, TensorType({DT_INT32})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(MultilabelMarginLoss) + +/** +*@brief Performs batch normalization . \n +*@par Inputs: +* Two inputs +*@li input_x: A Tensor. Support float32. shape (n, c, d). +*@li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n +*@par Attributes: +*@li normalize_type: Str. Support "per_feature" or "all_features". +*@li epsilon: An optional float32, specifying the small value added to +variance to avoid dividing by zero. Defaults to "0.00001" . \n +*@par Outputs: +* One outputs +*@li output_y: A Tensor for the normalized "x".Support float32. shape (n, c, d).\n +*/ +REG_OP(NormalizeBatch) + .INPUT(input_x, TensorType({ DT_FLOAT })) + .INPUT(seq_len, TensorType({ DT_INT32 })) + .OUTPUT(output_y, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(normalize_type, String) + .ATTR(epsilon, Float, 0.00001) + .OP_END_FACTORY_REG(NormalizeBatch) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h index 9edc469a..49fd02fa 100644 --- a/third_party/fwkacllib/inc/ops/nn_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,144 @@ */ #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ - +#include "graph/operator_reg.h" #include "nn_pooling_ops.h" +namespace ge { +/** +* @brief Says whether the targets are in the top "k" predictions . \n + +* @par Inputs: +* Three inputs, including: +* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor. +* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. +* @li k: A 1D Tensor of the same type as "targets". +* Specifies the number of top elements to look at for computing precision . \n + +* @par Outputs: +* precision: A Tensor of type bool . \n + +* @attention Constraints: +* @li targets must be non-negative tensor. + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator InTopKV2. +*/ +REG_OP(InTopKV2) + .INPUT(predictions, TensorType({DT_FLOAT})) + .INPUT(targets, TensorType(IndexNumberType)) + .INPUT(k, TensorType({IndexNumberType})) + .OUTPUT(precision, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(InTopKV2) + +/** +*@brief Performs batch normalization . \n + +*@par Inputs: +* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the scaling factor. +*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the offset. +*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the +operation is used for training. +*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be +5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" +if the operation is used for training . \n + +*@par Attributes: +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". +*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n + +*@par Outputs: +* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) +*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. +*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the mean of "x". +*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". +*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n + +*@attention Constraints: +*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, +then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n +*/ +REG_OP(FusedBatchNormV2) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NHWC") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(FusedBatchNormV2) + +/** + * @brief: Large amount of data sort.First operator of TopK. + * @par Inputs: + * two input, including: + * @li input_data: A Tensor. Data to be sorted. Support float16 + * @li input_index: A Tensor. Range(0, 2048). Datatype and format is same as input_data. + * @par Attributes: + * @li k_num: Int.Number to be sorted. + * @par Outputs: + * 1 output, including: + * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. + */ +REG_OP(SegmentSort) + .INPUT(input_data, TensorType({DT_FLOAT16})) + .INPUT(input_index, TensorType({DT_FLOAT16})) + .OUTPUT(output_proposal, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(k_num, Int) + .OP_END_FACTORY_REG(SegmentSort) + +/** + * @brief: Large amount of data sort.Second operator of TopK. + * @par Inputs: + * two input, including: + * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16 + * @par Attributes: + * @li k_num: Int.Number to be sorted. + * @par Outputs: + * 1 output, including: + * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. + */ +REG_OP(MultiMerge) + .INPUT(input_proposal, TensorType({DT_FLOAT16})) + .OUTPUT(output_proposal, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(k_num, Int) + .OP_END_FACTORY_REG(MultiMerge) + +/** + * @brief: Large amount of data sort.Third operator of TopK. + * @par Inputs: + * two input, including: + * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16 + * @par Attributes: + * @li k_num: Int.Number to be sorted. + * @par Outputs: + * 2 output, including: + * @li output_data: A Tensor. Datatype and format is same as input_data. Data sorted. + * @li output_index: A Tensor. int32. Data index. + */ +REG_OP(SingleMerge) + .INPUT(input_proposal, TensorType({DT_FLOAT16})) + .OUTPUT(output_data, TensorType({DT_FLOAT16})) + .OUTPUT(output_index, TensorType({DT_INT32})) + .REQUIRED_ATTR(k_num, Int) + .OP_END_FACTORY_REG(SingleMerge) +}// namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index ab35ba47..80a21333 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -182,6 +182,128 @@ REG_OP(AvgPool3D) .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(AvgPool3D) + +/** +*@brief Performs average pooling on the input. + +*@par Inputs: +*@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +*@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. +*@li multiplier: An optional tensor of float16, float32, double. + +*@par Attributes: +*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. +*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. +*@li pads: List of ints, implicit zero paddings on both sides of the input. +*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. +*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. +*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. +*@li data_format: A string, format of input data . \n + +*@par Outputs: +*y: The average pooled output tensor . \n + +*@attention Constraints: +*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPool3D. +*/ +REG_OP(AvgPool3DD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3DD) + +/** +* @brief Computes AvgPool3DGrad function. + +* @par Inputs: +* @li orig_input_shape: An NDHWC tensor of type int32. +* @li grads: An NDHWC tensor of type float16, float32, or double. + +* @par Attributes: +* @li ksize: List of ints that has length 5. The size of the window for each dimension of the input tensor. +* @li strides:List of ints that has length 5. The stride of the sliding window for each dimension of the input tensor. +* @li pads: List of ints, implicit zero paddings on both sides of the input. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. +* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. +* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. +* @li data_format: A string, format of input data. + +* @par Outputs: +* @output: A mutable tensor with the same shape and type as "orig_input_shape". + +* @attention Constraints: +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator AvgPoolGrad. +*/ + +REG_OP(AvgPool3DGrad) + .INPUT(orig_input_shape, TensorType({DT_INT32})) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3DGrad) + +/** +* @brief Performs average pooling on the input. + +* @par Inputs: +* @li grads: An NDHWC tensor of type float16. +* @li filter: An optional tensor of type float16, fractal_z_3d layout. +* @li multiplier: An optional tensor of float16. + +* @par Attributes: +* @li orig_input_shape: List of ints that has length 5. The size of the window for each dimension of the input tensor. +* @li ksize: List of ints that has length 5. The size of the window for each dimension of the input tensor. +* @li strides:List of ints that has length 5. The stride of the sliding window for each dimension of the input tensor. +* @li pads: List of ints, implicit zero paddings on both sides of the input. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. +* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. +* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. +* @li data_format: A string, format of input data . \n + +* @par Outputs: +* @output: The average pooled output tensor . \n + +* @attention Constraints: +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPool3DGradD. +*/ +REG_OP(AvgPool3DGradD) + .INPUT(grads, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16})) + .OUTPUT(output, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(orig_input_shape, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3DGradD) + /** *@brief Performs max_pool_ext2 on the input . \n @@ -278,8 +400,8 @@ No default value. specifying the stride of the sliding window for each dimension of the input tensor. No default value. *@li padding: A required string type of float16. -*@li pads: A list type of int32. Default value {0, 0, 0}. -*@li dilation: A list type of int32. Default value {1, 1, 1}. +*@li pads: A list type of int32. Default value {0,0,0,0,0,0}. +*@li dilation: A list type of int32. Default value {1,1,1,1,1,1}. *@li ceil_mode: A ceil mode number of int32 . Default value 0. *@li data_format: An optional string. Defaults to "NDHWC" . \n @@ -302,12 +424,37 @@ REG_OP(MaxPool3D) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(padding, String) - .ATTR(pads, ListInt, {0,0,0}) - .ATTR(dilation, ListInt, {1,1,1}) + .ATTR(pads, ListInt, {0,0,0,0,0,0}) + .ATTR(dilation, ListInt, {1,1,1,1,1,1}) .ATTR(ceil_mode, Int, 0) .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(MaxPool3D) +/** +*@brief Applies a 2D adaptive max pooling over an input signal conposed of several input planes. \n +* The output is of size H x W, for any input size. + +* @par Inputs: +* One input, including: +* @li x: A Tensor. Must be one of the following data types: +* float16, float32, float64. \n + +* @par Attributes: +* @li output_size: A required list of 2 ints +* specifying the size (H,W) of the output tensor. \n + +* @par Outputs: +* @li y: A Tensor. Has the same data type as "x" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator AdaptiveMaxPool2d. +*/ +REG_OP(AdaptiveMaxPool2d) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(argmax, TensorType::IndexNumberType()) + .REQUIRED_ATTR(output_size, ListInt) + .OP_END_FACTORY_REG(AdaptiveMaxPool2d) /** * @brief Computes second-order gradients of the maxpooling3d function . \n @@ -477,8 +624,9 @@ REG_OP(MaxPoolV2) *@par Inputs: * One input: -*x: An NC1HWC0 Tensor. Supported type: float, double, int32, - * uint8, int16, int8, int64, uint16, half, uint32, uint64 . \n +* x: An 4D Tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. + * Must set the format, supported format list ["NCHW, NHWC"]. \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, @@ -490,8 +638,8 @@ REG_OP(MaxPoolV2) *@li padding: A required string. No default value . \n *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". -*argmax: A Tensor. Has the same type and format as input "x". +*@li y: A Tensor. Has the same type and format as input "x". +*@li argmax: A Tensor. Has the same type and format as input "x". *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, * ksize[1] * ksize[2] <= 255. @@ -517,10 +665,12 @@ REG_OP(MaxPoolWithArgmax) *@par Inputs: * Three inputs, including: -*@li x: An NC1HWC0 tensor. Supported type: float, double, int32, +*@li x: An 4d tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. -*@li grad: An NC1HWC0 tensor. Supported type: float, double, int32, + * Must set the format, supported format list ["NCHW, NHWC"] +*@li grad: An 4d tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. + * Must set the format, supported format list ["NCHW, NHWC"] *@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n *@par Attributes: @@ -741,7 +891,7 @@ REG_OP(AvgPoolV2Grad) * @brief Computes gradients of averagev2 pooling function. * @par Inputs: -* @li input_grad: An NHWC tensor of type float16, float32, or double. +*input_grad: An NHWC tensor of type float16, float32, or double. * @par Attributes: * @li orig_input_shape: A required tuple or list of type int32. @@ -759,10 +909,10 @@ REG_OP(AvgPoolV2Grad) * @li data_format: An optional string. Defaults to "NHWC". * @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input". +*out_grad: A mutable tensor with the same shape and type as "orig_input". * @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator AvgPoolGrad. +*Compatible with the TensorFlow operator AvgPoolGrad. */ REG_OP(AvgPoolV2GradD) .INPUT(input_grad, TensorType({DT_FLOAT16})) @@ -1037,6 +1187,7 @@ REG_OP(MaxPool3DGrad) .OUTPUT(y, TensorType::RealNumberType()) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding, String, "SAME") .REQUIRED_ATTR(pads, ListInt) .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(MaxPool3DGrad) @@ -1107,7 +1258,7 @@ REG_OP(AvgPool1DD) *@par Inputs: * One input: -*x: An NC1HWC0 Tensor of type float16. +*x: An 4d Tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"]. *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for * each dimension of the input tensor. No default value. @@ -1148,9 +1299,9 @@ REG_OP(MaxPoolWithArgmaxV2) *@par Inputs: * Three inputs, including: -*@li x: An NC1HWC0 tensor of type float16. -*@li grad: An NC1HWC0 tensor of type float16. -*@li argmx: An NC1HWC0 tensor of type uint16 or int64 . \n +*@li x: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"] +*@li grad: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"] +*@li argmx: An 4d tensor of type uint16 or int64. Must set the format, supported format list ["NCHW, NHWC"] \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for @@ -1291,5 +1442,306 @@ REG_OP(MaxPoolV3Grad) .ATTR(global_pooling, Bool, false) .ATTR(ceil_mode, Bool, false) .OP_END_FACTORY_REG(MaxPoolV3Grad) + +/** +*@brief Performs Dilation2D on the input . \n + +*@par Inputs: +*x: A tensor of shape is 4d, format is support NHWC. +*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n + +*@par Attributes: +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. +*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1. +*@li padding_mode: A optional string. Defaults to "SAME", it support SAME and VALID. +*@li pads: An optional list of 4 ints. +*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n + +*@par Outputs: +*y: The output tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Dilation2D. +*/ +REG_OP(Dilation2D) + .INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(filter,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .OUTPUT(y,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(rates, ListInt) + .ATTR(padding_mode, String, "SAME") + .ATTR(pads, ListInt, {0,0,0,0}) + .ATTR(ceil_mode, Bool, false) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(Dilation2D) + +/** +*@brief Performs Dilation2DBackpropFilter on the input. \n + +*@par Inputs: +*x: A tensor of shape is 4d, format is support NHWC. +*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. +*out_backprop: Has the same type and format as input x and the c dimension is same with x. \n + +*@par Attributes +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. +*@li rates: A required list of 4 ints, the rates of the N and C dimensions are 1. +*@li padding_mode: A optional string. Defaults to "SAME", it support SAME and VALID. +*@li pads: A optional list of 4 ints. +*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n + +*@par Outputs: +*y: The output tensor. Has the same type and format as input "filter" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Dilation2DBackpropFilter. +*/ + +REG_OP(Dilation2DBackpropFilter) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(filter, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(out_backprop, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .OUTPUT(y, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(rates, ListInt) + .ATTR(padding_mode, String, "SAME") + .ATTR(pads, ListInt, {0, 0, 0, 0}) + .ATTR(ceil_mode, Bool, false) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(Dilation2DBackpropFilter) + +/** +*@brief Performs Dilation2DBackpropInput on the input. \n + +*@par Inputs: +*x: A tensor of shape is 4d, format is support NHWC. +*filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. +*out_backprop: Has the same type and format as input x and the c dimension is same with x. \n + +*@par Attributes +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. +*@li rates: A required list of 4 ints, the rates of the N and C dimensions are 1. +*@li padding_mode: A optional string. Defaults to "SAME", it support SAME and VALID. +*@li pads: A optional list of 4 ints. +*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n + +*@par Outputs: +*y: The output tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Dilation2DBackpropInput. +*/ + +REG_OP(Dilation2DBackpropInput) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(filter, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(out_backprop, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .OUTPUT(y, + TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(rates, ListInt) + .ATTR(padding_mode, String, "SAME") + .ATTR(pads, ListInt, {0, 0, 0, 0}) + .ATTR(ceil_mode, Bool, false) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(Dilation2DBackpropInput) + +/** +* @brief Applies a 2D adaptive average pooling over +* an input signal composed of several input planes. \n + +* @par Inputs: +* One input, including: +* @li x: A Tensor. Must be one of the following data types: +* float16, float32. \n + +* @par Attributes: +* @li output_size: A required list of 2 ints +* specifying the size (H,W) of the output tensor. \n + +* @par Outputs: +* @li y: A Tensor. Has the same data type as "x" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator AdaptiveAvgPool2d. +*/ +REG_OP(AdaptiveAvgPool2d) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(output_size, ListInt) + .OP_END_FACTORY_REG(AdaptiveAvgPool2d) + +/** +* @brief Compute gradients of adaptive averagev2 pooling function. + +* @par Inputs: +* @li input_grad: A Tensor. Must be one of the following data types: +* float16, float32. + +* @par Attributes: +* @li orig_input_shape: A required tuple or list of type int32. + +* @par Outputs: +* @li output_grad: A tensor with the same type as "input_grad". + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator AdaptiveAvgPool2dGrad. +*/ +REG_OP(AdaptiveAvgPool2dGrad) + .INPUT(input_grad, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(output_grad, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(orig_input_shape, ListInt) + .OP_END_FACTORY_REG(AdaptiveAvgPool2dGrad) + +/** +* @brief Performs the backpropagation of MaxPoolWithGradArgmaxV1. + +* @par Inputs: +* Three inputs, including: +* @li x: An NC1HWC0 tensor of type float16. +* @li grad: An NC1HWC0 tensor of type float16. +* @li argmax: An NC1HWC0 tensor of type uint16 or int64. \n + +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for +* each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for +* each dimension of the input tensor. No default value. +* @li pads: A required listint. \n + +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x". \n + +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 +* @li "pads" is listint. +* @li "ceil_mode" defaults to False. +* @li "data_format" defaults to "NC1HWC0". \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1. +*/ + +REG_OP(MaxPoolGradWithArgmaxV1) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(grad, TensorType({DT_FLOAT16})) + .INPUT(argmax, TensorType({DT_UINT16})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dtype, Int, 3) + .ATTR(dilation, ListInt, {1, 1, 1, 1}) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV1) + +/** +* @brief Performs max pooling on the input and outputs both max values and indices. + +* @par Inputs: +* One input: +* x: An NC1HWC0 Tensor of type float16. \n + +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for +* each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for +* each dimension of the input tensor. No default value. +* @li pads: A required string. No default value. \n + +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x". +* argmax: A Tensor. type:uint16, format:NC1HWC0. \n + +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, +* strides[2] <= 63, strides[2] >= 1. +* @li "pads" is listint. +* @li "ceil_mode" defaults to False. +* @li "data_format" defaults to "NC1HWC0". \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1. +*/ +REG_OP(MaxPoolWithArgmaxV1) + .INPUT(x, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(argmax, TensorType({DT_UINT16})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dtype, Int, 3) + .ATTR(dilation, ListInt, {1, 1, 1, 1}) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1) + +/** +*@brief Randomly sample a subset of positive and negative examples,and overwrite +the label vector to the ignore value (-1) for all elements that are not +included in the sample.\n + +* @par Inputs: +* One input: +* labels: shape of labels,(N, ) label vector with values. \n + +* @par Attributes: +* @li batch_size_per_images: A require attribute of type int. +* @li positive_fraction: A require attribute of type float. + +*@par Outputs: +*y: The result of subSample. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator SubSample. +*@par Restrictions: +*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly. +*/ +REG_OP(SubSample) + .INPUT(labels, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(batch_size_per_images, Int) + .REQUIRED_ATTR(positive_fraction, Float) + .OP_END_FACTORY_REG(SubSample) + +/** +*@brief Randomly sample a subset of positive and negative examples,and overwrite +the label vector to the ignore value (-1) for all elements that are not +included in the sample.\n + +* @par Inputs: +* two inputs, including: +* @li labels: shape of labels,(N, ) label vector with values:. +* @li shuffle_matrix: random matrix with shape (N, ). \n + +* @par Attributes: +* @li batch_size_per_images: A require attribute of type int. +* @li positive_fraction: A require attribute of type float. + +*@par Outputs: +*y: The result of subSample. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator SubSampleLabels. +*@par Restrictions: +*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly. +*/ +REG_OP(SubSampleLabels) + .INPUT(labels, TensorType({DT_INT32})) + .INPUT(shuffle_matrix, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(batch_size_per_images, Int) + .REQUIRED_ATTR(positive_fraction, Float) + .OP_END_FACTORY_REG(SubSampleLabels) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 047fd6da..75e91aee 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2101,6 +2101,55 @@ REG_OP(FusedMulApplyMomentumExtern) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern) +/** +*@brief Updates '*var' according to the momentum scheme. +* accum = accum * momentum - x1 * x2 * lr +* if use_nesterov is True: +* var += accum * momentum - x1 * x2 * lr +* else: +* var += accum +* +*@par Inputs: +*@li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +*@li accum: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li lr: A tensor for the learning rate. Has the same type as "var". Should be +* from a Variable(). +*@li x1: A Tensor has type TensorType::NumberType(). +*@li momentum: A scalar. Has the same type as "var". +*@li x2: A scalar has the same type as "var". +* +*@par Attributes: +*@li use_nesterov: An optional bool. Defaults to "False". +* If "True", var will be updated by using Nesterov momentum. +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" tensor is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +*@par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +*@attention Constraints: +* The input tensors must have the same shape. +* +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyKerasMomentum. +* +*/ +REG_OP(FusedMulApplyKerasMomentum) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(x1, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .INPUT(x2, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .ATTR(use_nesterov, Bool, false) + .OP_END_FACTORY_REG(FusedMulApplyKerasMomentum) + /** *@brief Update "g" according to the LARS algorithm . \n diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h index 7834591c..b27b1fa0 100644 --- a/third_party/fwkacllib/inc/ops/no_op.h +++ b/third_party/fwkacllib/inc/ops/no_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index e0e5dfc6..ca1c24eb 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -223,7 +223,29 @@ REG_OP(Relu6Grad) .INPUT(features, TensorType::RealNumberType()) .OUTPUT(backprops, TensorType::RealNumberType()) .OP_END_FACTORY_REG(Relu6Grad) - +/** +*@brief Calculate the elu_grad_v2 function. +*Applies the element-wise function: +* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha . +*@par Inputs: +*One inputs, including: +* @li grads: A tensor. Must be one of the following types: +* float16, float32. +* @li activations: A tensor. Must be one of the following types: +* float16, float32. +* +*@par Outputs: +*y: A Tensor with the same type and shape of grads's. +* +*@par Attributes: +*@li alpha: scalar parameter, default value = 1.0 +*/ +REG_OP(EluGradV2) + .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(alpha, Float, 1.0) + .OP_END_FACTORY_REG(EluGradV2) /** * @brief Compute sigmoid of "x" element-wise . \n @@ -508,6 +530,42 @@ REG_OP(Elu) .ATTR(alpha, Float, 1.0) .OP_END_FACTORY_REG(Elu) +/** +*@brief Continuously Differentiable Exponential Linear Uints: +* Perform the linear uint element-wise on the input tensor X using formula: +* max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n + +*@par Inputs: +*x: A float16, float32, for the input data type . \n + +*@par Attributes: +*alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n + +*@par Attributes: +*alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n + +*@par Attributes: +*alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n + +*@par Outputs: +*y: A float16, float32, for the normalized result . \n + +*@attention Constraints: +*@li The input is of type float16 or float32 . \n + +*@par Multiple batches supported or not +*Supported +*@par Third-party framework compatibility +*@li Compatible with ONNX's Celu operator +*/ +REG_OP(Celu) + .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) + .ATTR(alpha1, Float, 1.0) + .ATTR(alpha2, Float, 1.0) + .ATTR(alpha3, Float, 1.0) + .OP_END_FACTORY_REG(Celu) + /** *@brief Computes gradients for the exponential linear (Elu) operation. * @@ -640,6 +698,352 @@ REG_OP(Mish) .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 })) .OP_END_FACTORY_REG(Mish) +/** + * @brief: pytorch mish_grad operator. + * @par Inputs: + * three input, including: + * @li grad: A Tensor. shape, datatype and format is same as x + * @li x: A Tensor. Must be one of the following types: float16, float32 + * @li tanhx: A Tensor. shape, datatype and format is same as x + * @par Outputs: + * 1 output, including: + * @li x_grad: A Tensor. shape, datatype and format is same as x + */ + +REG_OP(MishGrad) + .INPUT(grad, TensorType({ DT_FLOAT,DT_FLOAT16 })) + .INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 })) + .OPTIONAL_INPUT(tanhx, TensorType({ DT_FLOAT,DT_FLOAT16 })) + .OUTPUT(x_grad, TensorType({ DT_FLOAT,DT_FLOAT16 })) + .OP_END_FACTORY_REG(MishGrad) + +/** + * @brief pytorch hardtanh_backward operator. + * + * @par Inputs: + * 2 inputs, including: + * @li result, minimum tensor of the linear region range, + * datatype: float16/float32, format:ND/5HD. + * @li grad, maximum tensor of the linear region range, + * datatype:float16/float32, format:ND/5HD. \n + + * @par Attributes: + * 2 attributes, including: + * @li min_val, minimum value of the linear region range, datatype:float. + * @li max_val, maximum value of the linear region range, datatype:float. \n + + * @par Outputs: + * 1 output, including: + * @li y, hardtanh_backward output tensor, datatype and format is same as + * input result. \n + + * @attention Constraints: + * This operator only supports dataType: float16/float32, format: ND/5HD. \n + + * @par Third-party framework compatibility + * Compatible with the Pytorch operator HardtanhGrad. + */ +REG_OP(HardtanhGrad) + .INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */ + .INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Second operand." */ + .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Result, has same element type as two inputs" */ + .ATTR(min_val, Float, -1.0) + .ATTR(max_val, Float, 1.0) + .OP_END_FACTORY_REG(HardtanhGrad) + +/** +* @brief Calculates the softplus loss function with attributes of beta and threshold. \n + +* @par Inputs: +* One inputs, including: +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li beta: An optional float. Defaults to "1.0" \n + +* @li threshold: An optional float. Defaults to "20.0" \n + +* @par Outputs: +* @li y: A mutable Tensor. Has the same type as "x" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Softplus. +*/ +REG_OP(SoftplusV2) + .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(beta, Float, 1.0) + .ATTR(threshold, Float, 20.0) + .OP_END_FACTORY_REG(SoftplusV2) + +/** +* @brief Calculates the reversed outputs of the function "softplus_v2". \n + +* @par Inputs: +* Two inputs, including: +* @li input_gradients: A mutable Tensor. Must be one of the following types: +* float16, float32. +* @li input_features: A mutable Tensor of the same type as "input_gradients" \n + +* @par Attributes: +* @li beta: An optional float. Defaults to "1.0" \n + +* @li threshold: An optional float. Defaults to "20.0" \n + +* @par Outputs: +* @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SoftplusGrad. +*/ +REG_OP(SoftplusV2Grad) + .INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(beta, Float, 1.0) + .ATTR(threshold, Float, 20.0) + .OP_END_FACTORY_REG(SoftplusV2Grad) + +/** + * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) + * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. + * + * @par inputs + * one input including: + * @li x: input A Tensor. Must be one of the following types: float32, float16 + * + * @par output + * one output including: + * @li y:A Tensor of the same type as x + * + */ +REG_OP(ThresholdedRelu) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(alpha, Float, 1.0) + .OP_END_FACTORY_REG(ThresholdedRelu) + +/** +* @brief Calculate the hard shrinkage function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Hardshrink. \n +*/ +REG_OP(HardShrink) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .OP_END_FACTORY_REG(HardShrink) + +/** +*@brief Calculate the hard shrink grad function. \n +* +* Computes the gradient for the HardShrink: if x > lambda or x < -lambda, x,otherwise 0 +* +*@par Inputs: +*Two inputs, including: +* @li gradients: A tensor. Must be one of the following types: +* float16, float32. \n +* @li features: A tensor. Must be one of the following types: +* float16, float32. \n +* +*@par Outputs: +*backprops: A Tensor with the same type and shape of features's. \n +* +*@par Attributes: +*@li lambd: An optional float.Defaults to 0.5. \n +* +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Hardshrink_backward. \n +*/ + REG_OP(HardShrinkGrad) + .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .OP_END_FACTORY_REG(HardShrinkGrad) + +/** +* @brief Calculate the hard sigmoid function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32, int32. \n + +* @par Attributes: +* @li alpha: An optional float. Defaults to 0.16666666. \n +* @li beta: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Hardsigmoid. \n +*/ +REG_OP(HardSigmoid) + .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(alpha, Float, 0.16666666) + .ATTR(beta, Float, 0.5) + .OP_END_FACTORY_REG(HardSigmoid) + +/** +* @brief Calculate the soft shrinkage function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Softshrink. \n +*/ +REG_OP(SoftShrink) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .OP_END_FACTORY_REG(SoftShrink) + +/** +* @brief Calculate the reversed outputs of the function "soft_shrink". \n + +* @par Inputs: +* Two inputs, including: +* @li input_grad: A tensor. Must be one of the following types: +* float16, float32. \n +* @li input_x: A tensor of the same dtype as "input_grad". \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor of the same dtype and shape as "input_graxd". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SoftShrinkGrad. \n +*/ +REG_OP(SoftShrinkGrad) + .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .OP_END_FACTORY_REG(SoftShrinkGrad) + +/** +*@brief Calculate the gradient of log simoid. \n + +*@par Inputs: +*Two inputs, including: +* @li grads: A tensor, gradient of previous layer. Must be one of the following types: +* float16, float32. \n +* @li features: A tensor, input of log sigmoid. Must be one of the following types: +* float16, float32. \n + +*@par Outputs: +*One outputs, including: +* @li backprops: A tensor with the same type of and shape of grads. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator LogSigmoidBackward. \n +*/ +REG_OP(LogSigmoidGrad) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(LogSigmoidGrad) + +/** +*@brief Calculate -ln(1+e^(-x)). \n + +*@par Inputs: +*One inputs, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32. \n + +*@par Outputs: +*One outputs, including: +* @li y: A tensor with the same type and shape of x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator LogSigmoid. \n +*/ +REG_OP(LogSigmoid) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */ + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */ + .OP_END_FACTORY_REG(LogSigmoid) + +/** +*@brief Calculate the backward outputs of the function "hard_sigmoid" \n + +*@par Inputs: +*One inputs, including: +* @li grads: A tensor. Must be one of the following types: +* float16, float32. \n +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +*@par Outputs: +*One outputs, including: +* @li y: A tensor with the same type and shape of x's. \n + +* @par Attributes: +* @li alpha: An optional float. Defaults to 0.16666666. \n +* @li beta: An optional float. Defaults to 0.5. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator LogSigmoidGrad. \n +*/ +REG_OP(HardSigmoidGrad) + .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(alpha, Float, 0.16666666) + .ATTR(beta, Float, 0.5) + .OP_END_FACTORY_REG(HardSigmoidGrad) + +/** +* @brief Calculate the shrink function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n +* @li bias: An optional float. Defaults to 0.0. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the ONNX operator Shrink. \n +*/ +REG_OP(Shrink) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .ATTR(bias, Float, 0.0) + .OP_END_FACTORY_REG(Shrink) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h index 8d7ef9f9..f36d2935 100644 --- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h +++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/outfeed_ops.h b/third_party/fwkacllib/inc/ops/outfeed_ops.h index e0b783bc..53b9d701 100644 --- a/third_party/fwkacllib/inc/ops/outfeed_ops.h +++ b/third_party/fwkacllib/inc/ops/outfeed_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index f746b3b3..6854c866 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -101,7 +101,7 @@ REG_OP(FillD) */ REG_OP(BroadcastTo) .INPUT(x, TensorType::BasicType()) - .INPUT(shape, TensorType({DT_INT32})) + .INPUT(shape, TensorType({DT_INT32,DT_INT64})) .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(BroadcastTo) @@ -161,7 +161,7 @@ REG_OP(Pad) *@brief Pads a tensor . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n +*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n *@par Attributes: *paddings: An optional "vector>". Defaults to "{}". @@ -180,8 +180,8 @@ REG_OP(Pad) * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. */ REG_OP(PadD) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .REQUIRED_ATTR(paddings, ListListInt) .OP_END_FACTORY_REG(PadD) @@ -213,7 +213,7 @@ REG_OP(PadV2) *@brief Pads a tensor . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n +*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n *constant_values: A Tensor. Must have the same type as input. *@par Attributes: @@ -227,10 +227,7 @@ REG_OP(PadV2) *y: A Tensor of the same type as "x" . \n *@par Third-party framework compatibility: -* Compatible with TensorFlow operator Pad. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. +* Compatible with TensorFlow operator PadV2. */ REG_OP(PadV2D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) @@ -272,42 +269,42 @@ REG_OP(PadV3) .ATTR(paddings_contiguous, Bool, true) .OP_END_FACTORY_REG(PadV3) -/** -*@brief Pads a tensor. - -*@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32. - -*@par Attributes: -* @li paddings: An required "vector>". -* For each dimension D of input, paddings[D, 0] indicates how many -* values to add before the contents of tensor in that dimension, -* and paddings[D, 1] indicates how many values to add after the -* contents of tensor in that dimension. -* @li constant_values: An optional int value for pad. -* @li mode: An optional string, Defaults to "constant", indicates paddings mode, -* support "constant", "reflect", "edge" -* @li paddings_contiguous: An optional bool value, Defaults to true. -* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] -* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] - -*@par Outputs: -*y: A Tensor of the same type as "x". - -*@par Third-party framework compatibility: -* Compatible with ONNX operator Pad. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead. -*/ -REG_OP(PadV3D) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) - .REQUIRED_ATTR(paddings, ListListInt) - .ATTR(constant_values, Int, 0) - .ATTR(mode, String, "constant") - .ATTR(paddings_contiguous, Bool, true) - .OP_END_FACTORY_REG(PadV3D) + /** + *@brief Pads a tensor. + + *@par Inputs: + *x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32. + + *@par Attributes: + * @li paddings: An required "vector>". + * For each dimension D of input, paddings[D, 0] indicates how many + * values to add before the contents of tensor in that dimension, + * and paddings[D, 1] indicates how many values to add after the + * contents of tensor in that dimension. + * @li constant_values: An optional int value for pad. + * @li mode: An optional string, Defaults to "constant", indicates paddings mode, + * support "constant", "reflect", "edge" + * @li paddings_contiguous: An optional bool value, Defaults to true. + * If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] + * If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + + *@par Outputs: + *y: A Tensor of the same type as "x". + + *@par Third-party framework compatibility: + * Compatible with ONNX operator Pad. + + * @par Restrictions: + * Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead. + */ + REG_OP(PadV3D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) + .REQUIRED_ATTR(paddings, ListListInt) + .ATTR(constant_values, Int, 0) + .ATTR(mode, String, "constant") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3D) /** *@brief Create a diagonal tensor @@ -403,5 +400,76 @@ REG_OP(EmbeddingRankId) .ATTR(mode, String, "mod") .OP_END_FACTORY_REG(EmbeddingRankId) +/** +*@brief EmbeddingLocalIndex, Sort statistics index according to rank_id \n + +*@par Inputs: +* @li addr_table: A 2D tensor which last dimension must be 3. +* @li index: A tensor with data type int32, int64, uint32, uint64. + +*@par Attributes: +* @li row_memory: The size of Embedding vector in a row, the default is 320. +* @li mode: String type, currently there are two options: 'mod' and 'order' + +*@par Outputs: +* @li local_idx:Index on each server. +* @li nums:The number of local_idx found on each server. +* @li recover_idx:The sorted local_idx element is at the position corresponding +* to the original input index. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Diag. +*/ +REG_OP(EmbeddingLocalIndex) + .INPUT(addr_table, TensorType({DT_UINT64})) + .INPUT(index, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64})) + .OUTPUT(local_idx, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64})) + .OUTPUT(nums, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64})) + .OUTPUT(recover_idx, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64})) + .ATTR(row_memory, Int, 320) + .ATTR(mode, String, "mod") + .OP_END_FACTORY_REG(EmbeddingLocalIndex) + +/** +* @brief Fill the value to a tensor has the specified shape. + +* @par Inputs: +* One inputs, including: +* @li dims: An Tensor, specify the shape that the value to fill. + +* @par Attributes: +* @li value: An optional float value. Defaults to 0.0. + +* @par Outputs: +* @li y: A Tensor. Has the shape specify by attr shape, and full of the value specify by attr value. + +* @par Third-party framework compatibility +* Compatible with the ONNX operator ConstantOfShape. +*/ +REG_OP(FillV2) + .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .ATTR(value, Float, 0) + .OP_END_FACTORY_REG(FillV2) + +/** +* @brief Fill the value to a tensor has the specified shape. + +* @par Attributes: +* @li value: An optional float value. Defaults to 0.0. + +* @li dims: An required listInt to specify the shape that the value to fill. + +* @par Outputs: +* @li y: A Tensor. Has the shape specify by attr shape, and full of the value specify by attr value. + +* @par Third-party framework compatibility +* Compatible with the ONNX operator ConstantOfShape. +*/ +REG_OP(FillV2D) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64})) + .ATTR(value, Float, 0) + .REQUIRED_ATTR(dims, ListInt) + .OP_END_FACTORY_REG(FillV2D) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index 5c7adfd8..b625180a 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,6 +51,246 @@ REG_OP(StringToNumber) .ATTR(out_type, Type, DT_FLOAT) .OP_END_FACTORY_REG(StringToNumber) +/** +*@brief Convert serialized tensorflow.TensorProto prototype to Tensor. +*@brief Parse an Example prototype. +*@par Input: +*serialized: A Tensor of type string. +*dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n + +*@par Attributes: +*num_sparse: type int num of inputs sparse_indices , sparse_values, sparse_shapes +*out_type: output type +*sparse_keys: ListString +*sparse_types: types of sparse_values +*dense_keys: ListString +*dense_shapes: output of dense_defaults shape +*dense_types: output of dense_defaults type \n + +*@par Outputs: +*sparse_indices: A Tensor of type string. +*sparse_values: Has the same type as sparse_types. +*sparse_shapes: A Tensor of type int64 +*dense_values: Has the same type as dense_defaults. + +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +**/ +REG_OP(ParseSingleExample) + .INPUT(serialized, TensorType({DT_STRING})) + .DYNAMIC_INPUT(dense_defaults, TensorType({DT_STRING,DT_FLOAT,DT_INT64})) + .DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(sparse_values, TensorType({DT_STRING,DT_FLOAT,DT_INT64})) + .DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(dense_values, TensorType({DT_STRING,DT_FLOAT,DT_INT64})) + .ATTR(num_sparse, Int, 0) + .ATTR(sparse_keys, ListString, {}) + .ATTR(dense_keys, ListString, {}) + .ATTR(sparse_types, ListType, {}) + .ATTR(Tdense, ListType, {}) + .ATTR(dense_shapes, ListListInt, {}) + .OP_END_FACTORY_REG(ParseSingleExample) + +/** +*@brief Decodes raw file into tensor . \n +*@par Input: +*bytes: A Tensor of type string. + +*@par Attributes: +*little_endian: bool ture +*out_type: output type + +*@par Outputs: +*Output: A Tensor +**/ +REG_OP(DecodeRaw) + .INPUT(bytes, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_BOOL,DT_FLOAT16,DT_DOUBLE,DT_FLOAT, + DT_INT64,DT_INT32,DT_INT8,DT_UINT8,DT_INT16, + DT_UINT16,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(out_type, Type, DT_FLOAT) + .ATTR(little_endian, Bool, true) + .OP_END_FACTORY_REG(DecodeRaw) + +/** +*@brief Convert serialized tensorflow.TensorProto prototype to Tensor. \n + +*@par Inputs: +*serialized: A Tensor of string type. Scalar string containing serialized +*TensorProto prototype. \n + +*@par Attributes: +*out_type: The type of the serialized tensor. The provided type must match the +*type of the serialized tensor and no implicit conversion will take place. \n + +*@par Outputs: +*output: A Tensor of type out_type. \n + +*@attention Constraints: +*The implementation for StringToNumber on Ascend uses AICPU, +*with badperformance. \n + +*@par Third-party framework compatibility +*@li compatible with tensorflow ParseTensor operator. +*/ +REG_OP(ParseTensor) + .INPUT(serialized, TensorType({DT_STRING})) + .OUTPUT(output, TensorType(DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, + DT_UINT64, DT_BOOL, DT_DOUBLE, DT_STRING, + DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(out_type, Type, DT_FLOAT) + .OP_END_FACTORY_REG(ParseTensor) + +/** +*@brief Converts each string in the input Tensor to the specified numeric +*type . \n + +*@par Inputs: +*Inputs include: +*records: Each string is a record/row in the csv and all records should have the +*same format. \n +*record_defaults: One tensor per column of the input record, with either a +*scalar default value for that column or an empty vector if the column is +*required. \n + +*@par Attributes: +*OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n +*field_delim: char delimiter to separate fields in a record. \n +*use_quote_delim: If false, treats double quotation marks as regular characters +*inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). \n +*na_value: Additional string to recognize as NA/NaN. \n + +*@par Outputs: +*output: A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The implementation for StringToNumber on Ascend uses AICPU, with bad +*performance. \n + +*@par Third-party framework compatibility +*@li compatible with tensorflow StringToNumber operator. +*/ +REG_OP(DecodeCSV) + .INPUT(records, TensorType({DT_STRING})) + .DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, + DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, + DT_INT64, DT_STRING})) + .ATTR(OUT_TYPE, ListType, {}) + .ATTR(field_delim, String, ",") + .ATTR(use_quote_delim, Bool, true) + .ATTR(na_value, String, ",") + .ATTR(select_cols, ListInt, {}) + .OP_END_FACTORY_REG(DecodeCSV) + +/** +*@brief Convert serialized tensorflow.TensorProto prototype to Tensor. +*@brief Parse an Example prototype. +*@par Input: +*serialized: A Tensor of type string. \n +*name:A Tensor of type string. \n +*sparse_keys: Dynamic input tensor of string. \n +*dense_keys: Dynamic input tensor of string \n +*dense_defaults: Dynamic input tensor type as string, float, int64. \n + +*@par Attributes: +*Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n +*Ndense: Number of dense_keys \n +*sparse_types: types of sparse_values \n +*Tdense: Type of dense_defaults dense_defaults and dense_values \n +*dense_shapes: output of dense_defaults shape \n + +*@par Outputs: +*sparse_indices: A Tensor of type string. \n +*sparse_values: Has the same type as sparse_types. \n +*sparse_shapes: A Tensor of type int64 \n +*dense_values: Has the same type as dense_defaults. \n +*@par Third-party framework compatibility \n +*@li compatible with tensorflow StringToNumber operator. \n +*/ +REG_OP(ParseExample) + .INPUT(serialized, TensorType({DT_STRING})) + .INPUT(name, TensorType({DT_STRING})) + .DYNAMIC_INPUT(sparse_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(dense_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .ATTR(Nsparse, Int, 0) + .ATTR(Ndense, Int, 0) + .ATTR(sparse_types, ListType, {}) + .ATTR(Tdense, ListType, {}) + .ATTR(dense_shapes, ListListInt, {}) + .OP_END_FACTORY_REG(ParseExample) + +/** +*@brief Transforms a scalar brain.SequenceExample proto (as strings) into typed +*tensors. +*@par Input: +*serialized: A Tensor of type string. \n +*feature_list_dense_missing_assumed_empty:A Tensor of type string. \n +*context_sparse_keys: Dynamic input tensor of string. \n +*context_dense_keys: Dynamic input tensor of string \n +*feature_list_sparse_keys: Dynamic input tensor of string \n +*feature_list_dense_keys: Dynamic input tensor of string \n +*context_dense_defaults: Dynamic input tensor of string, float, int64 \n +*debug_name: A Tensor of type string. \n + +*@par Attributes: +*Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n +*Ncontext_dense: Number of context_dense_keys \n +*Nfeature_list_sparse: Number of feature_list_sparse_keys \n +*Nfeature_list_dense: Number of feature_list_dense_keys \n +*context_sparse_types: Types of context_sparse_values \n +*Tcontext_dense: Number of dense_keys \n +*feature_list_dense_types: Types of feature_list_dense_values \n +*context_dense_shapes: Shape of context_dense \n +*feature_list_sparse_types: Type of feature_list_sparse_values \n +*feature_list_dense_shapes: Shape of feature_list_dense \n + +*@par Outputs: +*context_sparse_indices: Dynamic output tensor of type int64. \n +*context_sparse_values: Dynamic output tensor of type string, float, int64. \n +*context_sparse_shapes: Dynamic output tensor of type int64 \n +*context_dense_values: Dynamic output tensor of type string, float, int64. \n +*feature_list_sparse_indices: Dynamic output tensor of type int64. \n +*feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n +*feature_list_sparse_shapes: Dynamic output tensor of type int64 \n +*feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n +*@par Third-party framework compatibility \n +*@li compatible with tensorflow StringToNumber operator. \n +*/ +REG_OP(ParseSingleSequenceExample) + .INPUT(serialized, TensorType({DT_STRING})) + .INPUT(feature_list_dense_missing_assumed_empty, TensorType({DT_STRING})) + .DYNAMIC_INPUT(context_sparse_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(context_dense_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(feature_list_sparse_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(feature_list_dense_keys, TensorType({DT_STRING})) + .DYNAMIC_INPUT(context_dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .INPUT(debug_name, TensorType({DT_STRING})) + .DYNAMIC_OUTPUT(context_sparse_indices, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(context_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(context_sparse_shapes, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(context_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(feature_list_sparse_indices, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(feature_list_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .DYNAMIC_OUTPUT(feature_list_sparse_shapes, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(feature_list_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING})) + .ATTR(Ncontext_sparse, Int, 0) + .ATTR(Ncontext_dense, Int, 0) + .ATTR(Nfeature_list_sparse, Int, 0) + .ATTR(Nfeature_list_dense, Int, 0) + .ATTR(context_sparse_types, ListType, {}) + .ATTR(Tcontext_dense, ListType, {}) + .ATTR(feature_list_dense_types, ListType, {}) + .ATTR(context_dense_shapes, ListListInt, {}) + .ATTR(feature_list_sparse_types, ListType, {}) + .ATTR(feature_list_dense_shapes, ListListInt, {}) + .OP_END_FACTORY_REG(ParseSingleSequenceExample) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index b53cfeb6..69d5e67e 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -60,6 +60,26 @@ REG_OP(Dequantize) .ATTR(mode, String, "MIN_COMBINED") .OP_END_FACTORY_REG(Dequantize) +/** +*@brief Quantizes the input . \n +*@par Inputs: +*x: shape and dtype of input_x. \n +*scales: shape and dtype of input_scales. \n +*zero_points: shape and dtype of input_zero_points \n +*@par Attributes: +*@li axis: the processed dim. \n +*@par Outputs: +*y: shape and dtype of output_y, should be same shape as input, dtype is same as the quantified type . \n +*/ +REG_OP(Quantize) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scales, TensorType({DT_FLOAT})) + .INPUT(zero_points, TensorType({DT_INT8,DT_UINT8,DT_INT32})) + .OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT32})) + .REQUIRED_ATTR(dtype, String) + .ATTR(axis, Int, 1) + .OP_END_FACTORY_REG(Quantize) + /** *@brief Quantizes the input . \n @@ -194,7 +214,7 @@ REG_OP(AscendRequant) *@brief Requantizes the input of int16 . \n *@par Inputs: -*@li x: An NC1HWC0 tensor of type int16, specifying the input. +*@li x0: An NC1HWC0 tensor of type int16, specifying the input. *@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. *@li x1: An NC1HWC0 tensor of type int16 . \n @@ -203,22 +223,21 @@ REG_OP(AscendRequant) *@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n *@par Outputs: -*@li y: The dequantized output tensor of type int8 and with format NC1HWC0. +*@li y0: The dequantized output tensor of type int8 and with format NC1HWC0. *@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(AscendRequantS16) - .INPUT(x, TensorType({DT_INT16})) + .INPUT(x0, TensorType({DT_INT16})) .INPUT(req_scale, TensorType({DT_UINT64})) .OPTIONAL_INPUT(x1, TensorType({DT_INT16})) - .OUTPUT(y, TensorType({DT_INT8})) + .OUTPUT(y0, TensorType({DT_INT8})) .OUTPUT(y1, TensorType({DT_INT16})) .ATTR(dual_output, Bool, false) .ATTR(relu_flag, Bool, false) .OP_END_FACTORY_REG(AscendRequantS16) - } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h index 9b31aa8e..20484623 100644 --- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index 13488a25..020e3da4 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h index 8af4f867..258b0ca1 100644 --- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index b46da435..b65a68f1 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -356,6 +356,39 @@ REG_OP(DropOutGenMask) .ATTR(seed2, Int, 0) .OP_END_FACTORY_REG(DropOutGenMask) + +/** +*@brief Generate random uint8 mask for dropout v3 . \n + +*@par Inputs: +include: +*@li shape:The shape of the output tensor. +*@li prob:0-D. Prob of 1 . \n + +*@par Attributes: +*@li seed:If either seed or seed2 are set to be non-zero, the random number +*generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2:A second seed to avoid seed collision . \n + +*@par Outputs: +*y:Output (1-D) random number using uint8 data format . \n + +*@attention Constraints: +*The output is aligned with 16 + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. + +*@see DropOutGenMaskV3() +*/ +REG_OP(DropOutGenMaskV3) + .INPUT(shape, TensorType({ DT_INT32, DT_INT64 })) + .INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT })) + .OUTPUT(y, TensorType({ DT_UINT8 })) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(DropOutGenMaskV3) + /** *@brief Generates values in an interval . \n @@ -495,6 +528,62 @@ REG_OP(ShuffleChannel) DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) .ATTR(group, Int, 1) .OP_END_FACTORY_REG(ShuffleChannel) + +/** + * @briefGenerate a tensor of samples from a multinomial + * distribution according to the probabilities of each of + * the possible outcomes. + * + * @par inputs + * one input including: + * @li x:Input tensor with shape [batch_size, class_size], + * where class_size is the number of all possible outcomes. + * Each value along the axis zero represents the unnormalized + * log-probability of each corresponding outcome in a batch. + * + * @par output + * one output including: + * @li y:Output tensor with shape [batch_size, sample_size], + * where sample_size is the number of times to sample. + * Each value along the axis zero represents the outcome of + * the corresponding sample in a batch. + * + * @par Restrictions: + * Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. + */ +REG_OP(MultinomialFuss) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .ATTR(dtype, Int, 6) + .ATTR(sample_size, Int, 1) + .ATTR(seed, Float, 0) + .OP_END_FACTORY_REG(MultinomialFuss) + +/** +* @brief During training, randomly zeroes some of the elements of the input tensor +* with probability +* +* @par Inputs: +* @li x: A ND Tensor. Must be one of the following data types: Float, Float16 +* @li seed: A ND Tensor. Must be one of the following data types: Float +* +* @par Attributes: +* @li p: probability of an element to be zeroed +* +* @par Outputs: +* @li y: A tensor with the same shape and type as "x". +* @li mask: A tensor with the same shape and type as "x". +* @li new_seed: A tensor with the same shape and type as "seed". +*/ + +REG_OP(DropoutV2) + .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) + .INPUT(seed, TensorType({ DT_FLOAT })) + .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) + .OUTPUT(mask, TensorType({ DT_FLOAT })) + .OUTPUT(seed, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(p, Float) + .OP_END_FACTORY_REG(DropoutV2) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 6f44093e..97c7b8e1 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ namespace ge { *@attention Constraints: * This operator is a BatchNorm fusion operator for updating the moving * averages for training. -* This operator is used in conjunction with BNTrainingUpdate. +* This operator is used in conjunction with BNTrainingReduce. */ REG_OP(BNTrainingReduce) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -45,6 +45,27 @@ REG_OP(BNTrainingReduce) .OUTPUT(square_sum, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(BNTrainingReduce) +/** +*@brief Performs reduced batch normalization . \n + +*@par Inputs: +*x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n + +*@par Outputs: +*@li sum: A 3D Tensor of type float32 for SUM reduced "x". +*@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n + +*@attention Constraints: +* This operator is a BatchNorm fusion operator for updating the moving +* averages for training. +* This operator is used in conjunction with BN3DTrainingReduce. +*/ +REG_OP(BN3DTrainingReduce) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(sum, TensorType({DT_FLOAT})) + .OUTPUT(square_sum, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BN3DTrainingReduce) + /** *@brief Performs the backpropagation of BatchNorm . \n @@ -88,6 +109,49 @@ REG_OP(BNTrainingReduceGrad) .ATTR(epsilon, Float, 0.0001) .OP_END_FACTORY_REG(BNTrainingReduceGrad) +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Seven inputs, including: +*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for +* the gradient. +*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. +*@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0, +* for the mean of "x". +*@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0, +* for the variance of "x". +*@li scale: A 6D Tensor of type float32, with format NDC1HWC0. +*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, +* for the mean of "x". +*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, +* for the variance of "x" . \n + +*@par Attributes: +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x" . \n + +*@par Outputs: +*y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset +* of "x" . \n + +*@attention Constraints: +* The preceding layer of this operator must be BN3DTrainingReduceGrad . \n + +*@see BN3DTrainingReduceGrad +*/ +REG_OP(BN3DTrainingReduceGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(diff_scale, TensorType({DT_FLOAT})) + .INPUT(diff_offset, TensorType({DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(batch_mean, TensorType({DT_FLOAT})) + .INPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .OP_END_FACTORY_REG(BN3DTrainingReduceGrad) + /** *@brief Performs reduced batch normalization . \n @@ -120,7 +184,7 @@ REG_OP(BNTrainingReduceGrad) *@attention Constraints: *@li This operator is a BatchNorm fusion operator for updating the moving averages for training. -*This operator is used in conjunction with BNTrainingReduce. +*This operator is used in conjunction with BNTrainingUpdate. *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square * root instruction. */ @@ -141,6 +205,59 @@ REG_OP(BNTrainingUpdate) .OUTPUT(batch_variance, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(BNTrainingUpdate) +/** +*@brief Performs reduced batch normalization . \n + +*@par Inputs: +* Seven inputs, including: (NDC1HWC0 supported) +*@li x: A 6D Tensor of type float16 or float32. +*@li sum: A 6D Tensor of type float32 for the output of operator +* BN3DTrainingUpdate. +*@li square_sum: A 6D Tensor of type float32 for the output of operator +* BN3DTrainingUpdate. +*@li scale: A 6D Tensor of type float32, for the scaling factor. +*@li offset: A 6D Tensor of type float32, for the scaling offset. +*@li mean: A 6D Tensor of type float32, for the updated mean. +*@li variance: A 6D Tensor of type float32, for the updated variance . \n + +*@par Attributes: +*@li epsilon: A required float32, specifying the small value added to variance +* to avoid dividing by zero. +*@li factor: A required float32, specifying the weight for updating the mean +* and variance . \n + +*@par Outputs: +* Five outputs, including: (NDC1HWC0 supported) +*@li y: A 6D Tensor of type float16 or float32, for normalized "x". +*@li mean: A 6D Tensor of type float32, for the updated mean. +*@li variance: A 6D Tensor of type float32, for the updated variance. +*@li batch_mean: A 6D Tensor of type float32, for the mean of "x". +*@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n + +*@attention Constraints: +*@li This operator is a BatchNorm fusion operator for updating the moving +averages for training. +*This operator is used in conjunction with BN3DTrainingUpdate. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square +* root instruction. +*/ +REG_OP(BN3DTrainingUpdate) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(sum, TensorType({DT_FLOAT})) + .INPUT(square_sum, TensorType({DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT})) + .INPUT(variance, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(factor, Float) + .REQUIRED_ATTR(epsilon, Float) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(mean, TensorType({DT_FLOAT})) + .OUTPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BN3DTrainingUpdate) + /** *@brief Performs batch normalization for inference . \n @@ -284,6 +401,40 @@ REG_OP(BNTrainingUpdateGrad) .OUTPUT(diff_offset, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(BNTrainingUpdateGrad) +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Four inputs, including: +*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, +* for the gradient. +*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. +*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, +* for the mean of "x". +*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, +* for the variance of "x" . \n + +*@par Attributes: +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x" . \n + +*@par Outputs: +*@li diff_scale: A Tensor of type float32, with format NDC1HWC0, +* for the offset of "scale". +*@li diff_offset: A Tensor of type float32, with format NDC1HWC0, +* for the offset of "offset" . \n + +*/ +REG_OP(BN3DTrainingUpdateGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(batch_mean, TensorType({DT_FLOAT})) + .INPUT(batch_variance, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .OUTPUT(diff_scale, TensorType({DT_FLOAT})) + .OUTPUT(diff_offset, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BN3DTrainingUpdateGrad) + /** *@brief Performs the backpropagation of BatchNorm for inference . \n @@ -635,8 +786,8 @@ REG_OP(ReduceMin) * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. */ REG_OP(ReduceMinD) - .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) - .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) .REQUIRED_ATTR(axes, ListInt) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceMinD) @@ -747,14 +898,14 @@ REG_OP(Reduction) *@brief Computes the euclidean norm of elements across dimensions of a tensor . \n *@par Inputs: -*@li input_tensor: A Tensor. Must be one of the following types: float16, float32, int32. +*@li x: A Tensor. Must be one of the following types: float16, float32, int32. *@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None" . \n *@par Attributes: *keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False" . \n *@par Outputs: -*output_tensor: A Tensor. Must be one of the following types: float16, float32, int32 . \n +*y: A Tensor. Must be one of the following types: float16, float32, int32 . \n *@attention Constraints: * If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n @@ -821,7 +972,7 @@ Defaults to "0.00001" . \n *batch_ variance: A Tensor of type float32 for the result variance . \n *@attention Constraints: -*For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction. +*For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. */ REG_OP(INInferV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -839,7 +990,7 @@ REG_OP(INInferV2) *@brief Performs reduced instance normalization . \n *@par Inputs: -*x: A Tensor of type float16 or float32, with format NC1HWC0 . \n +*x: A Tensor of type float16 or float32. \n *@par Outputs: *@li sum: A Tensor of type float32 for SUM reduced "x". @@ -862,19 +1013,19 @@ REG_OP(INTrainingReduceV2) *@par Inputs: * Seven inputs, including: (NC1HWC0supported) *@li x: A Tensor of type float16 or float32. -*@li sum: A T [N, C1, 1, 1, C0] ensor of type float32 for the output of operator INTrainingReduceV2. -*@li square_sum: A [N, C1, 1, 1, C0] Tensor of type float32 for the output of operator INTrainingReduceV2. -*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. -*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. -*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated mean. -*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated variance . \n +*@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. +*@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. +*@li gamma: A Tensor of type float32, for the scaling gamma. +*@li beta: A Tensor of type float32, for the scaling beta. +*@li mean: A Tensor of type float32, for the updated mean. +*@li variance: A Tensor of type float32, for the updated variance . \n *@par Attributes: *@li momentum: A required float32, specifying the momentum to update mean and var. *@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n *@par Outputs: -* Three outputs, including: (NC1HWC0 supported) +* Three outputs *@li y: A Tensor of type float16 or float32, for normalized "x". *@li batch_mean: A Tensor of type float32, for the updated mean. *@li batch_variance: A Tensor of type float32, for the updated variance . \n @@ -882,7 +1033,7 @@ REG_OP(INTrainingReduceV2) *@attention Constraints: *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with INTrainingReduceV2. -*@li For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. */ REG_OP(INTrainingUpdateV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -965,7 +1116,7 @@ for the updated variance. *@attention Constraints: *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with GNTrainingUpdate. -*@li For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. */ REG_OP(GNTrainingUpdate) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -982,6 +1133,98 @@ REG_OP(GNTrainingUpdate) .OUTPUT(batch_variance, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(GNTrainingUpdate) +/** +*@brief Joins a string Tensor across the given dimensions. \n + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. +*@li reduction_indices:A Tensor of type int. The text to be processed. + +*@par Attributes: +*@li keep_dims:A bool, An optional bool. Defaults to False. If True, retain reduced dimensions with length 1.. +*@li separator:string. + +*@par output: +*@li output::A Tensor of type string.. +*/ +REG_OP(ReduceJoin) + .INPUT(input, TensorType({DT_STRING})) + .INPUT(reduction_indices, TensorType({DT_INT32})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(keep_dims, Bool, true) + .ATTR(separator, String, "") + .OP_END_FACTORY_REG(ReduceJoin) + +/** +* @brief Calculates the standard deviation and average value of Tensors. + +* @par Inputs: +* @li x: A Tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* Three Attributes, including: +* @li dim: An optional listint, Defaults to "None". \n + +* @li unbiased: An optional bool. Defaults to "True". +* If "True", Use Bessel Correction. +* If "False", Do not use Bessel Correction. \n + +* @li keepdim: An optional bool. Defaults to "False". +* If "True", Keep the original tensor dimension. +* If "False", Do not keep the original tensor dimension. \n + +* @par Outputs: +* Two Outputs, including: +* @li y1: A Tensor. Has the same type as "x". +* @li y2: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator ReduceStd. +*/ +REG_OP(ReduceStd) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(dim, ListInt, {}) + .ATTR(unbiased, Bool, true) + .ATTR(keepdim, Bool, false) + .OP_END_FACTORY_REG(ReduceStd) + +/** +* @brief Calculates the standard deviation of Tensors. + +* @par Inputs: +* include: +* @li x: A Tensor. Must be one of the following types: float16, float32. \n +* @li mean: A Tensor. It's the mean of X. Must be one of the following types: float16, float32. \n + + +* @par Attributes: +* Three Attributes, including: +* @li dim: An optional listint, Defaults to "None". \n +* @li unbiased: An optional bool. Defaults to "True". +* If "True", Use Bessel Correction. +* If "False", Do not use Bessel Correction. \n +* @li keepdim: An optional bool. Defaults to "False". +* If "True", Keep the original tensor dimension. +* If "False", Do not keep the original tensor dimension. \n + +* @par Outputs: +* @li y: A Tensor. It's the std of X. Has the same type as "x". + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator ReduceStdWithMean. +*/ +REG_OP(ReduceStdWithMean) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(dim, ListInt, {}) + .ATTR(unbiased, Bool, true) + .ATTR(keepdim, Bool, false) + .OP_END_FACTORY_REG(ReduceStdWithMean) } //namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h index 1b60d42a..74ac83f8 100644 --- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h +++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index 84723872..80546860 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ namespace ge { *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. *@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n +*@li mask:A 1D Tensor. Must be one of the following types: uint8. *@par Attributes: *@li keep_prob:An integer identifying the keep prob in the op. Default to 1. @@ -42,7 +43,6 @@ namespace ge { *@par Outputs: *seven outputs: -*@li mask:A 1D Tensor. Must be one of the following types: uint8. *@li ct:A 4D Tensor. Must be one of the following types: float16, float32. *@li ht:A 4D Tensor. Must be one of the following types: float16. *@li it:A 4D Tensor. Must be one of the following types: float16, float32. @@ -187,16 +187,16 @@ REG_OP(DynamicRNNGrad) *@brief: DynamicRNN calculation. *@par Inputs: *ten inputs: -*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. -*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. -*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. -*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. -*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. -*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n +*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:A optional Tensor. Only Support float16 in FRACTAL_NZ and int32 in ND. +*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n *@par Attributes: *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. @@ -209,6 +209,7 @@ REG_OP(DynamicRNNGrad) *@li time_major:An bool identifying the time major in the op. Default to true. *@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. *@li forget_bias:An float identifying the forget bias in the op. Default to 0. +*@li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifjo". Default to "ijfo". *@li is_training:An bool identifying is training in the op. Default to true . \n *@par Outputs: @@ -221,12 +222,14 @@ REG_OP(DynamicRNNGrad) *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@par Third-party framework compatibility: +* Compatible with the TF operator LSTM. */ REG_OP(DynamicRNN) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -251,9 +254,237 @@ REG_OP(DynamicRNN) .ATTR(time_major, Bool, true) .ATTR(activation, String, "tanh") .ATTR(forget_bias, Float, 0.0) + .ATTR(gate_order, String, "ijfo") .ATTR(is_training, Bool, true) .OP_END_FACTORY_REG(DynamicRNN) +/** +*@brief: DynamicRNNV2 calculation. +*@par Inputs: +*ten inputs: +*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32. +*The format must be FRACTAL_Z. +*@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32. +*The format must be FRACTAL_Z. +*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND. +*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n + +*@par Attributes: +*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". +*Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li use_peephole:An bool identifying if use peephole in the op. Default to false. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". +*Only tanh is currently supported. +*@li recurrent_activation:An string identifying the type of activation function in the op. Default to "sigmoid". +*Supprot "sigmoid" and "hard_sigmoid". In general, set "hard_sigmoid" for TF Keras LSTM. +*@li forget_bias:An float identifying the forget bias in the op. Default to 0. +*@li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifco". Default to "ijfo". +*Set "ijfo" for TF operator LSTM, Set "ifco" for TF Keras LSTM. +*@li stateful: An bool identifying the type of stateful in the op. Default to fasle.Only false is currently supported. +*@li merge_mode: An string identifying the type of merge_modein the op. Default to "concat". +*Only "concat" is currently supported +*@li is_training:An bool identifying is training in the op. Default to true . \n + +*@par Outputs: +*eight outputs: +*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*Return the last output_h. +*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*Return the last output_c. +*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@par Third-party framework compatibility: +* Compatible with the TF operator LSTM or TF keras operator LSTM. +*/ + +REG_OP(DynamicRNNV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(cell_type, String, "LSTM") + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(use_peephole, Bool, false) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(recurrent_activation, String, "sigmoid") + .ATTR(forget_bias, Float, 0.0) + .ATTR(gate_order, String, "ijfo") + .ATTR(stateful, Bool, false) + .ATTR(merge_mode, String, "concat") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicRNNV2) + +/** +*@brief: DynamicRNNV3 calculation. +*@par Inputs: +*ten inputs: +*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND. +*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n +*@li real_mask:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li project:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li use_peephole:An bool identifying if use peephole in the op. Default to false. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. +*@li forget_bias:An float identifying the forget bias in the op. Default to 0. +*@li is_training:An bool identifying is training in the op. Default to true . \n + +*@par Outputs: +*eight outputs: +*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@par Third-party framework compatibility: +* Compatible with the TF operator LSTM. +*/ +REG_OP(DynamicRNNV3) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(cell_type, String, "LSTM") + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(use_peephole, Bool, false) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(forget_bias, Float, 0.0) + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicRNNV3) + +/** +*@brief: DynamicLSTMV2 calculation. +*@par Inputs: +*ten inputs: +*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li w_xc_x_static:A optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li h0:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li c0:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wci:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wcf:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wco:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li mask:A optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . + +*@par Attributes: +*@li num_output:An integer identifying the num projection in the op. Default to 0. +*@li expose_hidden:An bool identifying the expose_hidden in the op. Default to flase. +*@li need_output_last:An bool identifying the time major in the op. Default to true. +*@li forget_bias:An float identifying the forget bias in the op. Default to 0. + +*@par Outputs: +*eight outputs: +*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@par Third-party framework compatibility: +* Compatible with the Caffe operator LSTM. +*@par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicLSTMV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(cont, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(w_xc_x_static, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(h0, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(c0, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(last_output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(last_output_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(num_output, Int, 0) + .ATTR(expose_hidden, Bool, false) + .ATTR(need_output_last, Bool, false) + .ATTR(forget_bias, Float, 0.0) + .OP_END_FACTORY_REG(DynamicLSTMV2) + /** *@brief: LSTMInputGrad calculation. *@par Inputs: @@ -297,6 +528,60 @@ REG_OP(LSTMInputGrad) .OP_END_FACTORY_REG(LSTMInputGrad) + +/** +*@brief: Dynamic LSTM Cell grad calculation.Calculate the gradient of gates and cell state. +*@par Inputs: +*twelve inputs: +*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li t_state:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ . \n + +*@par Attributes: +*@li forget_bias:An integer identifying the forget bias in the op. Default to 1. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported . \n +*@li direction:An string that marks the calculation sequence of the operator. Default to "Forward". +*@li gate_order:An string mark the order of output 4 gate. Default to "ijfo". + +*@par Outputs: +*two outputs: +*@li dgate:A 4D Tensor. Must be one of the following types: float16. +*@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicLSTMGradCell) + .INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(t_state, TensorType({DT_INT32, DT_INT32})) + .OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(forget_bias, Float, 1) + .ATTR(activation, String, "") + .ATTR(direction, String, "Forward") + .ATTR(gate_order, String, "ijfo") + .OP_END_FACTORY_REG(DynamicLSTMGradCell) + + /** *@brief: Basic LSTM Cell backward calculation.Calculate the gradient of input and hidden state. *@par Inputs: @@ -475,9 +760,9 @@ REG_OP(BasicRNNCell) .OP_END_FACTORY_REG(BasicRNNCell) /** -*@brief: DynamicGRU calculation. +*@brief DynamicGRU calculation. *@par Inputs: -*seven inputs: \n +*seven inputs: *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li b:Must be one of the following types: float16, float32. The format must be ND. @@ -497,7 +782,7 @@ REG_OP(BasicRNNCell) *@li is_training:An bool identifying is training in the op. Default to true. *@par Outputs: -*five outputs: \n +*five outputs: *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. @@ -531,9 +816,9 @@ REG_OP(DynamicGRU) .OP_END_FACTORY_REG(DynamicGRU) /** -*@brief: DynamicGRUV2 calculation. +*@brief DynamicGRUV2 calculation. *@par Inputs: -*seven inputs: \n +*seven inputs: *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. @@ -555,16 +840,13 @@ REG_OP(DynamicGRU) *@li is_training:An bool identifying is training in the op. Default to true. *@par Outputs: -*six outputs: \n +*six outputs: *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. - -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DynamicGRUV2) .INPUT(x, TensorType({DT_FLOAT16})) @@ -592,6 +874,68 @@ REG_OP(DynamicGRUV2) .ATTR(is_training, Bool, true) .OP_END_FACTORY_REG(DynamicGRUV2) + +/** +*@brief DynamicGRUV2Hidden calculation. +*@par Inputs: +*five inputs: +*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. +*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". +Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". +Only tanh is currently supported. +*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. +*@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. +*@li is_training:An bool identifying is training in the op. Default to true. + +*@par Outputs: +*six outputs: +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicGRUV2Hidden) + .INPUT(x_weight_input, TensorType({DT_FLOAT32})) + .INPUT(weight_hidden, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(gate_order, String, "zrh") + .ATTR(reset_after, Bool, true) + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicGRUV2Hidden) + + /** *@brief: DynamicGRUV2Grad calculation. *@par Inputs: @@ -618,7 +962,6 @@ REG_OP(DynamicGRUV2) *@li cell_clip:An float identifying the cell clip in the op. Default to -1. *@li num_proj:An integer identifying the num projection in the op. Default to 0. *@li time_major:An bool identifying the time major in the op. Default to true. -*@li bias_type:An string identifying the type of bias_type function in the op. Default to "double_bias". *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. *@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. @@ -630,6 +973,9 @@ REG_OP(DynamicGRUV2) *@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DynamicGRUV2Grad) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -658,7 +1004,6 @@ REG_OP(DynamicGRUV2Grad) .ATTR(cell_clip, Float, -1.0) .ATTR(num_proj, Int, 0) .ATTR(time_major, Bool, true) - .ATTR(bias_type, String, "double_bias") .ATTR(gate_order, String, "zrh") .ATTR(reset_after, Bool, true) .OP_END_FACTORY_REG(DynamicGRUV2Grad) @@ -667,7 +1012,7 @@ REG_OP(DynamicGRUV2Grad) *@brief: GRUV2HiddenGrad calculation. *@par Inputs: *nine inputs: \n -*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. @@ -678,6 +1023,7 @@ REG_OP(DynamicGRUV2Grad) *@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: +*@li t_state:An Int identifying the current t state. Default to [0, 4]. *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. *@par Outputs: @@ -685,10 +1031,12 @@ REG_OP(DynamicGRUV2Grad) *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ -REG_OP(GRUV2HiddenGrad) - .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) +REG_OP(GRUV2HiddenGradCell) + .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -699,8 +1047,197 @@ REG_OP(GRUV2HiddenGrad) .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(t_state, Int, 0) .ATTR(gate_order, String, "zrh") - .OP_END_FACTORY_REG(GRUV2HiddenGrad) + .OP_END_FACTORY_REG(GRUV2HiddenGradCell) + +/** +* @brief Calculates the reversed outputs of the function "embedding". \n + +* @par Inputs: +* Two inputs, including: +* @li grad: A mutable Tensor of word grad. Must be one of the following types: +* float32. +* @li indices: A mutable word index Tensor of the int32 type.\n + +* @par Attributes: +* @li num_weights: An int attr which use to judge how many words in dict. \n + +* @li padding_idx: An int attr judge which word to fill zeros. Defaults to "-1". \n + +* @li scale_grad_by_freq: An optional bool. Defaults to "False". +* If "True", "grad_weight" will be scale by word_frequency. +* If "False", "grad_weight" will not be scale by word_frequency. \n + +* @par Outputs: +* @li grad_weight: A mutable output Tensor of new word grad has the same type as "grads". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator EmbeddingDenseGrad. +*/ +REG_OP(EmbeddingDenseGrad) + .INPUT(grad, TensorType({ DT_FLOAT32 })) /* "First operand." */ + .INPUT(indices, TensorType({ DT_INT32 })) /* "Second operand." */ + .OUTPUT(y, TensorType({ DT_FLOAT32 })) /* "Result, has same element type as two inputs" */ + .REQUIRED_ATTR(num_weights, Int) + .ATTR(padding_idx, Int, -1) + .ATTR(scale_grad_by_freq, Bool, false) + .OP_END_FACTORY_REG(EmbeddingDenseGrad) + +/** +*@brief CommonLSTM calculation. +*@par Inputs: +*eight inputs: \n +*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li sequence_lens:An optional input. A 1D Tensor.Must be one of the following types: int32. The format must be ND. +*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li p:An optional input. Each direction is a 1D Tensor.Must be one of the following types: float16, float32. The format must be ND. + +*@par Attributes: +*@li activation_alpha:Optional scaling values used by some activation functions. Empty is currently supported. +*@li activation_beta:Optional scaling values used by some activation functions. Empty is currently supported. +*@li activations:The list of activation functions. Empty is currently supported. +*@li clip:An float identifying the cell clip in the op. Default to -1. +*@li direction:Specify if the RNN is forward, reverse, or bidirectional. Must be one of forward(default), reverse, or bidirectional. +*@li hidden_size:Number of neurons in the hidden layer. Reserved. +*@li input_forget:Couple the input and forget gates if 1. Reserved. + +*@par Outputs: +*three outputs: \n +*@li y:First dimension is time step, second dimension is direction, others is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*/ + +REG_OP(CommonLSTM) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) + .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(initial_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(p, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(activation_alpha, ListFloat, {}) + .ATTR(activation_beta, ListFloat, {}) + .ATTR(activations, ListString, {}) + .ATTR(clip, Float, -1.0) + .ATTR(direction, String, "forward") + .REQUIRED_ATTR(hidden_size, Int) + .ATTR(input_forget, Int, 0) + .OP_END_FACTORY_REG(CommonLSTM) + +/** + * @brief Calculate the mask. According to hidden_size and num_step, convert seq_length to mask. + * + * @par Inputs: + * @li seq_length: A 1D Tensor. Must be one of the following types: int32. Record the current length of each batch. [batch_size]. + * @li b: A 1D Tensor. Must be one of the following types: fp16/fp32. Record the hidden_size. [4 * hidden_size]. + * @li x: A 3D Tensor. Must be one of the following types: fp16/fp32. Record the num_step/batch_size/input_size. [num_step, batch_size, input_size]. + * + * @par Outputs: + * seq_mask: A 3D Tensor. Must be one of the following types: fp16/fp32. with the shape of [num_step, batch_size, hidden_size]. And has the same type as "b" \n + * + * @par Restrictions: + * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. + */ +REG_OP(RnnGenMaskV2) + .INPUT(seq_length, TensorType({DT_INT32})) + .INPUT(b, TensorType({{DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(RnnGenMaskV2) + +/** +* @brief Common GRU calculation. + +* @par Inputs: +* Eight inputs, including: +* @li x: The input sequences packed (and pontentially padded) into on 3D Tesnor(float16). The format must be FRACTAL_NZ +* @li w: The weight tensor for the gates is 3D Tensor(float16). The format must be FRACTAL_Z +* @li r: The recurrence weight tesnor is 3D Tensor(float16). The format must be FRACTAL_Z +* @li b: The bias tensor for the gates. The format must be ND +* @li sequence_lens: Optional tensor specifying lengths of sequences(int32). The format must be ND +* @li init_h: Optional initial value of the hidden(float16,float32). The format must be FRACTAL_NZ + +* @par Attributes: +* @li activation_alpha: Optional scaling values used by some activation functions. \n +* @li activation_beta: Optional scaling values used by some activation functions. \n +* @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n +* @li clip: Cell clip threshold. \n +* @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n +* @li hidden_size: Number of neurons in the hidden layer. \n +* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n + +* @par Outputs: +* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ +* @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ +*/ +REG_OP(CommonGRU) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) + .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(activation_alpha, ListFloat, {}) + .ATTR(activation_beta , ListFloat, {}) + .ATTR(activations , ListString, {}) + .ATTR(clip, Float, -1.0) + .ATTR(direction, String, "forward") + .REQUIRED_ATTR(hidden_size, Int) + .ATTR(linear_before_reset , Int, 0) + .OP_END_FACTORY_REG(CommonGRU) +/** +* @brief Calculates the reversed outputs of the function "embedding". \n + +* @par Inputs: +* Four inputs, including: +* @li weight: A mutable Tensor of word grad. Must be one of the following types: +* float32. +* @li indices: A mutable word index Tensor of the int32 type.\n +* @li offsets: A mutable word index Tensor of the int32 type.\n +* @li per_sample_weights: to indicate all weights should be taken to be 1. +* If specified, per_sample_weights must have exactly the same shape as input +* and is treated as having the same offsets, if those are not None. +* Only supported for mode='sum'..\n + +* @par Attributes: +* @li mode: An string attr which use "sum"``, ``"mean"`` or ``"max"``. Specifies the way to reduce the bag.. \n + +* @li scale_grad_by_freq: An optional bool. Defaults to "False". +* If "True", "grad_weight" will be scale by word_frequency. +* If "False", "grad_weight" will not be scale by word_frequency. \n +* @li sparse: if True, gradient w.r.t.attr weight matrix will be a sparse tensor. \n +* @li include_last_offset: if True, attr offsets has one additional element, where the last element +* is equivalent to the size of indices. This matches the CSR format.. \n + +* @par Outputs: +* @li grad_weight: A mutable output Tensor of new word grad has the same type as "grads". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator EmbeddingBag. +*/ +REG_OP(EmbeddingBag) + .INPUT(weight, TensorType({ DT_FLOAT32 })) + .INPUT(indices, TensorType({ DT_INT32 })) + .OPTIONAL_INPUT(offsets, TensorType({DT_INT32})) + .OPTIONAL_INPUT(per_sample_weights, TensorType({DT_FLOAT32})) + .OUTPUT(y, TensorType({ DT_FLOAT32 })) + .ATTR(mode, String, "mean") + .ATTR(scale_grad_by_freq, Bool, false) + .ATTR(sparse, Bool, false) + .ATTR(include_last_offset, Bool, false) + .OP_END_FACTORY_REG(EmbeddingBag) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h index b7649a44..089af326 100644 --- a/third_party/fwkacllib/inc/ops/rpn_ops.h +++ b/third_party/fwkacllib/inc/ops/rpn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h index 0ce473b7..5ce6c2e0 100644 --- a/third_party/fwkacllib/inc/ops/save_ops.h +++ b/third_party/fwkacllib/inc/ops/save_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index cbd9839d..34c6a268 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 2c99e82e..1c26e033 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -239,6 +239,30 @@ REG_OP(GatherV2D) .REQUIRED_ATTR(axis, Int) .OP_END_FACTORY_REG(GatherV2D) +/** +*@Gathers values along an axis specified by dim . \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64. +*@li index: A Tensor. Must be one of the following types: int64 . \n + +*@par Attributes: +* dim: the axis along which to index . \n + +*@par Outputs: +* y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +*Compatible with the PyTorch operator Gather. +*/ + +REG_OP(GatherElements) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) + .INPUT(index, TensorType({DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) + .ATTR(dim, Int, 0) + .OP_END_FACTORY_REG(GatherElements) + /** *@brief Extracts a strided slice of a tensor. Roughly speaking, this op extracts a slice of size (end-begin)/stride from the given input tensor. @@ -275,8 +299,6 @@ REG_OP(GatherV2D) *@par Outputs: *y: A Tensor. Has the same type as "x" . \n -*@attention Constraints: - *@par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSlice. */ @@ -327,8 +349,6 @@ REG_OP(StridedSlice) *@par Outputs: *y: A Tensor. Has the same type as "x" . \n -*@attention Constraints: - *@par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSlice. @@ -385,8 +405,6 @@ REG_OP(StridedSliceD) *@par Outputs: *output: A Tensor. Has the same type as "dy" . \n -*@attention Constraints: - *@par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSliceGradD. @@ -444,8 +462,6 @@ REG_OP(StridedSliceGradD) *@par Outputs: *output: A Tensor has the same type as "dy" . \n -*@attention Constraints: - *@par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSliceGrad. */ @@ -486,6 +502,38 @@ REG_OP(UnsortedSegmentSum) .OUTPUT(y, TensorType::NumberType()) .OP_END_FACTORY_REG(UnsortedSegmentSum) +/** +*@brief Creates a one-dimensional tensor of size steps whose values are evenly spaced from start to +* end, inclusive, on a logarithmic scale with base base. \n + +*@par Inputs: +*One inputs, including: +* @li assist: A tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li start: An required float. Used to select the start. \n +* @li end: An required float. Used to select the end. \n +* @li steps: An optional int.Defaults to 100. \n +* @li base: An optional float.Defaults to 10.0. \n +* @li dtype: An optional int.Defaults to 1. \n + +*@par Outputs: +*y: A Tensor with the same type and shape of input_x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator logspaced. \n +*/ +REG_OP(LogSpaceD) + .INPUT(assist, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR (start, Float) + .REQUIRED_ATTR (end, Float) + .ATTR(steps, Int, 100) + .ATTR(base, Float, 10.0) + .ATTR(dtype, Int, 1) + .OP_END_FACTORY_REG(LogSpaceD) + /** *@brief Computes the sum along segments of a tensor . \n @@ -796,6 +844,34 @@ REG_OP(SliceD) .REQUIRED_ATTR(size, ListInt) .OP_END_FACTORY_REG(SliceD) +/** +*@brief Extracts a slice from a tensor. +* This operation extracts a slice of size "size" from a tensor "x" +* starting at the location specified by "begin" . \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n + +*@par Inputs: +*@li offsets: The starting location for the slice. + +*@par Attributes: +*@li size: The tensor shape . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". The slice extracted from the tensor. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. +*/ +REG_OP(SliceDV2) + .INPUT(x, TensorType::BasicType()) + .INPUT(offsets, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(size, ListInt) + .OP_END_FACTORY_REG(SliceDV2) + /** * @brief Finds values and indices of the "k" largest elements for the last * dimension . \n @@ -829,8 +905,8 @@ REG_OP(SliceD) * @li sorted = true * @li It's unstable sorted indices on the platform of Ascend310 -* @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator TopK. +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead. */ REG_OP(TopKD) .INPUT(x, TensorType::RealNumberType()) @@ -855,6 +931,44 @@ REG_OP(TopKD) * Number of top elements to look for along the last dimension (along each row * for matrices) . \n +* @par Attributes: +* @li sorted: An optional bool. Defaults to true. +* If true, the resulting "k" elements will be sorted by the values in descending +* order. +* @li dim: An optional int. Defaults to -1. For reserved use. +* @li largest: An optional bool. Defaults to true. For reserved use. \n + +* @par Outputs: +* @li values: A Tensor, specifying the sorted data. Has the same type as +* "input". +* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n + +* @see TopK() +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator TopKV2. +*/ +REG_OP(TopKV2) + .INPUT(x, TensorType::RealNumberType()) + .INPUT(k, TensorType({DT_INT32})) + .OUTPUT(values, TensorType::RealNumberType()) + .OUTPUT(indices, TensorType({DT_INT32})) + .ATTR(sorted, Bool, true) + .ATTR(dim, Int, -1) + .ATTR(largest, Bool, true) + .OP_END_FACTORY_REG(TopKV2) + +/** +* @brief Finds values and indices of the "k" largest elements for the last +* dimension . \n + +* @par Inputs: +* Two inputs, including: +* @li x: A 1D or higher tensor of type BasicType, with the last dimension +* at least "k". +* @li k: A 0D Tensor of type int32. +* Number of top elements to look for along the last dimension (along each row +* for matrices) . \n + * @par Attributes: * @li sorted: An optional bool. Defaults to true. * If true, the resulting "k" elements will be sorted by the values in descending @@ -876,15 +990,17 @@ REG_OP(TopK) .OUTPUT(values, TensorType::RealNumberType()) .OUTPUT(indices, TensorType({DT_INT32})) .ATTR(sorted, Bool, true) + .ATTR(largest, Bool, true) + .ATTR(dim, Int, -1) .OP_END_FACTORY_REG(TopK) /** *@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices" . \n *@par Inputs: *Inputs including: -* @li indices: A required index tensor. Must be one of the following types: float32, float16, int32, int8, uint8. -* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8. -* @li shape: A required list of int32, specifying the output shape. +* @li indices: A required index tensor. Must be one of the following types: int32 or int64. +* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8... +* @li shape: A required list of int32 or int64, specifying the output shape. *@par Outputs: *y:A output Tensor with same datatype as "updates" . \n @@ -895,7 +1011,7 @@ REG_OP(TopK) * Compatible with the TensorFlow operator ScatterNd. */ REG_OP(ScatterNd) - .INPUT(indices, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) .INPUT(x, TensorType::BasicType()) .INPUT(shape, TensorType::IndexNumberType()) .OUTPUT(y, TensorType::BasicType()) @@ -908,11 +1024,11 @@ REG_OP(ScatterNd) *@par Inputs: *Inputs including: * @li indices: A required index tensor. Must be one of the following types: - * float, float16, int32, int16. format:ND. + * int32 or int64. format:ND. * @li x: A required slice tensor. Must be one of the following types: - * float, float16, int32, int16. format:ND. + * float16, float, int32, int8, uint8. format:ND. *@par Attributes: -* @li shape: A required list of int32, specifying the output shape. +* @li shape: A required list of int32 or int64, specifying the output shape. *@par Outputs: *y: A Tensor. Has the same type as "x". format:ND . \n @@ -927,8 +1043,8 @@ REG_OP(ScatterNd) */ REG_OP(ScatterNdD) .INPUT(indices, TensorType::IndexNumberType()) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) .REQUIRED_ATTR(shape, ListInt) .OP_END_FACTORY_REG(ScatterNdD) @@ -1752,6 +1868,33 @@ REG_OP(Crop) .REQUIRED_ATTR(offsets, ListInt) .OP_END_FACTORY_REG(Crop) +/** +*@brief Returns a namedtuple (values, indices) where values is the cumulative +* the cumulative minimum of elements of input in the dimension dim. +* And indices is the index location of each maximum value found in the dimension dim. \n + +*@par Inputs: +*One inputs, including: +* @li x: A tensor . Must be one of the following types: +* float16, float32, int32, uint32, int8, uint8. \n + +*@par Attributes: +* @li axis: Axis along which to cummin. \n + +*@par Outputs: +* y: A Tensor with the same type and shape of x's. \n +* indices: A Tensor with the int32 type and the same shape of x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Cummin. \n +*/ +REG_OP(Cummin) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OUTPUT(indices, TensorType::BasicType()) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(Cummin) + /** *@brief Extends the input with copies of data along a specified dimension. For example: *(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); @@ -1921,6 +2064,249 @@ REG_OP(CumulativeLogsumexpD) .ATTR(exclusive, Bool, false) .ATTR(reverse, Bool, false) .OP_END_FACTORY_REG(CumulativeLogsumexpD) + +/** +* @brief Add updates to var according to axis and indices. + +* @par Inputs: +* Three inputs, including: +* @li var: A Tensor. Must be one of the following types: +* float16, float32, int16, int32, int8, uint8. +* @li indices: A Tensor of the indices, type should be int32. +* @li updates: A Tensor of the same type as "var". \n + +* @par Attributes: +* @li axis: An required int to specify the axis to perform indices add. \n + +* @par Outputs: +* @li var: A Tensor. Same as input "var". + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator index_add_. +*/ +REG_OP(InplaceIndexAdd) + .INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, + DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8, + DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, + DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(InplaceIndexAdd) + +/** +* @brief Replace the value of X with value according to mask. +* @par Inputs: +* three inputs, including: +* @li x: A Tensor of dtype is float16 or float32 or int64 or int32 or int8. +* @li mask: A Tensor of dtype bool. +* @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8. + +* @par Outputs: +* @li y: A tensor. Must be one of the following dtypes: +* float16, float32, int64, int32, int8. +*/ +REG_OP(MaskedFill) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64})) + .INPUT(mask, TensorType({DT_BOOL})) + .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(MaskedFill) + +/** +* @brief Choose the value of X with value according to mask. + +* @par Inputs: +* two inputs, including: +* @li x: A Tensor of dtype is float16 or float32. +* @li mask: A Tensor of dtype is bool. \n + +* @par Outputs: +* @li y: A tensor with the same type as x. \n + +* @par Third-party framework compatibility +* Compatible with the Numpy operator select. +* Replaces the pytorch operator masked_select in some scenarios.\n +*/ +REG_OP(MaskedSelectV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(mask, TensorType({DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(MaskedSelectV2) + +/** +* @brief Slice a tensor at its last dim, e.x. a[..., begin:end:stride]. \n + +* @par Inputs: +* One inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, int16, int32. + +* @par Attributes: +* @li start: An attribute of type Int, start index of last dim. \n +* @li end: An attribute of type Int, end index of last dim. \n +* @li stride: An attribute of type Int, stride of slice. \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* No compatibility +*/ +REG_OP(SliceLastDim) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .REQUIRED_ATTR(start, Int) + .REQUIRED_ATTR(end, Int) + .ATTR(stride, Int, 1) + .OP_END_FACTORY_REG(SliceLastDim) + +/** +* @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n +* extracts a slice of size (end-begin)/stride from the given input tensor. \n +* Starting at the location specified by begin the slice continues by \n +* adding stride to the index until all dimensions are not less than end. \n +* +* @par Inputs: +* Four inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n +* complex128, float16, uint32, uint64, complex64, complex128. \n +* @li begin: A Tensor of type int32 or int64, for the index of the first value to select. +* +* @li end: A Tensor of type int32 or int64, for the index of the last value to select. +* +* @li axes: A Tensor of type int32 or int64, indicate axis to be select. +* +* @li strides: A Tensor of type int32 or int64, for the increment. +* +* @par Attributes: +* @li begin_mask: A Tensor of type int32. \n +* A bitmask where a bit "i" being "1" means to ignore the begin \n +* value and instead use the largest interval possible. +* @li end_mask: A Tensor of type int32. \n +* Analogous to "begin_mask". +* @li ellipsis_mask: A Tensor of type int32. \n +* A bitmask where bit "i" being "1" means the "i"th position \n +* is actually an ellipsis. +* @li new_axis_mask: A Tensor of type int32. \n +* A bitmask where bit "i" being "1" means the "i"th \n +* specification creates a new shape 1 dimension. +* @li shrink_axis_mask: A Tensor of type int32. \n +* A bitmask where bit "i" implies that the "i"th \n +* specification should shrink the dimensionality. +* +* @par Outputs: +* y: A Tensor. Has the same type as "x". +* +* @attention Constraints: +* +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSliceV2. +*/ +REG_OP(StridedSliceV2) + .INPUT(x, TensorType::BasicType()) + .INPUT(begin, TensorType::IndexNumberType()) + .INPUT(end, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) + .ATTR(begin_mask, Int, 0) + .ATTR(end_mask, Int, 0) + .ATTR(ellipsis_mask, Int, 0) + .ATTR(new_axis_mask, Int, 0) + .ATTR(shrink_axis_mask, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(StridedSliceV2) + +/** +*@brief Fills the elements of the input tensor with value val by selecting the indices in the order given in index. \n + +*@par Inputs: +*Three inputs, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32, int32. \n +*@li assist1: A tensor. Must be one of the following types: +* float16, float32, int32. \n +*@li assist2: A tensor. Must be one of the following types: +* float16, float32, int32. \n + +* @par Attributes: +* @li dim: A required int. Used to select the dimension of this tensor. \n + +*@par Outputs: +*y: A Tensor with the same type and shape of input_x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator IndexFill. \n +*/ +REG_OP(IndexFillD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(dim, Int) + .OP_END_FACTORY_REG(IndexFillD) + +/** +* @brief For each row r of this and for each column c, do (*this)(r, c) += src(j, c), \n +* where j ranges from indexes[r].first through indexes[r].second - 1. \n +* In general indexes must be >= 0 and < src.NumRows(); \n +* but to represent an empty range you may use the pair (-1, -1) or any pair of numbers (i, j) such that i >= j. \n + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor. Must be one of the following types: +* float16, float32. +* @li indices: A Tensor of the indices, type should be int32. +* @li src: A Tensor of the same type as "x". \n + +* @par Outputs: +* @li x: A Tensor. Same as input "x". + +* @par Third-party framework compatibility +* Compatible with the kaldi operator AddRowRanges. +*/ +REG_OP(AddRowRanges) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(AddRowRanges) + +/** +*@brief masked fill tensor along with one axis by range. +* boxes. It is a customized masked fill range operator . \n + +*@par Inputs: +* Four inputs, including: +*@li x: input tensor. A ND Tensor of float32/float16/int32/int8 with shapes +* 1-D (D,), 2-D(N, D), 3-D(N, C, D) +*@li start: masked fill start pos. A 3D Tensor of int32 with +* shape (num, N). "num" indicates the number of loop masked fill, and the value N +* indicates the batch of ND Tensor, if input x shape is 1-D, N = 1. \n +*@li end: masked fill end pos. A 3D Tensor of int32 with +* shape (num, N). "num" indicates the number of loop masked fill, and the value N +* indicates the batch of ND Tensor. \n +*@li value: masked fill value. A 2D Tensor of float32/float16/int32/int8 with +* shape (num,). "num" indicates the number of loop masked fill + +*@par Attributes: +*@li axis: axis with masked fill of int32. Defaults to -1. + +*@par Outputs: +*y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D) + +* @par Restrictions: +* Warning: input shape's length must not be bigger than 1024 * 1024 * 1024. +*/ +REG_OP(MaskedFillRange) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) + .INPUT(start, TensorType({DT_INT32})) + .INPUT(end, TensorType({DT_INT32})) + .INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(MaskedFillRange) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h index 1d02fa15..04e04f1b 100644 --- a/third_party/fwkacllib/inc/ops/set_ops.h +++ b/third_party/fwkacllib/inc/ops/set_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index d7512790..a1fc9ee6 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad) REG_OP(SparseTensorDenseMatMul) .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ - DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16})) + DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64})) .INPUT(x1_shape, TensorType({DT_INT64})) - .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ + .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ DT_COMPLEX128, DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ DT_COMPLEX128, DT_FLOAT16})) .ATTR(adjoint_a, Bool, false) .ATTR(adjoint_b, Bool, false) diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index 64fa7814..34ccb398 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,6 +26,24 @@ namespace ge { +/** +*@brief Computes the inverse 1-dimensional discrete Fourier transform over the +inner-most dimension of `x`. \n + +*@par Inputs: +*@li x: A Tensor. Must be the following types: complex64, complex128. \n + +*@par Outputs: +*@li y: A complex tensor of the same rank as `x`. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IFFT operator. +*/ +REG_OP(IFFT) + .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OP_END_FACTORY_REG(IFFT) + /** *@brief Real-valued fast Fourier transform . \n @@ -47,6 +65,84 @@ REG_OP(RFFT) .OUTPUT(y, TensorType({DT_COMPLEX64})) .OP_END_FACTORY_REG(RFFT) +/** +*@brief Inverse real-valued fast Fourier transform. \n + +*@par Inputs: +*@li x: A complex64 tensor. +*@li fft_length: An int32 tensor of shape [1]. The FFT length. \n + +*@par Outputs: +*@li y: A float32 tensor of the same rank as `input`. The inner-most + dimension of `input` is replaced with the `fft_length` samples of its inverse + 1D Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IRFFT operator. +*/ +REG_OP(IRFFT) + .INPUT(x, TensorType({DT_COMPLEX64})) + .INPUT(fft_length, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(IRFFT) + + +/** +*@brief 2D fast Fourier transform. \n + +*@par Inputs: +*@li x: A complex64 tensor. + +*@par Outputs: +*@li y: A complex64 tensor of the same shape as `input`. The inner-most 2 + dimensions of `input` are replaced with their 2D Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow FFT2D operator. +*/ +REG_OP(FFT2D) + .INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(FFT2D) + +/** +*@brief Calculate the one-dimensional discrete Fourier transform on the +innermost dimension of the input. \n + +*@par Inputs: +*@li x: A Tensor. Must be the following types: complex64, complex128. \n + +*@par Outputs: +*@li y: A complex tensor with the same shape as input. The innermost dimension +of the input is replaced by its 1-dimensional Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow FFT operator. +*/ +REG_OP(FFT) + .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OP_END_FACTORY_REG(FFT) + +/** +*@brief Calculate the inverse 1-dimensional discrete Fourier transform on the +innermost dimension of the input. \n + +*@par Inputs: +*@li x: A Tensor. Must be the following types: complex64, complex128. \n + +*@par Outputs: +*@li y: A complex tensor with the same shape as input. The innermost dimension +of the input is replaced by its inverse two-dimensional Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IFFT2D operator. +*/ +REG_OP(IFFT2D) + .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OP_END_FACTORY_REG(IFFT2D) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index efe4715d..fe25a46f 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -62,8 +62,8 @@ REG_OP(Split) *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 *@par Attributes: -*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. -*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n +*@li split_dim: A required int32. Specifies the dimension along which to split. No default value. +*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n *@par Outputs: *y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n @@ -94,12 +94,12 @@ REG_OP(SplitD) *@par Inputs: * Three inputs, including: *@li x: An ND Tensor. -*Must be one of the following types: -*@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. -*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n +*Must be one of the types:float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +*@li size_splits: Must be one of the types:int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension. +*@li split_dim: Must be the following type:int32. Specifies the dimension along which to split . \n *@par Attributes: -*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n +*num_split: A required int32. Specifies the number of output tensors. No default value . \n *@par Outputs: *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n @@ -129,9 +129,9 @@ REG_OP(SplitV) *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 *@par Attributes: -*@li size_splits: A required list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. -*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. -*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n +*@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension. +*@li split_dim: A required int32. Specifies the dimension along which to split. No default value. +*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n *@par Outputs: *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n @@ -317,15 +317,15 @@ REG_OP(Concat) * int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n *@par Attributes: -*@li axis: A optional int, defaultvalue is 0. +*@li axis: A optional int, default value is 0. * Dimension along which to pack. The range is [-(R+1), R+1). *@li N: A required int. Number of tensors . \n *@par Outputs: *y: A Tensor. Has the same type as "x". + *@par Third-party framework compatibility -*Compatible with the TensorFlow operator Pack. -It's a dynamic output. +* Compatible with the TensorFlow operator Pack. */ REG_OP(Pack) .DYNAMIC_INPUT(x, TensorType::BasicType()) diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h index db1f5353..3c8e32b6 100644 --- a/third_party/fwkacllib/inc/ops/state_ops.h +++ b/third_party/fwkacllib/inc/ops/state_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h index 366112d6..c2f65c6a 100644 --- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h index dad3c379..ff9daaa3 100644 --- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index 4a88bc79..f9cc2549 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,235 @@ #include "graph/operator_reg.h" namespace ge { +/** +*@brief Creates ngrams from ragged string data . \n + +*@par Inputs: +include: +*@li data:1-D.The values tensor of the ragged string tensor to make ngrams out of. +*@li data_splits:The splits tensor of the ragged string tensor to make ngrams out of . \n + +*@par Attributes: +* separator:The string to append between elements of the token. Use "" for no separator. +* ngram_widths:The sizes of the ngrams to create. +* left_pad:The string to use to pad the left side of the ngram sequence. Only used if pad_width != 0. +* right_pad:The string to use to pad the right side of the ngram sequence. Only used if pad_width != 0. +* pad_width:The number of padding elements to add to each side of each sequence. +* preserve_short_sequences: Preserve short sequences. \n + +*@par Outputs: +*@li ngrams:The values tensor of the output ngrams ragged tensor. +*@li ngrams_splits:The splits tensor of the output ngrams ragged tensor. \n + +*@see StringNGrams() + +*@par Third-party framework compatibility +*compatible with StringNGrams op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StringNGrams) + .INPUT(data, TensorType({DT_STRING})) + .INPUT(data_splits, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(ngrams, TensorType({DT_STRING})) + .OUTPUT(ngrams_splits, TensorType({DT_INT32, DT_INT64})) + .REQUIRED_ATTR(separator, String) + .ATTR(ngram_widths, ListInt, {}) + .REQUIRED_ATTR(left_pad, String) + .REQUIRED_ATTR(right_pad, String) + .REQUIRED_ATTR(pad_width, Int) + .REQUIRED_ATTR(preserve_short_sequences, Bool) + .OP_END_FACTORY_REG(StringNGrams) + +/** +*@brief Decodes each string in `input` into a sequence of Unicode code points . \n + +*@par Inputs: +include: +*@li input:The text to be decoded. Can have any shape. Note that the output is flattened +to a vector of char values. \n + +*@par Attributes: +* input_encoding:Text encoding of the input strings. This is any of the encodings supported +by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. +* errors:Error handling policy when there is invalid formatting found in the input. +The value of 'strict' will cause the operation to produce a InvalidArgument +error on any invalid input formatting. A value of 'replace' (the default) will +cause the operation to replace any invalid formatting in the input with the +`replacement_char` codepoint. A value of 'ignore' will cause the operation to +skip any invalid formatting in the input and produce no corresponding output +character. +* replacement_char:The replacement character codepoint to be used in place of any invalid +formatting in the input when `errors='replace'`. Any valid unicode codepoint may +be used. The default value is the default unicode replacement character is +0xFFFD or U+65533. +* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the +`replacement_char`. Default is false. \n + +*@par Outputs: +*@li row_splits:A 1D tensor containing the row splits. +*@li char_values:A 1D tensor containing the decoded codepoints. +*@li char_to_byte_starts:A 1D int32 Tensor containing the byte index in the input string where each +character in `char_values` starts. \n + +*@see UnicodeDecodeWithOffsets() + +*@par Third-party framework compatibility +*compatible with UnicodeDecodeWithOffsets op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(UnicodeDecodeWithOffsets) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(row_splits, TensorType({DT_INT64})) + .OUTPUT(char_values, TensorType({DT_INT32})) + .OUTPUT(char_to_byte_starts, TensorType({DT_INT64})) + .REQUIRED_ATTR(input_encoding, String) + .ATTR(errors, String, "replace") + .ATTR(replacement_char, Int, 65533) + .ATTR(replace_control_characters, Bool, false) + .ATTR(Tsplits, Type, DT_INT64) + .OP_END_FACTORY_REG(UnicodeDecodeWithOffsets) + +/** +*@brief Decodes each string in `input` into a sequence of Unicode code points. \n + +*@par Inputs: +include: +*@li input:The text to be decoded. Can have any shape. Note that the output is flattened +to a vector of char values. \n + +*@par Attributes: +* input_encoding:Text encoding of the input strings. This is any of the encodings supported +by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. +* errors:Error handling policy when there is invalid formatting found in the input. +The value of 'strict' will cause the operation to produce a InvalidArgument +error on any invalid input formatting. A value of 'replace' (the default) will +cause the operation to replace any invalid formatting in the input with the +`replacement_char` codepoint. A value of 'ignore' will cause the operation to +skip any invalid formatting in the input and produce no corresponding output +character. +* replacement_char:The replacement character codepoint to be used in place of any invalid +formatting in the input when `errors='replace'`. Any valid unicode codepoint may +be used. The default value is the default unicode replacement character is +0xFFFD or U+65533. +* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the +`replacement_char`. Default is false. \n + +*@par Outputs: +*@li row_splits:A 1D tensor containing the row splits. +*@li char_values:A 1D tensor containing the decoded codepoints. \n + +*@see UnicodeDecode() + +*@par Third-party framework compatibility +*compatible with UnicodeDecode op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(UnicodeDecode) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(row_splits, TensorType({DT_INT64})) + .OUTPUT(char_values, TensorType({DT_INT32})) + .REQUIRED_ATTR(input_encoding, String) + .ATTR(errors, String, "replace") + .ATTR(replacement_char, Int, 65533) + .ATTR(replace_control_characters, Bool, false) + .ATTR(Tsplits, Type, DT_INT64) + .OP_END_FACTORY_REG(UnicodeDecode) + +/** +*@brief Transcode the input text from a source encoding to a destination encoding. \n + +*@par Inputs: +include: +*@li input:The text to be processed. Can have any shape. \n + +*@par Attributes: +* input_encoding:Text encoding of the input strings. This is any of the encodings supported +by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. +* output_encoding:The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. +Multi-byte encodings will be big-endian. +* errors:Error handling policy when there is invalid formatting found in the input. +The value of 'strict' will cause the operation to produce a InvalidArgument +error on any invalid input formatting. A value of 'replace' (the default) will +cause the operation to replace any invalid formatting in the input with the +`replacement_char` codepoint. A value of 'ignore' will cause the operation to +skip any invalid formatting in the input and produce no corresponding output +character. +* replacement_char:The replacement character codepoint to be used in place of any invalid +formatting in the input when `errors='replace'`. Any valid unicode codepoint may +be used. The default value is the default unicode replacement character is +0xFFFD or U+65533. +* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the +`replacement_char`. Default is false. \n + +*@par Outputs: +*@li output:A string tensor containing unicode text encoded using `output_encoding`. \n + +*@see UnicodeTranscode() + +*@par Third-party framework compatibility +*compatible with UnicodeTranscode op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(UnicodeTranscode) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_STRING})) + .REQUIRED_ATTR(input_encoding, String) + .ATTR(output_encoding, String, "UTF-8") + .ATTR(errors, String, "replace") + .ATTR(replacement_char, Int, 65533) + .ATTR(replace_control_characters, Bool, false) + .OP_END_FACTORY_REG(UnicodeTranscode) + +/** +*@brief Encode a tensor of ints into unicode strings. \n + +*@par Inputs: +include: +*@li input_values:A 1D tensor containing the unicode codepoints that should be encoded. +*@li input_splits:A 1D tensor specifying how the unicode codepoints should be split into strings. \n + +*@par Attributes: +* output_encoding:The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. +Multi-byte encodings will be big-endian. +* errors:Error handling policy when there is invalid formatting found in the input. +The value of 'strict' will cause the operation to produce a InvalidArgument +error on any invalid input formatting. A value of 'replace' (the default) will +cause the operation to replace any invalid formatting in the input with the +`replacement_char` codepoint. A value of 'ignore' will cause the operation to +skip any invalid formatting in the input and produce no corresponding output +character. +* replacement_char:The replacement character codepoint to be used in place of any invalid +formatting in the input when `errors='replace'`. Any valid unicode codepoint may +be used. The default value is the default unicode replacement character is +0xFFFD or U+65533. \n + +*@par Outputs: +*@li output:The 1-D Tensor of strings encoded from the provided unicode codepoints. \n + +*@see UnicodeEncode() + +*@par Third-party framework compatibility +*compatible with UnicodeEncode op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(UnicodeEncode) + .INPUT(input_values, TensorType({DT_INT32})) + .INPUT(input_splits, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(errors, String, "replace") + .ATTR(output_encoding, String, "UTF-8") + .ATTR(replacement_char, Int, 65533) + .OP_END_FACTORY_REG(UnicodeEncode) /** *@brief Split elements of input based on delimiter into a SparseTensor . \n @@ -61,6 +290,116 @@ REG_OP(StringSplit) .ATTR(skip_empty, Bool, true) .OP_END_FACTORY_REG(StringSplit) +/** +*@brief Replaces the match of pattern in input with rewrite. \n + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. \n + +*@par Attributes: +*@li pattern:A string. The regular expression to match the input. +*@li rewrite:A string. The rewrite to be applied to the matched expression. +*@li replace_global:An optional bool. Defaults to True. If True, the replacement is global, +otherwise the replacement is done only on the first match. + +*@par output: +*@li output::A Tensor of type string. +*/ +REG_OP(StaticRegexReplace) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(pattern, String, "") + .ATTR(rewrite, String, "") + .ATTR(replace_global, Bool, true) + .OP_END_FACTORY_REG(StaticRegexReplace) + +/** +*@brief The input is a string tensor of any shape. The pattern is the +*regular expression to be matched with every element of the input tensor. +*The boolean values (True or False) of the output tensor indicate +*if the input matches the regex pattern provided. + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. \n + +*@par Attributes: +*@li pattern:A string. The regular expression to match the input. + +*@par output: +*@li output::A bool tensor with the same shape as `input`. +*/ +REG_OP(StaticRegexFullMatch) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_BOOL})) + .ATTR(pattern, String, "") + .OP_END_FACTORY_REG(StaticRegexFullMatch) + +/** +*@brief A Tensor of type string. The input to be joined. \n + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. +*@li segment_ids:A Tensor. Must be one of the following types: int32, int64. +*A tensor whose shape is a prefix of data.shape. Negative segment ids are not supported. +*@li num_segments:A Tensor. Must be one of the following types: int32, int64. A scalar. + +*@par Attributes: +*@li separator:An optional string. Defaults to "". The separator to use when joining. + +*@par output: +*@li output::A Tensor of type string.. +*/ +REG_OP(UnsortedSegmentJoin) + .INPUT(input, TensorType({DT_STRING})) + .INPUT(segment_ids, TensorType({DT_INT32,DT_INT64})) + .INPUT(num_segments, TensorType({DT_INT32,DT_INT64})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(separator, String, "") + .OP_END_FACTORY_REG(UnsortedSegmentJoin) + +/** +*@brief Inputs to TensorFlow operations are outputs of another TensorFlow operation. +*This method is used to obtain a symbolic handle that represents the computation of the input. + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. + +*@par Attributes: +*@li encoding:An optional string. Defaults to "". + +*@par output: +*@li output::A Tensor of type string.. +*/ +REG_OP(StringLower) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(encoding, String, "") + .OP_END_FACTORY_REG(StringLower) + +/** +*@brief Inputs to TensorFlow operations are outputs of another TensorFlow operation. +*This method is used to obtain a symbolic handle that represents the computation of the input. + +*@par Inputs: +include: +*@li input:A Tensor of type string. The text to be processed. + +*@par Attributes: +*@li encoding:An optional string. Defaults to "". + +*@par output: +*@li output::A Tensor of type string.. +*/ +REG_OP(StringUpper) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(encoding, String, "") + .OP_END_FACTORY_REG(StringUpper) + /** *@brief Split elements of source based on sep into a SparseTensor . \n @@ -488,7 +827,7 @@ include: */ REG_OP(AsString) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \ - DT_DOUBLE, DT_BOOL})) + DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128})) .OUTPUT(y, TensorType({DT_STRING})) .ATTR(precision, Int, -1) .ATTR(scientific, Bool, false) @@ -557,6 +896,45 @@ REG_OP(DecodeBase64) .INPUT(x, TensorType({DT_STRING})) .OUTPUT(y, TensorType({DT_STRING})) .OP_END_FACTORY_REG(DecodeBase64) + +/** +*@brief StringNormalization performs string operations for basic cleaning . \n + +*@par Inputs: +*@li input: only accepts [C] or [1, C] UTF-8 strings tensor . \n + +*@par Outputs: +*@li output: UTF-8 strings tensor after cleaning . \n + +*@par Attributes: +*@li stopwords : list of strings (default is empty). +*List of stop words. If not set, no word would be removed from input strings +tensor. + +*@li is_case_sensitive : bool (default is false). +*Boolean. Whether the identification of stop words in input strings tensor is +case-sensitive. Default is false. + +*@li case_change_action : string (default is "NONE"). +*string enum that cases output to be lowercased/uppercases/unchanged. Valid +values are "LOWER", "UPPER", "NONE". Default is "NONE". + +*@li local : string (default is "en_US"). +*Environment dependent string that denotes the locale according to which output +strings needs to be upper/lowercased.Default en_US or platform specific equivalent +as decided by the implementation . \n + +*@attention Constraints: +*@li input can be either a 1-D or 2-D tensor, the shape of 2-D tensor must be [1, C]. +*/ +REG_OP(StringNormalizer) + .INPUT(input, TensorType({DT_STRING})) + .OUTPUT(output, TensorType({DT_STRING})) + .ATTR(stopwords, ListString, {}) + .ATTR(is_case_sensitive, Bool, false) + .ATTR(case_change_action, String, "NONE") + .ATTR(local, String, "en_US") + .OP_END_FACTORY_REG(StringNormalizer) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/swap_co_ops.h b/third_party/fwkacllib/inc/ops/swap_co_ops.h index a1bf4f8b..6e8eaac3 100644 --- a/third_party/fwkacllib/inc/ops/swap_co_ops.h +++ b/third_party/fwkacllib/inc/ops/swap_co_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h index 9c61f2c9..9bef1d7b 100644 --- a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h +++ b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 64e18fc7..4a46e35f 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -130,28 +130,27 @@ REG_OP(Transpose) .OP_END_FACTORY_REG(Transpose) /** -*@brief Doing format_transfer for various data format only -support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW" -"NCHW" to "FRACTAL_Zn" or "FRACTAL_Zn" to "NCHW". -"HWCN" to "FRACTAL_Zn" or "FRACTAL_Zn" to "HWCN" . \n +*@brief Do format transfer for various data format. +* In general, the framework will insert it atomatically . \n *@par Inputs: -*src: A Tensor dtype of all types . \n +*src: A Tensor. For all branches can be types: float16, float32, int32, int8, bool. +* For branches without padding also can be types: int16, int64, uint8, uint16, uint32, uint64 . \n *@par Attributes: -*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. -*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. -*@li group: A required int32, default value is 1. \n +*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Z" etc. +*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Z" etc. +*@li group: A optional int32, default value is 1. \n *@par Outputs: -*dst: A Tensor dtype of all types. +*dst: A Tensor. Has the same type as "src". */ REG_OP(TransData) .INPUT(src, TensorType::BasicType()) .OUTPUT(dst, TensorType::BasicType()) .REQUIRED_ATTR(src_format, String) .REQUIRED_ATTR(dst_format, String) - .ATTR(group, Int, 1) + .ATTR(groups, Int, 1) .OP_END_FACTORY_REG(TransData) /** @@ -174,21 +173,27 @@ REG_OP(Permute) .OP_END_FACTORY_REG(Permute) /** -*@brief Flattens the inputs. Reserves axis 0 and flattens the input tensors -* along axis 1 . \n +*@brief Flattens the inputs tensor into a 2D matrix. If input tensor has shape (d_0, d_1,..., d_n), +* then the output will have shape (d_0 X d_1 ... d_(axis-1), d_axis X d_(axis + 1)...X d_n)\n *@par Inputs: -*One input: -*x: A multi-dimensional Tensor. Must be one of the following types: -* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 . \n +* One input: +* x: A multi-dimensional Tensor. Must be one of the following types: +* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32. *@par Outputs: -*y: A 2D flattened Tensor (Reserves axis 0 and flattens the input tensors -* along axis 1). Must be one of the following data types: int8, uint8, int16, -* uint16, int32, uint32, int64,uint64, float16, float32 . \n +* y: A 2D flattened Tensor with the contents of the input tensor, with input dimensions up to axis flattened +* to the outer dimension of the output and remaining input dimensions flattened into the inner dimension of the output. +* Must be one of the following data types: int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 . + +*@par Attributes: +* axis: A optional int32, default value is 1. Indicate up to which input dimensions (exclusive) should be flattened +* to the outer dimension of the output. The value for axis must be in the range [-r, r], where r is the rank of +* the input tensor. Negative value means counting dimensions from the back. When axis = 0, the shape of +* the output tensor is (1, (d_0 X d_1 ... d_n), where the shape of the input tensor is (d_0, d_1, ... d_n). *@par Third-party framework compatibility -* Compatible with TensorFlow operator Flatten. +* Compatible with TensorFlow / ONNX operator Flatten. */ REG_OP(Flatten) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, @@ -197,6 +202,7 @@ REG_OP(Flatten) .OUTPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT, DT_FLOAT16})) + .ATTR(axis, Int, 1) .OP_END_FACTORY_REG(Flatten) /** @@ -357,7 +363,7 @@ REG_OP(DepthToSpace) *@brief Permutes data into spatial data blocks and then prunes them . \n *@par Inputs: -*@li x: A 4D Tensor with format NHWC. +*@li x: A 4D Tensor with format. Must set the format, supported format list ["NCHW, NHWC"] *@li crops: A 1D list or tuple of int32 or int64 . \n *Must be one of the following types: float16, float32 @@ -418,12 +424,8 @@ REG_OP(BatchToSpace) * Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead. */ REG_OP(BatchToSpaceD) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, - DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64, - DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, - DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64, - DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) .REQUIRED_ATTR(block_size, Int) .REQUIRED_ATTR(crops, ListInt) .OP_END_FACTORY_REG(BatchToSpaceD) @@ -434,9 +436,10 @@ REG_OP(BatchToSpaceD) *@par Inputs: * Two inputs, including: -*@li x: An NHWC Tensor. Must be one of the following types: +*@li x: An 4D Tensor. Must be one of the following types: * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +* Must set the format, supported format list ["NCHW, NHWC"] *@li paddings: A 2D tensor of type int, specifying the input . \n *@par Attributes: @@ -518,7 +521,8 @@ REG_OP(Unpack) * @par Inputs: * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the * following types:float32, double, int32, uint8, int16, int8, int64, uint16, -* float16, uint32, uint64 +* float16, uint32, uint64. The inputs must have data_format with one of follows: +* NHWC, NCHW. * @par Attributes: * @li ksizes: A required list or tuple. The size of the sliding window for each @@ -533,7 +537,6 @@ REG_OP(Unpack) * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. * @li padding: A required string. The type of padding algorithm to use, support "SAME" or "VALID". \n -* @li data_format: A required string. The format of input, only supported NHWC. \n * @par Outputs: * y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * @@ -554,7 +557,6 @@ REG_OP(ExtractImagePatches) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(rates, ListInt) .REQUIRED_ATTR(padding, String) - .ATTR(data_format, String, "NHWC") .OP_END_FACTORY_REG(ExtractImagePatches) /** @@ -563,6 +565,7 @@ REG_OP(ExtractImagePatches) * @par Inputs: * x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n +* The inputs must have data_format with one of follows: NDHWC, NCDHW. \n * @par Attributes: * @li ksizes: A required list or tuple. The size of the sliding window for each @@ -571,7 +574,6 @@ REG_OP(ExtractImagePatches) * patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1]. * @li padding: A required string. The type of padding algorithm to use , * support "SAME" or "VALID" . \n -* @li data_format: An optional string. The format of input, only supported NDHWC. \n * @par Outputs: * Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * @@ -590,7 +592,6 @@ REG_OP(ExtractVolumePatches) .REQUIRED_ATTR(ksizes, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(padding, String) - .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(ExtractVolumePatches) /** @@ -717,6 +718,210 @@ REG_OP(CompressFcOp) .OUTPUT(compress_index, TensorType({DT_INT8})) .REQUIRED_ATTR(compress_parameters, ListInt) .OP_END_FACTORY_REG(CompressFcOp) + +/** +*@brief Performs Col2im for each batch entry. \n + +*@par Inputs: +*@li input_x: The Col Tensor. 5-D, shape: `(n, c1, kernel_h*kernel_w, ho*wo, c0)`. +where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1 \n + +*@par Outputs: +*@li output_y: The img Tensor. 5-D, shape: `(n, c1, output_h, output_w, c0)`. \n + +*@par Attributes: +*@li kernel_shape: ListInt, value: `(kernel_h, kernel_w)`, the shape of kernel in convolution. +*@li dilation: ListInt, value: `(dilation_h, dilation_w)`, the dilation in convolution. +*@li padding: ListInt, value: `(padding_h, padding_w)`, the dilation in convolution. +*@li stride: ListInt, value: `(stride_h, stride_w)`, the dilation in convolution. \n + +*@par Third-party framework compatibility +* Compatible with Pytorch col2im/im2col_backward operator. +*/ +REG_OP(Col2im) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(output_size, TensorType({DT_INT32, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(kernel_size, ListInt) + .REQUIRED_ATTR(dilation, ListInt) + .REQUIRED_ATTR(padding, ListInt) + .REQUIRED_ATTR(stride, ListInt) + .OP_END_FACTORY_REG(Col2im) + +/** +* @brief Performs Im2col for each batch entry. \n + +* @par Inputs: +* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the +* following types:float32, int8, float16. The inputs must have data_format with +* one of follows:NHWC, NCHW. + +* @par Attributes: +* @li ksizes: A required list or tuple. The size of the sliding window for each +* dimension of images. +* @li strides: A optional list or tuple. How far the centers of two consecutive +* patches are in the images. Defaults to "{1}". +* @li dilations: A optional list or tuple. Defaults to "{1}". +* This is the input stride, specifying how far two consecutive patch +* samples are in the input. Equivalent to extracting patches +* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) * +* (dilations - 1), followed by subsampling them spatially by a factor of dilations. +* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. +* @li padding_mode: A optional String. The type of padding algorithm to use, +* support "SAME", "VALID", "CALCULATED". Among the three modes, only the "CALCULATED" +* means to use the pads below. Defaults to "CALCULATED". +* @li pads: A optional list or tuple. The pad distance. Defaults to "{0}". \n + +* @par Outputs: +* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * +* ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols +* x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols" +* are the dimensions of the output patches . \n + +* @attention Constraints: +* "ksizes", "strides", "dilations" and "pads" are lists of integers . \n + +* @par Third-party framework compatibility +* Compatible with Pytorch Im2col operator. +*/ +REG_OP(Im2col) + .INPUT(x, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(ksizes, ListInt) + .ATTR(strides, ListInt, {1}) + .ATTR(dilations, ListInt, {1}) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0}) + .OP_END_FACTORY_REG(Im2col) + +/** +*@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine +matrices theta. \n + +*@par Inputs: +*Input theta must be float16 or float, output_size must be int32 type.Inputs +include: +*@li theta: input batch of affine matrices with shape (N,2,3) for 2D or (N,3,4) +for 3D +*@li output_size: the target output image size. (N×C×H×W for 2D or N×C×D×H×W for +3D) Example: torch.Size((32, 3, 24, 24)) . \n + + +*@par Attributes: +*align_corners: if True, consider -1 and 1 to refer to the centers of the corner +pixels rather than the image corners.Refer to grid_sample() for a more complete +description. A grid generated by affine_grid() should be passed to grid_sample() +with the same setting for this option. Default: False \n + +*@par Outputs: +*@li y: A 2-D integer tensor of shape [M] representing the +selected indices from the boxes tensor, where M <= max_output_size. \n + +*@attention Constraints: +*Input theta must be float16 or float, output_size must be int32 type . \n + +*@par Third-party framework compatibility +*Compatible with Pytorch affine_grid operator. +*/ + +REG_OP(AffineGrid) + .INPUT(theta, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(output_size, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(align_corners, Bool, false) + .OP_END_FACTORY_REG(AffineGrid) + +/** +*@brief Make memory of a view be contiguous. \n + +*@par Inputs: +*Four inputs, including: +*@li x: The input tensor. +*@li size: The shape of output tensor. +*@li stride: The stride of output tensor. +*@li storage_offset: The offset in the underlying storage of the output tensor. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +*Compatible with the pytorch operator as_strided. +*/ +REG_OP(AsStrided) + .INPUT(x, TensorType::BasicType()) + .INPUT(size, TensorType::IndexNumberType()) + .INPUT(stride, TensorType::IndexNumberType()) + .INPUT(storage_offset, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(AsStrided) + +/** +*@brief This transform extracts n-grams from the input sequence and save them as a +vector. \n + +*@par Inputs: +*@li input: can be either a 1-D or 2-D tensor for n-gram extraction, It is ether string UTF-8 or int32/int64 . \n + +*@par Attributes: +*@li max_gram_length : int (required) +*Maximum n-gram length. If this value is 3, 3-grams will be used to generate the output . +*@li max_skip_count : int (required) +*Maximum number of items (integers/strings) to be skipped when constructing an n-gram from X. +If max_skip_count=1, min_gram_length=2, max_gram_length=3, this operator may generate 2-grams +with skip_count=0 and skip_count=1, and 3-grams with skip_count=0 and skip_count=1. +*@li min_gram_length : int (required) +*Minimum n-gram length. If this value is 2 and max_gram_length is 3, output may contain counts of +2-grams and 3-grams. +*@li mode : string (required) +*The weighting criteria. It can be one of "TF" (term frequency), "IDF" (inverse document frequency), +and "TFIDF" (the combination of TF and IDF). +*@li ngram_counts : list of ints (required) +*The starting indexes of 1-grams, 2-grams, and so on in pool. It is useful when determining the boundary +between two consecutive collections of n-grams. For example, if ngram_counts is [0, 17, 36], +the first index (zero-based) of 1-gram/2-gram/3-gram in pool are 0/17/36. This format is essentially identical +to CSR (or CSC) sparse matrix format, and we choose to use this due to its popularity. +*@li ngram_indexes : list of ints (required) +*list of int64s (type: AttributeProto::INTS). This list is parallel to the specified 'pool_*' attribute. The i-th element +in ngram_indexes indicate the coordinate of the i-th n-gram in the output tensor. +*@li pool_int64s : list of ints +*List of int64 n-grams learned from the training set. Either this or pool_strings attributes must be present but not both. +It's an 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams. The i-th element +in pool stores the n-gram that should be mapped to coordinate ngram_indexes[i] in the output vector. +*@li pool_strings : list of strings +*List of strings n-grams learned from the training set. Either this or pool_int64s attributes must be present but not both. +It's an 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams. The i-th element +in pool stores the n-gram that should be mapped to coordinate ngram_indexes[i] in the output vector. +*@li weights : list of floats +*list of floats. This attribute stores the weight of each n-gram in pool. The i-th element in weights is the weight of +the i-th n-gram in pool. Its length equals to the size of ngram_indexes. By default, weights is an all-one tensor.This attribute +is used when mode is "IDF" or "TFIDF" to scale the associated word counts. \n + +*@par Outputs: +*@li output: tensor(float) +*For 1-D input, output is the n-gram representation of that input. For 2-D input, the output is also a 2-D tensor +whose i-th row is the n-gram representation of the i-th input row. More specifically, if input shape is [C], the corresponding +output shape would be [max(ngram_indexes) + 1]. If input shape is [N, C], this operator produces a [N, max(ngram_indexes) + 1]-tensor. \n + +*@attention Constraints: +*@li input can be either a 1-D or 2-D tensor, shape is [C] or [N, C]. +*@li max(ngram_indexes) + 1 == len(weights), len(y) == len(weights). +*@li ngram_counts and pool(pool_int64s or pool_strings) must match. +*@li either pool_strings or pool_int64s attributes must be present but not both. +*/ + +REG_OP(TfidVectorizer) + .INPUT(input, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(output, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(max_gram_length, Int) + .REQUIRED_ATTR(max_skip_count, Int) + .REQUIRED_ATTR(min_gram_length, Int) + .REQUIRED_ATTR(mode, String) + .REQUIRED_ATTR(ngram_counts, ListInt) + .REQUIRED_ATTR(ngram_indexes, ListInt) + .ATTR(pool_int64s, ListInt, {}) + .ATTR(pool_strings, ListString, {}) + .ATTR(weights, ListFloat, {}) + .OP_END_FACTORY_REG(TfidVectorizer) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h index e19cbd7c..8ef69d8b 100644 --- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h +++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 57948c47..01f63705 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -41,6 +41,11 @@ typedef enum rtEventWaitStatus { #define RT_EVENT_DDSYNC 0x04U #define RT_EVENT_TIME_LINE 0x08U +#define RT_EVENT_DDSYNC_NS 0x01U +#define RT_EVENT_STREAM_MARK 0x02U +#define RT_EVENT_DDSYNC 0x04U +#define RT_EVENT_TIME_LINE 0x08U + /** * @ingroup dvrt_event * @brief create event instance diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index aa394eea..10f884f2 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -27,6 +27,7 @@ #include "mem.h" #include "rt_model.h" #include "stream.h" +#include "rt_stars.h" #include "rt_ffts.h" #endif // __CCE_RUNTIME_RT_H__ diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h new file mode 100644 index 00000000..188656b1 --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_stars.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * Description: + */ + +#ifndef __CCE_RUNTIME_STARS_H +#define __CCE_RUNTIME_STARS_H + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +/** + * @ingroup rt_stars + * @brief launch stars task. + * used for send star sqe directly. + * @param [in] taskSqe stars task sqe + * @param [in] sqeLen stars task sqe length + * @param [in] stream associated stream + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stream); + +/** + * @ingroup rt_stars + * @brief create cdq instance. + * @param [in] batchNum batch number + * @param [in] batchSize batch size + * @param [in] queName cdq name + * @return RT_ERROR_NONE for ok, ACL_ERROR_RT_NO_CDQ_RESOURCE for no cdq resources + */ +RTS_API rtError_t rtCdqCreate(uint32_t batchNum, uint32_t batchSize, const char *queName); + +/** + * @ingroup rt_stars + * @brief destroy cdq instance. + * @param [in] queName cdq name + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtCdqDestroy(const char *queName); + +/** + * @ingroup rt_stars + * @brief get free batch in the queue. + * @param [in] queName cdq name + * @param [in] timeout batch size + * @param [out] batchId batch index + * @return RT_ERROR_NONE for ok, ACL_ERROR_RT_WAIT_TIMEOUT for timeout + */ +RTS_API rtError_t rtCdqAllocBatch(const char *queName, int32_t timeout, uint32_t *batchId); + +/** + * @ingroup rt_stars + * @brief launch a write_cdqm task on the stream. + * When the task is executed, the data information will be inserted into the cdqe index position of the queue. + * @param [in] queName cdq name + * @param [in] cdqeIndex cdqe index + * @param [in] data cdqe infomation + * @param [in] dataSize data size + * @param [in] stream launch task on the stream + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtCdqEnQueue(const char *queName, uint32_t cdqeIndex, void *data, uint32_t dataSize, + rtStream_t stream); + +/** + * @ingroup rt_stars + * @brief launch a write_cdqm task on the stream. + * When the task is executed, the data information will be inserted into the cdqe index position of the queue. + * @param [in] queName cdq name + * @param [in] cdqeIndex cdqe index + * @param [in] data cdqe infomation + * @param [in] dataSize data size + * @param [in] stream launch task on the stream + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *prtAddr, + rtStream_t stream); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif +#endif // __CCE_RUNTIME_STARS_H diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h index 665c8b82..36fc500e 100644 --- a/third_party/fwkacllib/inc/tdt/tsd_client.h +++ b/third_party/fwkacllib/inc/tdt/tsd_client.h @@ -107,88 +107,6 @@ TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, con */ TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); -/** -* @ingroup CreateCmdParameterObj -* @brief creat tsdclient func parameter obj. -* -* @par Function -* creat tsdclient func parameter obj. -* -* @param type [IN] type tdt::TsdCmdType, tsd func type. -* @param cmdParameterObj [IN] type void *, func parameter obj. -* @retval TDT_OK Success -* @retval TDT_INTERFACE_NOT_SUPPORT -* -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. -* @li status.h: Header file where 'TDT_StatusT' defined -*/ -TDT_StatusT CreateCmdParameterObj(tdt::TsdCmdType type, void **cmdParameterObj); - -/** -* @ingroup SetCmdParameterObjAttribute -* @brief set cmdParameterObj input value. -* -* @par Function -* set cmdParameterObj input value. -* -* @param type [IN] type tdt::TsdCmdType, tsd func type. -* @param cmdParameterObj [IN] type void *, func parameter obj. -* @param itemType [IN] type tdt::InputItem, func input type. -* @param valuePtr [IN] type const void *, input value. -* @param valueLength [IN] type int, input value length. -* @retval TDT_OK Success -* @retval TDT_INTERFACE_NOT_SUPPORT -* -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. -* @li status.h: Header file where 'TDT_StatusT' defined -*/ -TDT_StatusT SetCmdParameterObjAttribute(tdt::TsdCmdType type, void *cmdParameterObj, tdt::InputItem itemType, const void *valuePtr, int valueLength); - -/** -* @ingroup GetCmdParameterObjAttribute -* @brief set cmdParameterObj input value. -* -* @par Function -* set cmdParameterObj input value. -* -* @param type [IN] type tdt::TsdCmdType, tsd func type. -* @param cmdParameterObj [IN] type void *, func parameter obj. -* @param itemType [IN] type tdt::InputItem, func input type. -* @param valuePtr [IN] type const void *, input value. -* @param valueLength [IN] type int, input value length. -* @retval TDT_OK Success -* @retval TDT_INTERFACE_NOT_SUPPORT -* -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. -* @li status.h: Header file where 'TDT_StatusT' defined -*/ -TDT_StatusT GetCmdParameterObjAttribute(tdt::TsdCmdType type, void *cmdParameterObj, tdt::InputItem itemType, void *valuePtr, int &valueLength); - -/** -* @ingroup TsdClientCmd -* @brief creat tsdclient func parameter obj. -* -* @par Function -* creat tsdclient func parameter obj. -* -* @param type [IN] type tdt::TsdCmdType, tsd func type. -* @param cmdParameterObj [IN] type void *, func parameter obj. -* @retval TDT_OK Success -* @retval TDT_INTERFACE_NOT_SUPPORT -* -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. -* @li status.h: Header file where 'TDT_StatusT' defined -*/ -TDT_StatusT TsdClientCmd(tdt::TsdCmdType cmd, void *cmdParameterObj); - #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h index a1c39a51..67adecd9 100644 --- a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h +++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h @@ -1,12 +1,18 @@ /** -* @file adx_datadump_server.h -* -* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef ADX_DATADUMP_SERVER_H #define ADX_DATADUMP_SERVER_H diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index c8715041..07b32149 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -14,151 +14,99 @@ * limitations under the License. */ -#ifndef MSPROF_ENGINE_PROF_ACL_API_H_ -#define MSPROF_ENGINE_PROF_ACL_API_H_ - -#define MSVP_MAX_DEV_NUM 64 -#define MSVP_PROF_API __attribute__((visibility("default"))) +#ifndef MSPROFILER_API_PROF_ACL_API_H_ +#define MSPROFILER_API_PROF_ACL_API_H_ // DataTypeConfig -#define PROF_ACL_API 0x0001 -#define PROF_TASK_TIME 0x0002 -#define PROF_AICORE_METRICS 0x0004 -#define PROF_AICPU_TRACE 0x0008 -#define PROF_MODEL_EXECUTE 0x0010 -#define PROF_RUNTIME_API 0x0020 -#define PROF_RUNTIME_TRACE 0x0040 -#define PROF_SCHEDULE_TIMELINE 0x0080 -#define PROF_SCHEDULE_TRACE 0x0100 -#define PROF_AIVECTORCORE_METRICS 0x0200 -#define PROF_SUBTASK_TIME 0x0400 - -#define PROF_TRAINING_TRACE 0x0800 -#define PROF_HCCL_TRACE 0x1000 -#define PROF_DATA_PROCESS 0x2000 -#define PROF_TASK_TRACE 0x3842 +#define PROF_ACL_API 0x00000001 +#define PROF_TASK_TIME 0x00000002 +#define PROF_AICORE_METRICS 0x00000004 +#define PROF_AICPU_TRACE 0x00000008 +#define PROF_MODEL_EXECUTE 0x00000010 +#define PROF_RUNTIME_API 0x00000020 +#define PROF_RUNTIME_TRACE 0x00000040 +#define PROF_SCHEDULE_TIMELINE 0x00000080 +#define PROF_SCHEDULE_TRACE 0x00000100 +#define PROF_AIVECTORCORE_METRICS 0x00000200 +#define PROF_SUBTASK_TIME 0x00000400 + +#define PROF_TRAINING_TRACE 0x00000800 +#define PROF_HCCL_TRACE 0x00001000 + +#define PROF_TASK_TRACE 0x00001852 + +// system profilinig switch +#define PROF_CPU 0x00010000 +#define PROF_HARDWARE_MEMORY 0x00020000 +#define PROF_IO 0x00040000 +#define PROF_INTER_CONNECTION 0x00080000 +#define PROF_DVPP 0x00100000 +#define PROF_SYS_AICORE_SAMPLE 0x00200000 +#define PROF_AIVECTORCORE_SAMPLE 0x00400000 #define PROF_MODEL_LOAD 0x8000000000000000 // DataTypeConfig MASK -#define PROF_ACL_API_MASK 0x0001 -#define PROF_TASK_TIME_MASK 0x0002 -#define PROF_AICORE_METRICS_MASK 0x0004 -#define PROF_AICPU_TRACE_MASK 0x0008 -#define PROF_MODEL_EXECUTE_MASK 0x0010 -#define PROF_RUNTIME_API_MASK 0x0020 -#define PROF_RUNTIME_TRACE_MASK 0x0040 -#define PROF_SCHEDULE_TIMELINE_MASK 0x0080 -#define PROF_SCHEDULE_TRACE_MASK 0x0100 -#define PROF_AIVECTORCORE_METRICS_MASK 0x0200 -#define PROF_SUBTASK_TIME_MASK 0x0400 - -#define PROF_TRAINING_TRACE_MASK 0x0800 -#define PROF_HCCL_TRACE_MASK 0x1000 -#define PROF_DATA_PROCESS_MASK 0x2000 +#define PROF_ACL_API_MASK 0x00000001 +#define PROF_TASK_TIME_MASK 0x00000002 +#define PROF_AICORE_METRICS_MASK 0x00000004 +#define PROF_AICPU_TRACE_MASK 0x00000008 +#define PROF_MODEL_EXECUTE_MASK 0x00000010 +#define PROF_RUNTIME_API_MASK 0x00000020 +#define PROF_RUNTIME_TRACE_MASK 0x00000040 +#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 +#define PROF_SCHEDULE_TRACE_MASK 0x00000100 +#define PROF_AIVECTORCORE_METRICS_MASK 0x00000200 +#define PROF_SUBTASK_TIME_MASK 0x00000400 + +#define PROF_TRAINING_TRACE_MASK 0x00000800 +#define PROF_HCCL_TRACE_MASK 0x00001000 + +// system profilinig mask +#define PROF_CPU_MASK 0x00010000 +#define PROF_HARDWARE_MEMORY_MASK 0x00020000 +#define PROF_IO_MASK 0x00040000 +#define PROF_INTER_CONNECTION_MASK 0x00080000 +#define PROF_DVPP_MASK 0x00100000 +#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 +#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 #define PROF_MODEL_LOAD_MASK 0x8000000000000000 -#include -#include - -/** - * @name ProrErrorCode - * @brief error code enum of prof_acl_apis - */ -enum ProfErrorCode { - PROF_ERROR_NONE = 0, // ok - PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr - PROF_ERROR_REPEAT_INIT, // profiling has already been inited - PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string - PROF_ERROR_DIR_NO_ACCESS, // dir is not accessable - PROF_ERROR_FAILURE, // failed to init or start profiling - PROF_ERROR_NOT_INITED, // profiling has not been inited - PROF_ERROR_DEVICE_INVALID, // device id invalid - PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics - PROF_ERROR_REPEAT_START, // profiilng has already been started - PROF_ERROR_NOT_STARTED, // profiling has not been started -}; - -/** - * @brief transfer profiling config in acl.json to sample config - * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} - * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE -/** - * @name ProfInit - * @brief init profiling - * @param profInitCfg [IN] config of init profiling of json format - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); - -/** - * @name ProfAicoreMetrics - * @brief aicore metrics enum - */ -enum ProfAicoreMetrics { - PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, - PROF_AICORE_PIPELINE = 1, - PROF_AICORE_SYNCHRONIZATION = 2, - PROF_AICORE_MEMORY = 3, - PROF_AICORE_INTERNAL_MEMORY = 4, - PROF_AICORE_STALL = 5, - PROF_AICORE_EVENT = 255 -}; +#if (OS_TYPE != LINUX) +#define MSVP_PROF_API __declspec(dllexport) +#else +#define MSVP_PROF_API __attribute__((visibility("default"))) +#endif -/** - * @name ProfConfig - * @brief struct of ProfStart - */ -struct ProfConfig { - uint32_t devNums; // length of device id list - uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list - ProfAicoreMetrics aicoreMetrics; // aicore metric - uint64_t dataTypeConfig; // data type to start profiling -}; +#include +namespace Msprofiler { +namespace Api { /** - * @name ProfStartProfiling - * @brief start profiling - * @param profStartCfg [IN] config to start profiling - * @return ProfErrorCode + * @name ProfGetOpExecutionTime + * @brief get op execution time of specific part of data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param index [IN] index of part(op) + * @return op execution time (us) */ -MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); +MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); +} +} -/** - * @name ProfStopConfig - * @brief struct of ProfStop - */ -struct ProfStopConfig { - uint64_t padding; -}; +#ifdef __cplusplus +extern "C" { +#endif -/** - * @name ProfStopProfiling - * @brief stop profiling - * @param profStopCfg [IN] config to stop profiling - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); - -/** - * @name ProfFinalize - * @brief finalize profiling task - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfFinalize(); +MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); -/** - * @name ProfGetDataTypeConfig - * @brief get dataTypeConfig started with of one device - * @param deviceId [IN] deviceId to get dataTypeConfig - * @param dataTypeConfig [OUT] result get - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); +#ifdef __cplusplus +} +#endif -#endif // MSPROF_ENGINE_PROF_ACL_API_H_ +#endif // MSPROFILER_API_PROF_ACL_API_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h b/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h index 4f013eef..f8cb1b22 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h +++ b/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h @@ -16,7 +16,16 @@ #ifndef MSPROF_ENGINE_PROF_MGR_CORE_H_ #define MSPROF_ENGINE_PROF_MGR_CORE_H_ +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE != LINUX) +#define MSVP_PROF_API __declspec(dllexport) +#else #define MSVP_PROF_API __attribute__((visibility("default"))) +#endif + #include #include diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index ff91351b..d5ed7569 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -41,42 +41,44 @@ namespace Engine { * the Reporter class .used to send data to profiling */ class MSVP_PROF_API Reporter { - public: - virtual ~Reporter() {} +public: + virtual ~Reporter() {} - public: - /** - * @ingroup reporter - * @name : Report - * @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n - The data will be firstly appended to cache, if the cache is full, data will be ignored - * @param data [IN] const ReporterData * the data send to libmsporf - * @retval PROFILING_SUCCESS 0 (success) - * @retval PROFILING_FAILED -1 (failed) - * - * @par depend: - * @li libmsprof - * @li prof_reporter.h - * @since c60 - * @see Flush - */ - virtual int Report(const ReporterData *data) = 0; +public: + /** + * @ingroup reporter + * @name : Report + * @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n + The data will be firstly appended to cache, if the cache is full, data will be ignored + * @param data [IN] const ReporterData * the data send to libmsporf + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_reporter.h + * @since c60 + * @see Flush + */ + virtual int Report(const ReporterData *data) = 0; - /** - * @ingroup reporter - * @name : Flush - * @brief : API of libmsprof, notify libmsprof send data over, it's a blocking function \n - The all datas of cache will be write to file or send to host - * @retval PROFILING_SUCCESS 0 (success) - * @retval PROFILING_FAILED -1 (failed) - * - * @par depend: - * @li libmsprof - * @li prof_reporter.h - * @since c60 - * @see ProfMgrStop - */ - virtual int Flush() = 0; + /** + * @ingroup reporter + * @name : Flush + * @brief : API of libmsprof, notify libmsprof send data over, it's a blocking function \n + The all datas of cache will be write to file or send to host + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_reporter.h + * @since c60 + * @see ProfMgrStop + */ + virtual int Flush() = 0; + + virtual uint32_t GetReportDataMaxLen() = 0; }; } // namespace Engine diff --git a/third_party/prebuild/aarch64/libalog.so b/third_party/prebuild/aarch64/libalog.so index e041ad7e..65aefa59 100755 Binary files a/third_party/prebuild/aarch64/libalog.so and b/third_party/prebuild/aarch64/libalog.so differ diff --git a/third_party/prebuild/aarch64/liberror_manager.so b/third_party/prebuild/aarch64/liberror_manager.so index 759d8e30..6358365b 100755 Binary files a/third_party/prebuild/aarch64/liberror_manager.so and b/third_party/prebuild/aarch64/liberror_manager.so differ diff --git a/third_party/prebuild/aarch64/libmmpa.a b/third_party/prebuild/aarch64/libmmpa.a index d7c29e2b..7d042c4c 100755 Binary files a/third_party/prebuild/aarch64/libmmpa.a and b/third_party/prebuild/aarch64/libmmpa.a differ diff --git a/third_party/prebuild/x86_64/libalog.so b/third_party/prebuild/x86_64/libalog.so index 051f85d9..4c8a45a4 100755 Binary files a/third_party/prebuild/x86_64/libalog.so and b/third_party/prebuild/x86_64/libalog.so differ diff --git a/third_party/prebuild/x86_64/liberror_manager.so b/third_party/prebuild/x86_64/liberror_manager.so index cd9ad8bc..d97e6ef1 100755 Binary files a/third_party/prebuild/x86_64/liberror_manager.so and b/third_party/prebuild/x86_64/liberror_manager.so differ diff --git a/third_party/prebuild/x86_64/libmmpa.a b/third_party/prebuild/x86_64/libmmpa.a index bec195ad..13ca68db 100755 Binary files a/third_party/prebuild/x86_64/libmmpa.a and b/third_party/prebuild/x86_64/libmmpa.a differ