@@ -125,7 +125,6 @@ else ()
     message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
   endif()
 endif()

 set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)
 set(PARSER_DIR ${CMAKE_CURRENT_LIST_DIR}/parser)
 set(GE_DEPEND_DIR ${CMAKE_CURRENT_LIST_DIR}/..)
@@ -158,6 +157,7 @@ else ()
 elseif(ENABLE_MS_TESTCASES)
     include(cmake/external_libs/protobuf_static.cmake)
     include(cmake/external_libs/protoc.cmake)
+    include(cmake/external_libs/json.cmake)
     include(cmake/external_libs/securec.cmake)
     include(cmake/FindModule.cmake)
     include(cmake/intf_pub_linux.cmake)
@@ -175,5 +175,4 @@ else ()
     endif()

     add_subdirectory(ge)
 endif ()
@@ -9,10 +9,6 @@ if (GE_PB_PKG)
     set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
     set(MD5 "0dc903888211db3a0f170304cd9f3a89")
     set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
-#elseif (ENABLE_GITEE)
-#    set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
-#    set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
-#    set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
 else()
     set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
     set(MD5 "0dc903888211db3a0f170304cd9f3a89")
@@ -16,6 +16,7 @@ set(GE_SRC_LIST
     "task/label_goto_task.cc"
     "task/label_set_task.cc"
     "task/label_switch_task.cc"
+    "task/label_manager.cc"
)

add_library(ge_runtime SHARED ${GE_SRC_LIST})
@@ -53,15 +53,7 @@ HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<Hccl
   }
 }

-HcclTask::~HcclTask() {
-  if (workspace_mem_ != nullptr) {
-    rtError_t rt_ret = rtFree(workspace_mem_);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtFree workspace_mem_ failed! ret: 0x%X.", rt_ret);
-    }
-    workspace_mem_ = nullptr;
-  }
-}
+HcclTask::~HcclTask() {}

 bool HcclTask::Distribute() {
   // Ops kernel info store
@@ -80,11 +72,7 @@ bool HcclTask::Distribute() {
   SetSecondaryStream();

   if (task_info_->workspace_size() > 0) {
-    rtError_t rt_ret = rtMalloc(&workspace_mem_, task_info_->workspace_size(), RT_MEMORYINFO_HBM);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
-      return false;
-    }
+    workspace_mem_ = task_info_->workspace_addr();
   }

   GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl.");
@@ -16,33 +16,46 @@
 #include "ge_runtime/task/label_goto_task.h"
 #include "ge_runtime/task/task_factory.h"
+#include "framework/common/util.h"

 namespace ge {
 namespace model_runner {
 LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info)
-    : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), task_info_(task_info) {
+    : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info),
+      task_info_(task_info),
+      stream_(nullptr),
+      index_value_(nullptr) {
   if (task_info_ == nullptr) {
     GELOGW("task_info_ is null!");
     return;
   }
   auto stream_list = model_context.stream_list();
   auto label_list = model_context.label_list();
+  rt_model_handle_ = model_context.rt_model_handle();
   uint32_t stream_id = task_info->stream_id();
-  uint32_t label_id = task_info->label_id();
+  label_id_ = task_info->label_id();
   GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
-  GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id);
-  if (stream_id >= stream_list.size() || label_id >= label_list.size()) {
+  GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id_);
+  if (stream_id >= stream_list.size() || label_id_ >= label_list.size()) {
     GELOGW("Stream/Label id invalid.");
     return;
   }
   stream_ = stream_list[stream_id];
-  label_ = label_list[label_id];
+  label_manager_ = LabelManager::GetInstance();
+  if (label_manager_ == nullptr) {
+    GELOGW("Get label manager instance failed.");
+    return;
+  }
+  label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, {label_id_}, label_list);
 }

 LabelGotoTask::~LabelGotoTask() {
-  if (index_value_ != nullptr) {
-    rtError_t rt_ret = rtFree(index_value_);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtFree index_value_ failed! ret: 0x%X.", rt_ret);
-    }
-    index_value_ = nullptr;
-  }
+  GE_FREE_RT_LOG(label_info_);
+  GE_FREE_RT_LOG(index_value_);
 }

 bool LabelGotoTask::Distribute() {
@@ -94,21 +107,34 @@ bool LabelGotoTask::CheckParamValid() {
     return false;
   }

-  if (label_ == nullptr) {
-    GELOGE(PARAM_INVALID, "label is null!");
+  if (label_info_ == nullptr) {
+    GELOGE(PARAM_INVALID, "label info is null!");
     return false;
   }

-  if (label_info_ != nullptr) {
-    GELOGE(PARAM_INVALID, "label_info_ has dirty data.");
-    return false;
+  if (index_value_ == nullptr) {
+    rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return false;
+    }
+
+    uint64_t index = 0;
+    rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return false;
+    }
   }

-  if (index_value_ != nullptr) {
-    GELOGE(PARAM_INVALID, "index_value_ has dirty data.");
+  void *label_info = label_info_->GetLabelInfo();
+  rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, 1, label_info, stream_);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
     return false;
   }

+  GELOGI("DistributeTask end.");
   return true;
 }
@@ -18,7 +18,11 @@
 #define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_

 #include <memory>
+#include <vector>
+#include <map>
+#include <mutex>
 #include "ge_runtime/task/task.h"
+#include "ge_runtime/task/label_manager.h"

 namespace ge {
 namespace model_runner {
@@ -31,13 +35,13 @@ class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> {
   bool Distribute() override;

  private:
-  bool CheckParamValid();
   std::shared_ptr<LabelGotoTaskInfo> task_info_;
-  void *stream_{nullptr};
-  void *label_{nullptr};
-  void *label_info_{nullptr};
-  void *index_value_{nullptr};
+  void *stream_;
+  std::shared_ptr<LabelGuard> label_info_;
+  void *index_value_;
+  uint32_t label_id_;
+  rtModel_t rt_model_handle_;
+  std::shared_ptr<LabelManager> label_manager_;
 };
 } // namespace model_runner
 } // namespace ge
@@ -0,0 +1,119 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ge_runtime/task/label_manager.h"
+#include <algorithm>
+#include <string>
+#include "runtime/mem.h"
+#include "runtime/rt_model.h"
+#include "common/ge_inner_error_codes.h"
+#include "framework/common/debug/ge_log.h"
+
+namespace ge {
+namespace model_runner {
+std::weak_ptr<LabelManager> LabelManager::instance_;
+std::mutex LabelManager::instance_mutex_;
+
+template <class T>
+static std::string GetVectorString(const std::vector<T> &vec) {
+  std::string ret;
+  for (size_t i = 0; i < vec.size(); ++i) {
+    if (i != 0) {
+      ret.push_back(',');
+    }
+    ret += std::to_string(vec[i]);
+  }
+  return ret;
+}
+
+LabelGuard::~LabelGuard() {
+  void *label_info = GetLabelInfo();
+  if (label_info != nullptr) {
+    rtError_t rt_ret = rtFree(label_info);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "rtFree label_info failed! ret: 0x%X.", rt_ret);
+    }
+  }
+}
+
+std::shared_ptr<LabelManager> LabelManager::GetInstance() {
+  std::lock_guard<std::mutex> lock(instance_mutex_);
+  auto instance = instance_.lock();
+  if (instance != nullptr) {
+    return instance;
+  }
+
+  instance = std::make_shared<LabelManager>();
+  instance_ = instance;
+  return instance;
+}
+
+std::shared_ptr<LabelGuard> LabelManager::GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids,
+                                                       const std::vector<void *> &all_label) {
+  std::lock_guard<std::mutex> lock(model_info_mapping_mutex_);
+  rtError_t rt_ret;
+  auto model_iter = model_info_mapping_.find(model);
+  if (model_iter == model_info_mapping_.end()) {
+    model_info_mapping_.emplace(model, std::map<std::string, std::weak_ptr<LabelGuard>>());
+    model_iter = model_info_mapping_.find(model);
+  }
+
+  std::string label_id_str = GetVectorString(label_ids);
+  auto &label_map = model_iter->second;
+  auto label_iter = label_map.find(label_id_str);
+  if (label_iter != label_map.end()) {
+    auto label_guard = label_iter->second.lock();
+    if (label_guard != nullptr) {
+      GELOGI("model %p find same label id %s.", model, label_id_str.c_str());
+      return label_guard;
+    }
+  }
+
+  GELOGI("Alloc label id %s for model %p.", label_id_str.c_str(), model);
+  void *label_info;
+  std::vector<void *> label_list;
+  bool status = true;
+  std::transform(label_ids.begin(), label_ids.end(), std::back_inserter(label_list),
+                 [&all_label, &status](uint32_t idx) -> void * {
+                   if (idx >= all_label.size()) {
+                     GELOGE(PARAM_INVALID, "Invalid label id %u, all label list size %zu.", idx, all_label.size());
+                     status = false;
+                     return nullptr;
+                   }
+                   return all_label[idx];
+                 });
+  if (!status) {
+    GELOGE(PARAM_INVALID, "Get label info failed.");
+    return nullptr;
+  }
+
+  uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size();
+  rt_ret = rtMalloc(&label_info, label_info_size, RT_MEMORY_HBM);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+    return nullptr;
+  }
+
+  rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info, label_info_size);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+    return nullptr;
+  }
+
+  auto label_guard = std::make_shared<LabelGuard>(label_info);
+  label_map.emplace(label_id_str, label_guard);
+  return label_guard;
+}
+}  // namespace model_runner
+}  // namespace ge
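
GetLabelInfo keys its cache on the (model, label-id string) pair and stores weak_ptr values, so a device-side label table is reused while any task still holds it and freed by ~LabelGuard as soon as the last holder releases it. A sketch of the intended call pattern (illustrative only; model and labels stand in for a real rtModel_t and label list):

    auto manager = LabelManager::GetInstance();
    auto first = manager->GetLabelInfo(model, {0, 1}, labels);
    auto second = manager->GetLabelInfo(model, {0, 1}, labels);  // cache hit: first.get() == second.get()
    // One device-side table is shared between both callers; ~LabelGuard
    // (and its rtFree) runs once, after both shared_ptrs are released.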
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
+#define GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
+
+#include <vector>
+#include <memory>
+#include <mutex>
+#include <map>
+#include <runtime/base.h>
+
+namespace ge {
+namespace model_runner {
+class LabelGuard {
+ public:
+  explicit LabelGuard(void *label_info) : label_info_(reinterpret_cast<uintptr_t>(label_info)) {}
+  ~LabelGuard();
+  void *GetLabelInfo() { return reinterpret_cast<void *>(label_info_); }
+
+ private:
+  uintptr_t label_info_;
+};
+
+class LabelManager {
+ public:
+  static std::shared_ptr<LabelManager> GetInstance();
+  std::shared_ptr<LabelGuard> GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids,
+                                           const std::vector<void *> &all_label);
+
+ private:
+  std::mutex model_info_mapping_mutex_;
+  std::map<rtModel_t, std::map<std::string, std::weak_ptr<LabelGuard>>> model_info_mapping_;
+
+  static std::weak_ptr<LabelManager> instance_;
+  static std::mutex instance_mutex_;
+};
+}  // namespace model_runner
+}  // namespace ge
+#endif  // GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
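
GetInstance keeps only a weak_ptr, so the manager (and everything it caches) lives exactly as long as at least one task holds the returned shared_ptr; after full release, the next caller constructs a fresh instance. The same pattern in isolation, as a generic sketch (names here are illustrative, not GE code):

    #include <memory>
    #include <mutex>

    class Registry {
     public:
      static std::shared_ptr<Registry> GetInstance() {
        std::lock_guard<std::mutex> lock(mutex_);
        auto instance = instance_.lock();   // still alive? hand it out again
        if (instance == nullptr) {
          instance = std::make_shared<Registry>();
          instance_ = instance;             // observe without owning
        }
        return instance;
      }

     private:
      static std::weak_ptr<Registry> instance_;
      static std::mutex mutex_;
    };

    std::weak_ptr<Registry> Registry::instance_;
    std::mutex Registry::mutex_;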
@@ -24,14 +24,14 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context,
     : TaskRepeater<LabelSwitchTaskInfo>(model_context, task_info),
       task_info_(task_info),
       stream_(nullptr),
-      all_label_resource_(),
       label_info_(nullptr) {
   if (task_info_ == nullptr) {
     GELOGW("task_info_ is null!");
     return;
   }
-  all_label_resource_ = model_context.label_list();
+  rt_model_handle_ = model_context.rt_model_handle();
+  auto all_label_resource = model_context.label_list();
   auto stream_list = model_context.stream_list();
   uint32_t stream_id = task_info->stream_id();
   GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
@@ -40,18 +40,16 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context,
     return;
   }

   stream_ = stream_list[stream_id];
-}
-
-LabelSwitchTask::~LabelSwitchTask() {
-  if (label_info_ != nullptr) {
-    rtError_t rt_ret = rtFree(label_info_);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! ret: 0x%X.", rt_ret);
-    }
-    label_info_ = nullptr;
+  label_manager_ = LabelManager::GetInstance();
+  if (label_manager_ == nullptr) {
+    GELOGW("Get label manager instance failed.");
+    return;
   }
+  label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, task_info_->label_list(), all_label_resource);
 }

+LabelSwitchTask::~LabelSwitchTask() {}
+
 bool LabelSwitchTask::Distribute() {
   GELOGI("LabelSwitchTask Distribute start.");
   if (!CheckParamValid()) {
@@ -117,8 +115,8 @@ bool LabelSwitchTask::CheckParamValid() {
     return false;
   }

-  if (label_info_ != nullptr) {
-    GELOGE(PARAM_INVALID, "label_info_ has dirty data.");
+  if (label_info_ == nullptr) {
+    GELOGE(PARAM_INVALID, "CopyLabelList failed, label info is null.");
     return false;
   }

@@ -126,6 +124,5 @@ bool LabelSwitchTask::CheckParamValid() {
 }

 REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo);
 } // namespace model_runner
 } // namespace ge
@@ -19,6 +19,7 @@
 
 #include <memory>
 #include "ge_runtime/task/task.h"
+#include "ge_runtime/task/label_manager.h"

 namespace ge {
 namespace model_runner {
@@ -35,8 +36,9 @@ class LabelSwitchTask : public TaskRepeater<LabelSwitchTaskInfo> {

   std::shared_ptr<LabelSwitchTaskInfo> task_info_;
   void *stream_;
-  std::vector<void *> all_label_resource_;
-  void *label_info_;
+  rtModel_t rt_model_handle_;
+  std::shared_ptr<LabelGuard> label_info_;
+  std::shared_ptr<LabelManager> label_manager_;
 };
 } // namespace model_runner
 } // namespace ge
@@ -0,0 +1,82 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_H_
+#define INC_EXTERNAL_ACL_ACL_H_
+
+#include "acl_rt.h"
+#include "acl_op.h"
+#include "acl_mdl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Current version is 1.0.0
+#define ACL_MAJOR_VERSION 1
+#define ACL_MINOR_VERSION 0
+#define ACL_PATCH_VERSION 0
+
+/**
+ * @ingroup AscendCL
+ * @brief acl initialize
+ *
+ * @par Restriction
+ * The aclInit interface can be called only once in a process
+ * @param configPath [IN] the config path, it can be NULL
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath);
+
+/**
+ * @ingroup AscendCL
+ * @brief acl finalize
+ *
+ * @par Restriction
+ * Need to call aclFinalize before the process exits.
+ * After calling aclFinalize, the services cannot continue to be used normally.
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclFinalize();
+
+/**
+ * @ingroup AscendCL
+ * @brief query ACL interface version
+ *
+ * @param majorVersion [OUT] ACL interface major version
+ * @param minorVersion [OUT] ACL interface minor version
+ * @param patchVersion [OUT] ACL interface patch version
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion);
+
+/**
+ * @ingroup AscendCL
+ * @brief get recent error message
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY const char *aclGetRecentErrMsg();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_H_
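
The header implies an init-once / use / finalize-once lifecycle per process. A minimal program against these declarations might look as follows (a sketch; it assumes the AscendCL runtime library is linked and a device environment is present):

    #include <stdio.h>
    #include "acl/acl.h"

    int main(void) {
      if (aclInit(NULL) != ACL_SUCCESS) {  // NULL config path is allowed
        fprintf(stderr, "aclInit failed: %s\n", aclGetRecentErrMsg());
        return 1;
      }
      int32_t major = 0, minor = 0, patch = 0;
      if (aclrtGetVersion(&major, &minor, &patch) == ACL_SUCCESS) {
        printf("ACL interface version %d.%d.%d\n", major, minor, patch);
      }
      (void)aclFinalize();  // must run before the process exits
      return 0;
    }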
@@ -0,0 +1,638 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_BASE_H_
+#define INC_EXTERNAL_ACL_ACL_BASE_H_
+
+#include <stdint.h>
+#include <stddef.h>
+#include "error_codes/rt_error_codes.h"
+#include "error_codes/ge_error_codes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define ACL_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define ACL_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define ACL_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define ACL_FUNC_VISIBILITY
+#endif
+#endif
+
+#ifdef __GNUC__
+#define ACL_DEPRECATED __attribute__((deprecated))
+#define ACL_DEPRECATED_MESSAGE(message) __attribute__((deprecated(message)))
+#elif defined(_MSC_VER)
+#define ACL_DEPRECATED __declspec(deprecated)
+#define ACL_DEPRECATED_MESSAGE(message) __declspec(deprecated(message))
+#else
+#define ACL_DEPRECATED
+#define ACL_DEPRECATED_MESSAGE(message)
+#endif
+
+typedef void *aclrtStream;
+typedef void *aclrtEvent;
+typedef void *aclrtContext;
+typedef int aclError;
+typedef uint16_t aclFloat16;
+typedef struct aclDataBuffer aclDataBuffer;
+typedef struct aclTensorDesc aclTensorDesc;
+
+static const int ACL_ERROR_NONE = 0;
+static const int ACL_SUCCESS = 0;
+
+static const int ACL_ERROR_INVALID_PARAM = 100000;
+static const int ACL_ERROR_UNINITIALIZE = 100001;
+static const int ACL_ERROR_REPEAT_INITIALIZE = 100002;
+static const int ACL_ERROR_INVALID_FILE = 100003;
+static const int ACL_ERROR_WRITE_FILE = 100004;
+static const int ACL_ERROR_INVALID_FILE_SIZE = 100005;
+static const int ACL_ERROR_PARSE_FILE = 100006;
+static const int ACL_ERROR_FILE_MISSING_ATTR = 100007;
+static const int ACL_ERROR_FILE_ATTR_INVALID = 100008;
+static const int ACL_ERROR_INVALID_DUMP_CONFIG = 100009;
+static const int ACL_ERROR_INVALID_PROFILING_CONFIG = 100010;
+static const int ACL_ERROR_INVALID_MODEL_ID = 100011;
+static const int ACL_ERROR_DESERIALIZE_MODEL = 100012;
+static const int ACL_ERROR_PARSE_MODEL = 100013;
+static const int ACL_ERROR_READ_MODEL_FAILURE = 100014;
+static const int ACL_ERROR_MODEL_SIZE_INVALID = 100015;
+static const int ACL_ERROR_MODEL_MISSING_ATTR = 100016;
+static const int ACL_ERROR_MODEL_INPUT_NOT_MATCH = 100017;
+static const int ACL_ERROR_MODEL_OUTPUT_NOT_MATCH = 100018;
+static const int ACL_ERROR_MODEL_NOT_DYNAMIC = 100019;
+static const int ACL_ERROR_OP_TYPE_NOT_MATCH = 100020;
+static const int ACL_ERROR_OP_INPUT_NOT_MATCH = 100021;
+static const int ACL_ERROR_OP_OUTPUT_NOT_MATCH = 100022;
+static const int ACL_ERROR_OP_ATTR_NOT_MATCH = 100023;
+static const int ACL_ERROR_OP_NOT_FOUND = 100024;
+static const int ACL_ERROR_OP_LOAD_FAILED = 100025;
+static const int ACL_ERROR_UNSUPPORTED_DATA_TYPE = 100026;
+static const int ACL_ERROR_FORMAT_NOT_MATCH = 100027;
+static const int ACL_ERROR_BIN_SELECTOR_NOT_REGISTERED = 100028;
+static const int ACL_ERROR_KERNEL_NOT_FOUND = 100029;
+static const int ACL_ERROR_BIN_SELECTOR_ALREADY_REGISTERED = 100030;
+static const int ACL_ERROR_KERNEL_ALREADY_REGISTERED = 100031;
+static const int ACL_ERROR_INVALID_QUEUE_ID = 100032;
+static const int ACL_ERROR_REPEAT_SUBSCRIBE = 100033;
+static const int ACL_ERROR_STREAM_NOT_SUBSCRIBE = 100034;
+static const int ACL_ERROR_THREAD_NOT_SUBSCRIBE = 100035;
+static const int ACL_ERROR_WAIT_CALLBACK_TIMEOUT = 100036;
+static const int ACL_ERROR_REPEAT_FINALIZE = 100037;
+static const int ACL_ERROR_NOT_STATIC_AIPP = 100038;
+static const int ACL_ERROR_COMPILING_STUB_MODE = 100039;
+static const int ACL_ERROR_GROUP_NOT_SET = 100040;
+static const int ACL_ERROR_GROUP_NOT_CREATE = 100041;
+static const int ACL_ERROR_PROF_ALREADY_RUN = 100042;
+static const int ACL_ERROR_PROF_NOT_RUN = 100043;
+static const int ACL_ERROR_DUMP_ALREADY_RUN = 100044;
+static const int ACL_ERROR_DUMP_NOT_RUN = 100045;
+static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046;
+static const int ACL_ERROR_PROF_API_CONFLICT = 148047;
+static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048;
+static const int ACL_ERROR_INVALID_OPP_PATH = 148049;
+static const int ACL_ERROR_OP_UNSUPPORTED_DYNAMIC = 148050;
+
+static const int ACL_ERROR_BAD_ALLOC = 200000;
+static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
+static const int ACL_ERROR_INVALID_DEVICE = 200002;
+static const int ACL_ERROR_MEMORY_ADDRESS_UNALIGNED = 200003;
+static const int ACL_ERROR_RESOURCE_NOT_MATCH = 200004;
+static const int ACL_ERROR_INVALID_RESOURCE_HANDLE = 200005;
+static const int ACL_ERROR_FEATURE_UNSUPPORTED = 200006;
+static const int ACL_ERROR_PROF_MODULES_UNSUPPORTED = 200007;
+
+static const int ACL_ERROR_STORAGE_OVER_LIMIT = 300000;
+
+static const int ACL_ERROR_INTERNAL_ERROR = 500000;
+static const int ACL_ERROR_FAILURE = 500001;
+static const int ACL_ERROR_GE_FAILURE = 500002;
+static const int ACL_ERROR_RT_FAILURE = 500003;
+static const int ACL_ERROR_DRV_FAILURE = 500004;
+static const int ACL_ERROR_PROFILING_FAILURE = 500005;
+
+#define ACL_TENSOR_SHAPE_RANGE_NUM 2
+#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE
+
+typedef enum {
+  ACL_DT_UNDEFINED = -1,
+  ACL_FLOAT = 0,
+  ACL_FLOAT16 = 1,
+  ACL_INT8 = 2,
+  ACL_INT32 = 3,
+  ACL_UINT8 = 4,
+  ACL_INT16 = 6,
+  ACL_UINT16 = 7,
+  ACL_UINT32 = 8,
+  ACL_INT64 = 9,
+  ACL_UINT64 = 10,
+  ACL_DOUBLE = 11,
+  ACL_BOOL = 12,
+  ACL_STRING = 13,
+} aclDataType;
+
+typedef enum {
+  ACL_FORMAT_UNDEFINED = -1,
+  ACL_FORMAT_NCHW = 0,
+  ACL_FORMAT_NHWC = 1,
+  ACL_FORMAT_ND = 2,
+  ACL_FORMAT_NC1HWC0 = 3,
+  ACL_FORMAT_FRACTAL_Z = 4,
+  ACL_FORMAT_NC1HWC0_C04 = 12,
+  ACL_FORMAT_NDHWC = 27,
+  ACL_FORMAT_FRACTAL_NZ = 29,
+  ACL_FORMAT_NCDHW = 30,
+  ACL_FORMAT_NDC1HWC0 = 32,
+  ACL_FRACTAL_Z_3D = 33
+} aclFormat;
+
+typedef enum {
+  ACL_DEBUG = 0,
+  ACL_INFO = 1,
+  ACL_WARNING = 2,
+  ACL_ERROR = 3,
+} aclLogLevel;
+
+typedef enum {
+  ACL_MEMTYPE_DEVICE = 0,
+  ACL_MEMTYPE_HOST = 1,
+} aclMemType;
+
+/**
+ * @ingroup AscendCL
+ * @brief Converts data of type aclFloat16 to data of type float
+ *
+ * @param value [IN] Data to be converted
+ *
+ * @retval Transformed data
+ */
+ACL_FUNC_VISIBILITY float aclFloat16ToFloat(aclFloat16 value);
+
+/**
+ * @ingroup AscendCL
+ * @brief Converts data of type float to data of type aclFloat16
+ *
+ * @param value [IN] Data to be converted
+ *
+ * @retval Transformed data
+ */
+ACL_FUNC_VISIBILITY aclFloat16 aclFloatToFloat16(float value);
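
Since aclFloat16 is an opaque uint16_t rather than a native floating-point type, these two helpers are the portable way to cross between float and half precision; a round trip drops the bits fp16 cannot hold. For example:

    float in = 3.14159f;
    aclFloat16 half = aclFloatToFloat16(in);
    float out = aclFloat16ToFloat(half);  // out == 3.140625f: fp16 keeps roughly 3 decimal digits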
+
+/**
+ * @ingroup AscendCL
+ * @brief create data of aclDataBuffer
+ *
+ * @param data [IN] pointer to data
+ * @li Need to be managed by the user,
+ * call aclrtMalloc interface to apply for memory,
+ * call aclrtFree interface to release memory
+ *
+ * @param size [IN] size of data in bytes
+ *
+ * @retval pointer to created instance. nullptr if run out of memory
+ *
+ * @see aclrtMalloc | aclrtFree
+ */
+ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of aclDataBuffer
+ *
+ * @par Function
+ * Only the aclDataBuffer type data is destroyed here.
+ * The memory of the data passed in when the aclCreateDataBuffer interface
+ * is called to create aclDataBuffer type data must be released by the user
+ *
+ * @param dataBuffer [IN] pointer to the aclDataBuffer
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclCreateDataBuffer
+ */
+ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief update new data of aclDataBuffer
+ *
+ * @param dataBuffer [OUT] pointer to aclDataBuffer
+ * @li The old data needs to be released by the user, otherwise a memory leak may occur:
+ * call aclGetDataBufferAddr interface to get the old data address,
+ * call aclrtFree interface to release memory
+ *
+ * @param data [IN] pointer to new data
+ * @li Need to be managed by the user,
+ * call aclrtMalloc interface to apply for memory,
+ * call aclrtFree interface to release memory
+ *
+ * @param size [IN] size of data in bytes
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr
+ */
+ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data address from aclDataBuffer
+ *
+ * @param dataBuffer [IN] pointer to the data of aclDataBuffer
+ *
+ * @retval data address
+ */
+ACL_FUNC_VISIBILITY void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data size of aclDataBuffer
+ *
+ * @param dataBuffer [IN] pointer to the data of aclDataBuffer
+ *
+ * @retval data size
+ */
+ACL_DEPRECATED_MESSAGE("aclGetDataBufferSize is deprecated, use aclGetDataBufferSizeV2 instead")
+ACL_FUNC_VISIBILITY uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data size of aclDataBuffer, replaces aclGetDataBufferSize
+ *
+ * @param dataBuffer [IN] pointer to the data of aclDataBuffer
+ *
+ * @retval data size
+ */
+ACL_FUNC_VISIBILITY size_t aclGetDataBufferSizeV2(const aclDataBuffer *dataBuffer);
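
An aclDataBuffer is only a (pointer, size) view; the device memory behind it stays owned by the caller, as the doc comments above stress. A typical create/use/destroy sequence (a sketch; aclrtMalloc, aclrtFree, and the ACL_MEM_MALLOC_HUGE_FIRST policy come from acl_rt.h, which this header does not declare):

    void *dev = NULL;
    size_t len = 1024;
    if (aclrtMalloc(&dev, len, ACL_MEM_MALLOC_HUGE_FIRST) == ACL_SUCCESS) {
      aclDataBuffer *buf = aclCreateDataBuffer(dev, len);
      // ... hand buf to an op or model execution ...
      (void)aclDestroyDataBuffer(buf);  // destroys the wrapper only
      (void)aclrtFree(dev);             // the caller still owns the memory
    }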
+
+/**
+ * @ingroup AscendCL
+ * @brief get size of aclDataType
+ *
+ * @param dataType [IN] the aclDataType whose size is to be queried
+ *
+ * @retval size of the aclDataType
+ */
+ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);
+
+// interfaces of tensor desc
+/**
+ * @ingroup AscendCL
+ * @brief create data aclTensorDesc
+ *
+ * @param dataType [IN] Data types described by tensor
+ * @param numDims [IN] the number of dimensions of the shape
+ * @param dims [IN] the size of the specified dimension
+ * @param format [IN] tensor format
+ *
+ * @retval aclTensorDesc pointer.
+ * @retval nullptr if param is invalid or run out of memory
+ */
+ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims,
+                                                       aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data aclTensorDesc
+ *
+ * @param desc [IN] pointer to the data of aclTensorDesc to destroy
+ */
+ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief set tensor shape range for aclTensorDesc
+ *
+ * @param desc [OUT] pointer to the data of aclTensorDesc
+ * @param dimsCount [IN] the number of dimensions of the shape
+ * @param dimsRange [IN] the range of dimensions of the shape
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
+                                                    int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data type specified by the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval data type specified by the tensor description.
+ * @retval ACL_DT_UNDEFINED if description is null
+ */
+ACL_FUNC_VISIBILITY aclDataType aclGetTensorDescType(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data format specified by the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval data format specified by the tensor description.
+ * @retval ACL_FORMAT_UNDEFINED if description is null
+ */
+ACL_FUNC_VISIBILITY aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get tensor size specified by the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval data size specified by the tensor description.
+ * @retval 0 if description is null
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescSize(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get element count specified by the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval element count specified by the tensor description.
+ * @retval 0 if description is null
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescElementCount(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get number of dims specified by the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval number of dims specified by the tensor description.
+ * @retval 0 if description is null
+ * @retval ACL_UNKNOWN_RANK if the tensor dim is -2
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescNumDims(const aclTensorDesc *desc);
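
Creation and the getters compose as follows; for instance, a float NCHW tensor of shape (1, 3, 224, 224) (illustrative values):

    int64_t dims[] = {1, 3, 224, 224};
    aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT, 4, dims, ACL_FORMAT_NCHW);
    if (desc != NULL) {
      size_t num_dims = aclGetTensorDescNumDims(desc);  // 4
      size_t bytes = aclGetTensorDescSize(desc);        // 1 * 3 * 224 * 224 * sizeof(float)
      aclDestroyTensorDesc(desc);
    }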
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified dim in the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ * @param index [IN] index of dims, start from 0.
+ *
+ * @retval dim specified by the tensor description and index.
+ * @retval -1 if description or index is invalid
+ */
+ACL_DEPRECATED_MESSAGE("aclGetTensorDescDim is deprecated, use aclGetTensorDescDimV2 instead")
+ACL_FUNC_VISIBILITY int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified dim in the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ * @param index [IN] index of dims, start from 0.
+ * @param dimSize [OUT] size of the specified dim.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, size_t index, int64_t *dimSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the range of the specified dim in the tensor description
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ * @param index [IN] index of dims, start from 0.
+ * @param dimRangeNum [IN] number of dimRange.
+ * @param dimRange [OUT] range of the specified dim.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum,
+                                                      int64_t *dimRange);
+
+/**
+ * @ingroup AscendCL
+ * @brief set tensor description name
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param name [IN] tensor description name
+ */
+ACL_FUNC_VISIBILITY void aclSetTensorDescName(aclTensorDesc *desc, const char *name);
+
+/**
+ * @ingroup AscendCL
+ * @brief get tensor description name
+ *
+ * @param desc [IN] pointer to the instance of aclTensorDesc
+ *
+ * @retval tensor description name.
+ * @retval empty string if description is null
+ */
+ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Convert the format in the source aclTensorDesc according to
+ * the specified dstFormat to generate a new target aclTensorDesc.
+ * The format in the source aclTensorDesc remains unchanged.
+ *
+ * @param srcDesc [IN] pointer to the source tensor desc
+ * @param dstFormat [IN] destination format
+ * @param dstDesc [OUT] pointer to the pointer to the destination tensor desc
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
+                                                      aclTensorDesc **dstDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the storage format specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param format [IN] the storage format
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclSetTensorStorageFormat is deprecated, use aclSetTensorFormat instead")
+ACL_FUNC_VISIBILITY aclError aclSetTensorStorageFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the storage shape specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param numDims [IN] the number of dimensions of the shape
+ * @param dims [IN] the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclSetTensorStorageShape is deprecated, use aclSetTensorShape instead")
+ACL_FUNC_VISIBILITY aclError aclSetTensorStorageShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the format specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param format [IN] the storage format
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the shape specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param numDims [IN] the number of dimensions of the shape
+ * @param dims [IN] the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the original format specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param format [IN] the storage format
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorOriginFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the original shape specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param numDims [IN] the number of dimensions of the shape
+ * @param dims [IN] the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op description info
+ *
+ * @param desc [IN] pointer to tensor description
+ * @param index [IN] index of tensor
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get address of tensor
+ *
+ * @param desc [IN] pointer to tensor description
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the dynamic input name specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param dynamicInputName [IN] pointer to the dynamic input name
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set const data specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param dataBuffer [IN] pointer to the const databuffer
+ * @param length [IN] the length of const databuffer
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set tensor memory type specified by the tensor description
+ *
+ * @param desc [OUT] pointer to the instance of aclTensorDesc
+ * @param memType [IN] ACL_MEMTYPE_DEVICE means device, ACL_MEMTYPE_HOST means host
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemType memType);
+
+/**
+ * @ingroup AscendCL
+ * @brief an interface for users to output APP logs
+ *
+ * @param logLevel [IN] the level of current log
+ * @param func [IN] the function where the log is located
+ * @param file [IN] the file where the log is located
+ * @param line [IN] Number of source lines where the log is located
+ * @param fmt [IN] the format of current log
+ * @param ... [IN] the value of current log
+ */
+ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
+                                   const char *fmt, ...);
+
+/**
+ * @ingroup AscendCL
+ * @brief get soc name
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY const char *aclrtGetSocName();
+
+#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_BASE_H_
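
ACL_APP_LOG stamps in the calling function, file, and line automatically, so call sites pass only a level and a printf-style payload, e.g. (model_id and output_num being the caller's own variables):

    ACL_APP_LOG(ACL_INFO, "model %u loaded, %zu outputs", model_id, output_num);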
@@ -0,0 +1,504 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_OP_H_
+#define INC_EXTERNAL_ACL_ACL_OP_H_
+
+#include "acl_base.h"
+#include "acl_rt.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct aclopHandle aclopHandle;
+typedef struct aclopAttr aclopAttr;
+typedef struct aclopKernelDesc aclopKernelDesc;
+
+typedef void (*aclDataDeallocator)(void *data, size_t length);
+
+static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;
+
+typedef enum aclEngineType {
+  ACL_ENGINE_SYS,
+  ACL_ENGINE_AICORE,
+  ACL_ENGINE_VECTOR,
+} aclopEngineType;
+
+/**
+ * @ingroup AscendCL
+ * @brief Set base directory that contains single op models
+ *
+ * @par Restriction
+ * The aclopSetModelDir interface can be called only once in a process.
+ * @param modelDir [IN] path of the directory
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetModelDir(const char *modelDir);
+
+/**
+ * @ingroup AscendCL
+ * @brief load single op models from memory
+ *
+ * @par Restriction
+ * The aclopLoad interface can be called more than once in a process.
+ * @param model [IN] address of single op models
+ * @param modelSize [IN] size of single op models
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopLoad(const void *model, size_t modelSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief create data of type aclopAttr
+ *
+ * @retval pointer to created instance.
+ * @retval nullptr if run out of memory
+ */
+ACL_FUNC_VISIBILITY aclopAttr *aclopCreateAttr();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclopAttr
+ *
+ * @param attr [IN] pointer to the instance of aclopAttr
+ */
+ACL_FUNC_VISIBILITY void aclopDestroyAttr(const aclopAttr *attr);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is bool
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param attrValue [IN] attribute value
+ * false if attrValue is 0, true otherwise.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrBool(aclopAttr *attr, const char *attrName, uint8_t attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is int64_t
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param attrValue [IN] attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrInt(aclopAttr *attr, const char *attrName, int64_t attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is float
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param attrValue [IN] attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attrName, float attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is string
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param attrValue [IN] attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of bools
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numValues [IN] number of values
+ * @param values [IN] pointer to values; each value is false if 0, true otherwise
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
+                                                  const uint8_t *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of ints
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numValues [IN] number of values
+ * @param values [IN] pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
+                                                 const int64_t *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of floats
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numValues [IN] number of values
+ * @param values [IN] pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
+                                                   const float *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of strings
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numValues [IN] number of values
+ * @param values [IN] pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
+                                                    const char **values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of list of ints
+ *
+ * @param attr [OUT] pointer to the instance of aclopAttr
+ * @param attrName [IN] attribute name
+ * @param numLists [IN] number of lists
+ * @param numValues [IN] pointer to number of values of each list
+ * @param values [IN] pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists,
+                                                     const int *numValues, const int64_t *const values[]);
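
All setters share the (attr, name, value...) shape, so assembling an attribute set is mechanical. A sketch (the attribute names here are invented for illustration; real names are dictated by the op being invoked):

    aclopAttr *attr = aclopCreateAttr();
    if (attr != NULL) {
      (void)aclopSetAttrBool(attr, "transpose_a", 0);
      (void)aclopSetAttrFloat(attr, "alpha", 1.0f);
      int64_t axes[] = {0, 2};
      (void)aclopSetAttrListInt(attr, "axes", 2, axes);
      // ... pass attr to aclopExecuteV2 or similar, then clean up ...
      aclopDestroyAttr(attr);
    }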
/** | |||||
* @ingroup AscendCL | |||||
* @brief Load and execute the specified operator asynchronously | |||||
* | |||||
* @par Restriction | |||||
* @li The input and output organization of each operator is different, | |||||
* and the application needs to organize the operator strictly | |||||
* according to the operator input and output parameters when calling. | |||||
 * @li When the user calls aclopExecute,
 * the ACL finds the corresponding task according to the op type,
 * the input tensor descriptions,
 * the output tensor descriptions, and the attributes, and issues it for execution.
* | |||||
* @param opType [IN] type of op | |||||
* @param numInputs [IN] number of inputs | |||||
* @param inputDesc [IN] pointer to array of input tensor descriptions | |||||
* @param inputs [IN] pointer to array of input buffers | |||||
* @param numOutputs [IN] number of outputs | |||||
* @param outputDesc [IN] pointer to array of output tensor descriptions | |||||
* @param outputs [OUT] pointer to array of output buffers | |||||
* @param attr [IN] pointer to instance of aclopAttr. | |||||
* may pass nullptr if the op has no attribute | |||||
* @param stream [IN] stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") | |||||
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], | |||||
const aclDataBuffer *const inputs[], int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], | |||||
const aclopAttr *attr, aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Load and execute the specified operator | |||||
* The difference with aclopExecute is that aclopExecuteV2 will refresh outputDesc | |||||
* | |||||
* @par Restriction | |||||
* @li The input and output organization of each operator is different, | |||||
* and the application needs to organize the operator strictly | |||||
* according to the operator input and output parameters when calling. | |||||
 * @li When the user calls aclopExecuteV2,
 * the ACL finds the corresponding task according to the op type,
 * the input tensor descriptions,
 * the output tensor descriptions, and the attributes, and issues it for execution.
* | |||||
* @param opType [IN] type of op | |||||
* @param numInputs [IN] number of inputs | |||||
* @param inputDesc [IN] pointer to array of input tensor descriptions | |||||
* @param inputs [IN] pointer to array of input buffers | |||||
* @param numOutputs [IN] number of outputs | |||||
* @param outputDesc [IN|OUT] pointer to array of output tensor descriptions | |||||
* @param outputs [OUT] pointer to array of output buffers | |||||
* @param attr [IN] pointer to instance of aclopAttr. | |||||
* may pass nullptr if the op has no attribute | |||||
* @param stream [IN] stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[], | |||||
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], | |||||
aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream); | |||||
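A minimal launch sketch for aclopExecuteV2. It assumes aclCreateTensorDesc/aclCreateDataBuffer from acl_base.h, device buffers devIn0/devIn1/devOut allocated earlier via aclrtMalloc, and a stream created via aclrtCreateStream; the op type "Add" and the shape are illustrative:

/* Sketch: execute one "Add" op asynchronously, then fence with a stream sync. */
int64_t dims[1] = {8};
aclTensorDesc *inDesc[2] = {aclCreateTensorDesc(ACL_FLOAT, 1, dims, ACL_FORMAT_ND),
                            aclCreateTensorDesc(ACL_FLOAT, 1, dims, ACL_FORMAT_ND)};
aclTensorDesc *outDesc[1] = {aclCreateTensorDesc(ACL_FLOAT, 1, dims, ACL_FORMAT_ND)};
aclDataBuffer *inBuf[2] = {aclCreateDataBuffer(devIn0, 8 * sizeof(float)),
                           aclCreateDataBuffer(devIn1, 8 * sizeof(float))};
aclDataBuffer *outBuf[1] = {aclCreateDataBuffer(devOut, 8 * sizeof(float))};
aclError ret = aclopExecuteV2("Add", 2, inDesc, inBuf, 1, outDesc, outBuf, NULL, stream);
if (ret == ACL_SUCCESS) {
    ret = aclrtSynchronizeStream(stream);  /* outputDesc entries are refreshed on completion */
}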
/** | |||||
* @ingroup AscendCL | |||||
 * @brief create an instance of aclopHandle.
* | |||||
* @param opType [IN] type of op | |||||
* @param numInputs [IN] number of inputs | |||||
* @param inputDesc [IN] pointer to array of input tensor descriptions | |||||
* @param numOutputs [IN] number of outputs | |||||
* @param outputDesc [IN] pointer to array of output tensor descriptions | |||||
* @param opAttr [IN] pointer to instance of aclopAttr. | |||||
* may pass nullptr if the op has no attribute | |||||
* @param handle [OUT] pointer to the pointer to the handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs, | |||||
const aclTensorDesc *const inputDesc[], int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, | |||||
aclopHandle **handle); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief destroy aclopHandle instance | |||||
* | |||||
* @param handle [IN] pointer to the instance of aclopHandle | |||||
*/ | |||||
ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief execute an op with the handle. | |||||
* can save op model matching cost compared with aclopExecute | |||||
* | |||||
* @param handle [IN] pointer to the instance of aclopHandle. | |||||
* The aclopCreateHandle interface has been called | |||||
* in advance to create aclopHandle type data. | |||||
* @param numInputs [IN] number of inputs | |||||
* @param inputs [IN] pointer to array of input buffers. | |||||
* The aclCreateDataBuffer interface has been called | |||||
* in advance to create aclDataBuffer type data. | |||||
* @param numOutputs [IN] number of outputs | |||||
* @param outputs [OUT] pointer to array of output buffers | |||||
* @param stream [IN] stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclopCreateHandle | aclCreateDataBuffer | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs, | |||||
const aclDataBuffer *const inputs[], int numOutputs, | |||||
aclDataBuffer *const outputs[], aclrtStream stream); | |||||
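Building on the previous sketch, a handle can amortize the op model matching cost across repeated launches with identical descriptions; a minimal sketch reusing the descriptors and buffers from above:

/* Sketch: create the handle once, then launch repeatedly with the same shapes. */
aclopHandle *handle = NULL;
aclError ret = aclopCreateHandle("Add", 2, (const aclTensorDesc *const *)inDesc,
                                 1, (const aclTensorDesc *const *)outDesc, NULL, &handle);
if (ret == ACL_SUCCESS) {
    for (int step = 0; step < 10 && ret == ACL_SUCCESS; ++step) {
        ret = aclopExecWithHandle(handle, 2, (const aclDataBuffer *const *)inBuf,
                                  1, outBuf, stream);
    }
    (void)aclrtSynchronizeStream(stream);
    aclopDestroyHandle(handle);
}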
/** | |||||
* @ingroup AscendCL | |||||
* @brief cast data type | |||||
* | |||||
* @param srcDesc [IN] source tensor desc | |||||
* @param srcBuffer [IN] source tensor buffer | |||||
* @param dstDesc [IN] destination tensor desc | |||||
* @param dstBuffer [OUT] destination tensor buffer | |||||
* @param truncate [IN] do not truncate if value is 0, truncate otherwise | |||||
* @param stream [IN] stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer, | |||||
const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate, | |||||
aclrtStream stream); | |||||
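A minimal sketch of an on-device cast, assuming srcDesc/srcBuf describe an FP32 tensor and dstDesc/dstBuf an FP16 tensor of the same shape, all created earlier:

/* Sketch: cast FP32 data to FP16 on the device; 0 means "do not truncate". */
aclError ret = aclopCast(srcDesc, srcBuf, dstDesc, dstBuf, 0, stream);
if (ret == ACL_SUCCESS) {
    ret = aclrtSynchronizeStream(stream);  /* results valid only after the sync */
}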
/** | |||||
* @ingroup AscendCL | |||||
* @brief create a handle for casting datatype | |||||
* | |||||
* @param srcDesc [IN] source tensor desc | |||||
* @param dstDesc [IN] destination tensor desc | |||||
* @param truncate [IN] do not truncate if value is 0, truncate otherwise | |||||
* @param handle [OUT] pointer to the pointer to the handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate, | |||||
aclopHandle **handle); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief create kernel | |||||
* | |||||
* @param opType [IN] op type | |||||
* @param kernelId [IN] kernel id | |||||
* @param kernelName [IN] kernel name | |||||
* @param binData [IN] kernel bin data | |||||
* @param binSize [IN] kernel bin size | |||||
 * @param enginetype [IN] engine type
 * @param deallocator [IN] callback function for deallocating bin data,
 *                         null if the bin data is to be deallocated by the caller
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclopCompile | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName, | |||||
void *binData, int binSize, aclopEngineType enginetype, | |||||
aclDataDeallocator deallocator); | |||||
/** | |||||
* @ingroup AscendCL | |||||
 * @brief prototype of an op compile function, which compiles the op and fills in the kernel description
* | |||||
* @param numInputs [IN] number of inputs | |||||
* @param inputDesc [IN] pointer to array of input tensor descriptions | |||||
* @param numOutputs [IN] number of outputs | |||||
* @param outputDesc [IN] pointer to array of output tensor descriptions | |||||
* @param opAttr [IN] pointer to instance of aclopAttr | |||||
 * @param aclopKernelDesc [OUT] pointer to instance of aclopKernelDesc to be filled in
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, | |||||
aclopKernelDesc *aclopKernelDesc); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief register compile function | |||||
* | |||||
* @param opType [IN] op type | |||||
* @param func [IN] compile function | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclopUnregisterCompileFunc | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopRegisterCompileFunc(const char *opType, aclopCompileFunc func); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief unregister compile function | |||||
* | |||||
* @param opType [IN] op type | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief set kernel args | |||||
* | |||||
* @param kernelDesc [IN] pointer to instance of aclopKernelDesc | |||||
* @param kernelId [IN] kernel id | |||||
* @param blockDim [IN] block dim | |||||
* @param args [IN] args | |||||
* @param argSize [IN] size in bytes of args | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim, | |||||
const void *args, uint32_t argSize); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief set workspace sizes | |||||
* | |||||
* @param kernelDesc [IN] pointer to instance of aclopKernelDesc | |||||
* @param numWorkspaces [IN] number of workspaces | |||||
* @param workspaceSizes [IN] pointer to array of sizes of workspaces | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kernelDesc, int numWorkspaces, | |||||
size_t *workspaceSizes); | |||||
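A sketch of how a registered compile function might use the two setters above. The op type "MyAdd", the kernel id, the argument layout, and the workspace size are all hypothetical and would have to match a kernel registered earlier via aclopCreateKernel:

/* Sketch: a compile function that fills in kernel args and workspace sizes. */
static aclError MyAddCompile(int numInputs, const aclTensorDesc *const inputDesc[],
                             int numOutputs, const aclTensorDesc *const outputDesc[],
                             const aclopAttr *opAttr, aclopKernelDesc *kernelDesc) {
    (void)numInputs; (void)inputDesc; (void)numOutputs; (void)outputDesc; (void)opAttr;
    struct { uint64_t in0, in1, out; } args = {0, 0, 0};  /* illustrative arg layout */
    size_t workspaceSizes[1] = {1024};                    /* illustrative size */
    aclError ret = aclopSetKernelArgs(kernelDesc, "my_add_kernel_0", 1, &args, sizeof(args));
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    return aclopSetKernelWorkspaceSizes(kernelDesc, 1, workspaceSizes);
}

/* During initialization, wire it in for the hypothetical op type: */
aclError ret = aclopRegisterCompileFunc("MyAdd", MyAddCompile);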
/** | |||||
* @ingroup AscendCL | |||||
* @brief compile op with dynamic shape | |||||
* | |||||
* @param opType [IN] op type | |||||
* @param numInputs [IN] number of inputs | |||||
* @param inputDesc [IN] pointer to array of input tensor descriptions | |||||
* @param numOutputs [IN] number of outputs | |||||
* @param outputDesc [IN] pointer to array of output tensor descriptions | |||||
* @param attr [IN] pointer to instance of aclopAttr. | |||||
* may pass nullptr if the op has no attribute | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs, | |||||
const aclTensorDesc *const inputDesc[], int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], const aclopAttr *attr); | |||||
/** | |||||
* @ingroup AscendCL | |||||
 * @brief infer the output shapes of the specified operator synchronously
* | |||||
* @param opType [IN] type of op | |||||
* @param numInputs [IN] number of inputs | |||||
* @param inputDesc [IN] pointer to array of input tensor descriptions | |||||
* @param inputs [IN] pointer to array of input buffers | |||||
* @param numOutputs [IN] number of outputs | |||||
* @param outputDesc [OUT] pointer to array of output tensor descriptions | |||||
* @param attr [IN] pointer to instance of aclopAttr. | |||||
* may pass nullptr if the op has no attribute | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[], | |||||
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], | |||||
aclopAttr *attr); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif // INC_EXTERNAL_ACL_ACL_OP_H_ |
@@ -0,0 +1,121 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ | |||||
#define INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ | |||||
#include "acl_base.h" | |||||
#include "acl_op.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType; | |||||
typedef enum { | |||||
ACL_PRECISION_MODE, | |||||
ACL_AICORE_NUM, | |||||
ACL_AUTO_TUNE_MODE, | |||||
ACL_OP_SELECT_IMPL_MODE, | |||||
ACL_OPTYPELIST_FOR_IMPLMODE, | |||||
ACL_OP_DEBUG_LEVEL, | |||||
ACL_DEBUG_DIR, | |||||
ACL_OP_COMPILER_CACHE_MODE, | |||||
ACL_OP_COMPILER_CACHE_DIR, | |||||
ACL_OP_PERFORMANCE_MODE | |||||
} aclCompileOpt; | |||||
typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag; | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief compile op | |||||
* | |||||
* @param opType [IN] op type | |||||
* @param numInputs [IN] number of inputs | |||||
* @param inputDesc [IN] pointer to array of input tensor descriptions | |||||
* @param numOutputs [IN] number of outputs | |||||
* @param outputDesc [IN] pointer to array of output tensor descriptions | |||||
* @param attr [IN] pointer to instance of aclopAttr. | |||||
* may pass nullptr if the op has no attribute | |||||
* @param engineType [IN] engine type | |||||
* @param compileFlag [IN] compile flag | |||||
* @param opPath [IN] path of op | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], | |||||
int numOutputs, const aclTensorDesc *const outputDesc[], | |||||
const aclopAttr *attr, aclopEngineType engineType, | |||||
aclopCompileType compileFlag, const char *opPath); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief compile and execute op | |||||
* | |||||
* @param opType [IN] op type | |||||
* @param numInputs [IN] number of inputs | |||||
* @param inputDesc [IN] pointer to array of input tensor descriptions | |||||
* @param inputs [IN] pointer to array of input buffers | |||||
* @param numOutputs [IN] number of outputs | |||||
* @param outputDesc [IN] pointer to array of output tensor descriptions | |||||
 * @param outputs [OUT] pointer to array of output buffers
* @param attr [IN] pointer to instance of aclopAttr. | |||||
* may pass nullptr if the op has no attribute | |||||
* @param engineType [IN] engine type | |||||
* @param compileFlag [IN] compile flag | |||||
* @param opPath [IN] path of op | |||||
* @param stream [IN] stream handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( | |||||
const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||||
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, | |||||
aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); | |||||
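A one-call variant of the earlier execution sketch, reusing the same descriptors and buffers. ACL_ENGINE_SYS is assumed to come from aclopEngineType in acl_base.h, and passing NULL for opPath relies on the built-in op library:

/* Sketch: compile from the system op library and run in one call. */
aclError ret = aclopCompileAndExecute("Add", 2,
                                      (const aclTensorDesc *const *)inDesc,
                                      (const aclDataBuffer *const *)inBuf,
                                      1, (const aclTensorDesc *const *)outDesc, outBuf,
                                      NULL, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL, stream);
if (ret == ACL_SUCCESS) {
    ret = aclrtSynchronizeStream(stream);
}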
/** | |||||
* @ingroup AscendCL | |||||
* @brief set compile option | |||||
* | |||||
 * @param opt [IN] compile option
* @param value [IN] pointer for the option value | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief set compile flag | |||||
* | |||||
* @param flag [IN] compile flag, ACL_OP_COMPILE_DEFAULT means compile with default mode | |||||
* ACL_OP_COMPILE_FUZZ means compile with fuzz mode | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag); | |||||
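A minimal sketch of setting compile options before any op is compiled; the option value strings are illustrative, not an authoritative list:

/* Sketch: tune global compilation behavior. */
(void)aclSetCompileopt(ACL_PRECISION_MODE, "allow_fp32_to_fp16");  /* illustrative value */
(void)aclSetCompileopt(ACL_OP_COMPILER_CACHE_MODE, "enable");      /* illustrative value */
(void)aclSetCompileopt(ACL_OP_COMPILER_CACHE_DIR, "/tmp/op_cache");
(void)aclopSetCompileFlag(ACL_OP_COMPILE_FUZZ);                    /* fuzz-mode compilation */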
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ |
@@ -0,0 +1,329 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_EXTERNAL_ACL_PROF_H_ | |||||
#define INC_EXTERNAL_ACL_PROF_H_ | |||||
#include "acl_base.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
#define ACL_PROF_ACL_API 0x0001 | |||||
#define ACL_PROF_TASK_TIME 0x0002 | |||||
#define ACL_PROF_AICORE_METRICS 0x0004 | |||||
#define ACL_PROF_AICPU 0x0008 | |||||
/** | |||||
 * @deprecated please use aclprofGetOpTypeLen and aclprofGetOpNameLen instead
*/ | |||||
#define ACL_PROF_MAX_OP_NAME_LEN 257 | |||||
#define ACL_PROF_MAX_OP_TYPE_LEN 65 | |||||
typedef enum { | |||||
ACL_AICORE_ARITHMETIC_UTILIZATION = 0, | |||||
ACL_AICORE_PIPE_UTILIZATION = 1, | |||||
ACL_AICORE_MEMORY_BANDWIDTH = 2, | |||||
ACL_AICORE_L0B_AND_WIDTH = 3, | |||||
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, | |||||
ACL_AICORE_NONE = 0xFF | |||||
} aclprofAicoreMetrics; | |||||
typedef struct aclprofConfig aclprofConfig; | |||||
typedef struct aclprofStopConfig aclprofStopConfig; | |||||
typedef struct aclprofAicoreEvents aclprofAicoreEvents; | |||||
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig; | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief profiling initialize | |||||
* | |||||
* @param profilerResultPath [IN] path of profiling result | |||||
* @param length [IN] length of profilerResultPath | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclprofFinalize | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief profiling finalize | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclprofInit | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofFinalize(); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Start profiling modules by profilerConfig | |||||
* | |||||
* @param profilerConfig [IN] config of profiling | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclprofStop | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Create data of type aclprofConfig | |||||
* | |||||
* @param deviceIdList [IN] list of device id | |||||
* @param deviceNums [IN] number of devices | |||||
* @param aicoreMetrics [IN] type of aicore metrics | |||||
* @param aicoreEvents [IN] pointer to aicore events, only support NULL now | |||||
* @param dataTypeConfig [IN] config modules need profiling | |||||
* | |||||
* @retval the aclprofConfig pointer | |||||
* | |||||
* @see aclprofDestroyConfig | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, | |||||
aclprofAicoreMetrics aicoreMetrics, | |||||
aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Destroy data of type aclprofConfig | |||||
* | |||||
* @param profilerConfig [IN] config of profiling | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclprofCreateConfig | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief stop profiling modules by stopProfilingConfig | |||||
* | |||||
* @param profilerConfig [IN] pointer to stop config of profiling | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclprofStart | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); | |||||
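A minimal sketch bracketing a workload with the profiling lifecycle above, assuming device 0 and an illustrative result path (strlen requires <string.h>):

/* Sketch: init -> start -> workload -> stop -> finalize, on device 0. */
const char *path = "/tmp/prof_out";
aclError ret = aclprofInit(path, strlen(path));
if (ret == ACL_SUCCESS) {
    uint32_t devices[1] = {0};
    aclprofConfig *cfg = aclprofCreateConfig(devices, 1, ACL_AICORE_PIPE_UTILIZATION, NULL,
                                             ACL_PROF_TASK_TIME | ACL_PROF_AICORE_METRICS);
    if (cfg != NULL) {
        (void)aclprofStart(cfg);
        /* ... run the workload to be profiled ... */
        (void)aclprofStop(cfg);
        (void)aclprofDestroyConfig(cfg);
    }
    (void)aclprofFinalize();
}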
/** | |||||
* @ingroup AscendCL | |||||
* @brief subscribe profiling data of model | |||||
* | |||||
* @param modelId [IN] the model id subscribed | |||||
* @param profSubscribeConfig [IN] pointer to config of model subscribe | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclprofModelUnSubscribe | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief unsubscribe profiling data of model | |||||
* | |||||
* @param modelId [IN] the model id unsubscribed | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclprofModelSubscribe | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief create subscribe config | |||||
* | |||||
 * @param timeInfoSwitch [IN] switch that controls whether to get time info from the model
* @param aicoreMetrics [IN] aicore metrics | |||||
* @param fd [IN] pointer to write pipe | |||||
* | |||||
* @retval the aclprofSubscribeConfig pointer | |||||
* | |||||
* @see aclprofDestroySubscribeConfig | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, | |||||
aclprofAicoreMetrics aicoreMetrics, void *fd); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief destroy subscribe config | |||||
* | |||||
* @param profSubscribeConfig [IN] subscribe config | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclprofCreateSubscribeConfig | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig); | |||||
/** | |||||
* @ingroup AscendCL | |||||
 * @brief get the size of the op description data
* | |||||
* @param opDescSize [OUT] size of op desc | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief get op number from subscription data | |||||
* | |||||
* @param opInfo [IN] pointer to subscription data | |||||
* @param opInfoLen [IN] memory size of subscription data | |||||
* @param opNumber [OUT] op number of subscription data | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber); | |||||
/** | |||||
* @ingroup AscendCL | |||||
 * @brief get the length of the op type from subscription data
* | |||||
* @param opInfo [IN] pointer to subscription data | |||||
* @param opInfoLen [IN] memory size of subscription data | |||||
* @param index [IN] index of op array in opInfo | |||||
* @param opTypeLen [OUT] actual length of op type string | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index, | |||||
size_t *opTypeLen); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief get op type from subscription data | |||||
* | |||||
* @param opInfo [IN] pointer to subscription data | |||||
* @param opInfoLen [IN] memory size of subscription data | |||||
* @param index [IN] index of op array in opInfo | |||||
* @param opType [OUT] obtained op type string | |||||
* @param opTypeLen [IN] obtained length of op type string | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType, | |||||
size_t opTypeLen); | |||||
/** | |||||
* @ingroup AscendCL | |||||
 * @brief get the length of the op name from subscription data
* | |||||
* @param opInfo [IN] pointer to subscription data | |||||
* @param opInfoLen [IN] memory size of subscription data | |||||
* @param index [IN] index of op array in opInfo | |||||
* @param opNameLen [OUT] actual length of op name string | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index, | |||||
size_t *opNameLen); | |||||
/** | |||||
* @ingroup AscendCL | |||||
 * @brief get op name from subscription data
* | |||||
* @param opInfo [IN] pointer to subscription data | |||||
* @param opInfoLen [IN] memory size of subscription data | |||||
* @param index [IN] index of op array in opInfo | |||||
* @param opName [OUT] obtained op name string | |||||
* @param opNameLen [IN] obtained length of op name string | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName, | |||||
size_t opNameLen); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief get start time of specified op from subscription data | |||||
* | |||||
* @param opInfo [IN] pointer to subscription data | |||||
* @param opInfoLen [IN] memory size of subscription data | |||||
* @param index [IN] index of op array in opInfo | |||||
* | |||||
 * @retval start timestamp (us) of the specified op
 * @retval 0 on failure
*/ | |||||
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief get end time of specified op from subscription data | |||||
* | |||||
* @param opInfo [IN] pointer to subscription data | |||||
* @param opInfoLen [IN] memory size of subscription data | |||||
* @param index [IN] index of op array in opInfo | |||||
* | |||||
 * @retval end timestamp (us) of the specified op
 * @retval 0 on failure
*/ | |||||
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
/** | |||||
* @ingroup AscendCL | |||||
 * @brief get execution time of specified op from subscription data
* | |||||
* @param opInfo [IN] pointer to subscription data | |||||
* @param opInfoLen [IN] memory size of subscription data | |||||
* @param index [IN] index of op array in opInfo | |||||
* | |||||
 * @retval execution time (us) of the specified op
 * @retval 0 on failure
*/ | |||||
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
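A sketch of walking one chunk of subscription data with the accessors above, assuming opInfo/opInfoLen were read from the pipe fd passed to aclprofCreateSubscribeConfig (printf requires <stdio.h>):

/* Sketch: enumerate ops in one chunk of subscription data. */
uint32_t opNum = 0;
if (aclprofGetOpNum(opInfo, opInfoLen, &opNum) == ACL_SUCCESS) {
    for (uint32_t i = 0; i < opNum; ++i) {
        size_t typeLen = 0;
        char opType[128];
        if (aclprofGetOpTypeLen(opInfo, opInfoLen, i, &typeLen) != ACL_SUCCESS ||
            typeLen > sizeof(opType)) {
            continue;  /* skip entries that don't fit the local buffer */
        }
        if (aclprofGetOpType(opInfo, opInfoLen, i, opType, typeLen) == ACL_SUCCESS) {
            printf("%s: %llu us\n", opType,
                   (unsigned long long)aclprofGetOpDuration(opInfo, opInfoLen, i));
        }
    }
}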
/** | |||||
* @ingroup AscendCL | |||||
* @brief get model id from subscription data | |||||
* | |||||
* @param opInfo [IN] pointer to subscription data | |||||
 * @param opInfoLen [IN] memory size of subscription data
 * @param index [IN] index of op array in opInfo
* | |||||
* @retval model id of subscription data | |||||
 * @retval 0 on failure
*/ | |||||
ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif // INC_EXTERNAL_ACL_PROF_H_ |
@@ -0,0 +1,958 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
#define INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
#include <stdint.h> | |||||
#include <stddef.h> | |||||
#include "acl_base.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
#define ACL_EVENT_TIME_LINE 0x00000008u | |||||
typedef enum aclrtRunMode { | |||||
ACL_DEVICE, | |||||
ACL_HOST, | |||||
} aclrtRunMode; | |||||
typedef enum aclrtTsId { | |||||
ACL_TS_ID_AICORE = 0, | |||||
ACL_TS_ID_AIVECTOR = 1, | |||||
ACL_TS_ID_RESERVED = 2, | |||||
} aclrtTsId; | |||||
typedef enum aclrtEventStatus { | |||||
ACL_EVENT_STATUS_COMPLETE = 0, | |||||
ACL_EVENT_STATUS_NOT_READY = 1, | |||||
ACL_EVENT_STATUS_RESERVED = 2, | |||||
} aclrtEventStatus; | |||||
typedef enum aclrtCallbackBlockType { | |||||
ACL_CALLBACK_NO_BLOCK, | |||||
ACL_CALLBACK_BLOCK, | |||||
} aclrtCallbackBlockType; | |||||
typedef enum aclrtMemcpyKind { | |||||
ACL_MEMCPY_HOST_TO_HOST, | |||||
ACL_MEMCPY_HOST_TO_DEVICE, | |||||
ACL_MEMCPY_DEVICE_TO_HOST, | |||||
ACL_MEMCPY_DEVICE_TO_DEVICE, | |||||
} aclrtMemcpyKind; | |||||
typedef enum aclrtMemMallocPolicy { | |||||
ACL_MEM_MALLOC_HUGE_FIRST, | |||||
ACL_MEM_MALLOC_HUGE_ONLY, | |||||
ACL_MEM_MALLOC_NORMAL_ONLY, | |||||
ACL_MEM_MALLOC_HUGE_FIRST_P2P, | |||||
ACL_MEM_MALLOC_HUGE_ONLY_P2P, | |||||
ACL_MEM_MALLOC_NORMAL_ONLY_P2P, | |||||
} aclrtMemMallocPolicy; | |||||
typedef enum aclrtMemAttr { | |||||
ACL_DDR_MEM, | |||||
ACL_HBM_MEM, | |||||
ACL_DDR_MEM_HUGE, | |||||
ACL_DDR_MEM_NORMAL, | |||||
ACL_HBM_MEM_HUGE, | |||||
ACL_HBM_MEM_NORMAL, | |||||
ACL_DDR_MEM_P2P_HUGE, | |||||
ACL_DDR_MEM_P2P_NORMAL, | |||||
ACL_HBM_MEM_P2P_HUGE, | |||||
ACL_HBM_MEM_P2P_NORMAL, | |||||
} aclrtMemAttr; | |||||
typedef enum aclrtGroupAttr { | |||||
ACL_GROUP_AICORE_INT, | |||||
ACL_GROUP_AIV_INT, | |||||
ACL_GROUP_AIC_INT, | |||||
ACL_GROUP_SDMANUM_INT, | |||||
ACL_GROUP_ASQNUM_INT, | |||||
ACL_GROUP_GROUPID_INT | |||||
} aclrtGroupAttr; | |||||
typedef struct tagRtGroupInfo aclrtGroupInfo; | |||||
typedef struct rtExceptionInfo aclrtExceptionInfo; | |||||
typedef void (*aclrtCallback)(void *userData); | |||||
typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Set a callback function to handle exception information | |||||
* | |||||
* @param callback [IN] callback function to handle exception information | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get task id from exception information | |||||
* | |||||
* @param info [IN] pointer of exception information | |||||
* | |||||
* @retval The task id from exception information | |||||
* @retval 0xFFFFFFFF if info is null | |||||
*/ | |||||
ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get stream id from exception information | |||||
* | |||||
* @param info [IN] pointer of exception information | |||||
* | |||||
* @retval The stream id from exception information | |||||
* @retval 0xFFFFFFFF if info is null | |||||
*/ | |||||
ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get thread id from exception information | |||||
* | |||||
* @param info [IN] pointer of exception information | |||||
* | |||||
 * @retval The thread id of the failed task
* @retval 0xFFFFFFFF if info is null | |||||
*/ | |||||
ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get device id from exception information | |||||
* | |||||
* @param info [IN] pointer of exception information | |||||
* | |||||
 * @retval The device id of the failed task
* @retval 0xFFFFFFFF if info is null | |||||
*/ | |||||
ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
/** | |||||
* @ingroup AscendCL | |||||
 * @brief Specify a thread to handle the callback functions on the Stream
* | |||||
* @param threadId [IN] thread ID | |||||
* @param stream [IN] stream handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Add a callback function to be executed on the host | |||||
* to the task queue of the Stream | |||||
* | |||||
* @param fn [IN] Specify the callback function to be added | |||||
* The function prototype of the callback function is: | |||||
* typedef void (*aclrtCallback)(void *userData); | |||||
* @param userData [IN] User data to be passed to the callback function | |||||
* @param blockType [IN] callback block type | |||||
* @param stream [IN] stream handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType, | |||||
aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief After waiting for a specified time, trigger callback processing | |||||
* | |||||
* @par Function | |||||
 * Triggers callback processing in the thread specified by
 * the aclrtSubscribeReport interface
* | |||||
* @param timeout [IN] timeout value | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtSubscribeReport | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Cancel thread registration, | |||||
* the callback function on the specified Stream | |||||
* is no longer processed by the specified thread | |||||
* | |||||
* @param threadId [IN] thread ID | |||||
* @param stream [IN] stream handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream); | |||||
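A sketch of the subscribe/launch/process/unsubscribe flow, assuming workerThreadId identifies a worker thread you created that runs the aclrtProcessReport loop shown in the comment:

/* Callback that flags completion via userData. */
static void OnStreamDone(void *userData) { *(int *)userData = 1; }

/* Sketch: route stream callbacks to a dedicated worker thread. */
(void)aclrtSubscribeReport(workerThreadId, stream);
int done = 0;
(void)aclrtLaunchCallback(OnStreamDone, &done, ACL_CALLBACK_BLOCK, stream);
/* Meanwhile, the worker thread drains callbacks:
 *     while (running) { (void)aclrtProcessReport(100); }   // 100 ms timeout, illustrative */
(void)aclrtUnSubscribeReport(workerThreadId, stream);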
/** | |||||
* @ingroup AscendCL | |||||
* @brief create context and associates it with the calling thread | |||||
* | |||||
* @par Function | |||||
* The following use cases are supported: | |||||
* @li If you don't call the aclrtCreateContext interface | |||||
* to explicitly create the context, | |||||
* the system will use the default context, which is implicitly created | |||||
* when the aclrtSetDevice interface is called. | |||||
* @li If multiple contexts are created in a process | |||||
* (there is no limit on the number of contexts), | |||||
* the current thread can only use one of them at the same time. | |||||
* It is recommended to explicitly specify the context of the current thread | |||||
 * through the aclrtSetCurrentContext interface to increase
 * the maintainability of the program.
* | |||||
* @param context [OUT] point to the created context | |||||
* @param deviceId [IN] device to create context on | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtSetDevice | aclrtSetCurrentContext | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief destroy context instance | |||||
* | |||||
* @par Function | |||||
* Can only destroy context created through aclrtCreateContext interface | |||||
* | |||||
* @param context [IN] the context to destroy | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtCreateContext | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief set the context of the thread | |||||
* | |||||
* @par Function | |||||
* The following scenarios are supported: | |||||
* @li If the aclrtCreateContext interface is called in a thread to explicitly | |||||
* create a Context (for example: ctx1), the thread's Context can be specified | |||||
* without calling the aclrtSetCurrentContext interface. | |||||
* The system uses ctx1 as the context of thread1 by default. | |||||
* @li If the aclrtCreateContext interface is not explicitly created, | |||||
* the system uses the default context as the context of the thread. | |||||
* At this time, the aclrtDestroyContext interface cannot be used to release | |||||
* the default context. | |||||
* @li If the aclrtSetCurrentContext interface is called multiple times to | |||||
* set the thread's Context, the last one prevails. | |||||
* | |||||
* @par Restriction | |||||
 * @li If the device corresponding to the context set for the thread
* has been reset, you cannot set the context as the context of the thread, | |||||
* otherwise a business exception will result. | |||||
* @li It is recommended to use the context created in a thread. | |||||
* If the aclrtCreateContext interface is called in thread A to create a context, | |||||
* and the context is used in thread B, | |||||
* the user must guarantee the execution order of tasks in the same stream | |||||
* under the same context in two threads. | |||||
* | |||||
* @param context [IN] the current context of the thread | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtCreateContext | aclrtDestroyContext | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief get the context of the thread | |||||
* | |||||
* @par Function | |||||
* If the user calls the aclrtSetCurrentContext interface | |||||
* multiple times to set the context of the current thread, | |||||
* then the last set context is obtained | |||||
* | |||||
* @param context [OUT] the current context of the thread | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtSetCurrentContext | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context); | |||||
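A minimal per-thread context sketch for the interfaces above, assuming device 0:

/* Sketch: explicit context management for the calling thread. */
aclrtContext ctx = NULL;
aclError ret = aclrtCreateContext(&ctx, 0);
if (ret == ACL_SUCCESS) {
    (void)aclrtSetCurrentContext(ctx);  /* make ctx current for this thread */
    /* ... create streams and submit tasks under ctx ... */
    (void)aclrtDestroyContext(ctx);
}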
/** | |||||
* @ingroup AscendCL | |||||
* @brief Specify the device to use for the operation | |||||
* implicitly create the default context and the default stream | |||||
* | |||||
* @par Function | |||||
* The following use cases are supported: | |||||
* @li Device can be specified in the process or thread. | |||||
* If you call the aclrtSetDevice interface multiple | |||||
* times to specify the same device, | |||||
* you only need to call the aclrtResetDevice interface to reset the device. | |||||
* @li The same device can be specified for operation | |||||
* in different processes or threads. | |||||
* @li Device is specified in a process, | |||||
* and multiple threads in the process can share this device to explicitly | |||||
* create a Context (aclrtCreateContext interface). | |||||
* @li In multi-device scenarios, you can switch to other devices | |||||
* through the aclrtSetDevice interface in the process. | |||||
* | |||||
* @param deviceId [IN] the device id | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtResetDevice |aclrtCreateContext | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Reset the current operating Device and free resources on the device, | |||||
* including the default context, the default stream, | |||||
* and all streams created under the default context, | |||||
* and synchronizes the interface. | |||||
* If the task under the default context or stream has not been completed, | |||||
* the system will wait for the task to complete before releasing it. | |||||
* | |||||
* @par Restriction | |||||
 * @li The Context, Stream, and Event explicitly created on the device
 * must be destroyed before the device is reset. Before resetting,
 * it is recommended to follow the interface calling sequence below,
 * otherwise business exceptions may be caused.
* @li Interface calling sequence: | |||||
* call aclrtDestroyEvent interface to release Event or | |||||
* call aclrtDestroyStream interface to release explicitly created Stream-> | |||||
* call aclrtDestroyContext to release explicitly created Context-> | |||||
* call aclrtResetDevice interface | |||||
* | |||||
* @param deviceId [IN] the device id | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId); | |||||
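A minimal sketch of the release order described above, assuming device 0:

/* Sketch: recommended teardown order around a device reset. */
(void)aclrtSetDevice(0);
/* ... resources created here must be destroyed before the reset:
 *     aclrtDestroyEvent -> aclrtDestroyStream -> aclrtDestroyContext ... */
(void)aclrtResetDevice(0);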
/** | |||||
* @ingroup AscendCL | |||||
* @brief get target device of current thread | |||||
* | |||||
* @param deviceId [OUT] the device id | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief get target side | |||||
* | |||||
* @param runMode [OUT] the run mode | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Wait for compute device to finish | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Set Scheduling TS | |||||
* | |||||
* @param tsId [IN] the ts id | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief get total device number. | |||||
* | |||||
* @param count [OUT] the device number | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief create event instance | |||||
* | |||||
* @param event [OUT] created event | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief create event instance with flag | |||||
* | |||||
* @param event [OUT] created event | |||||
* @param flag [IN] event flag | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtCreateEventWithFlag(aclrtEvent *event, uint32_t flag); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief destroy event instance | |||||
* | |||||
* @par Function | |||||
 * Only events created through the aclrtCreateEvent interface can be
 * destroyed; this is a synchronous interface. When destroying an event,
 * the user must ensure that the tasks involving the aclrtSynchronizeEvent
 * interface or the aclrtStreamWaitEvent interface have completed before
 * the event is destroyed.
* | |||||
* @param event [IN] event to destroy | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Record an Event in the Stream | |||||
* | |||||
* @param event [IN] event to record | |||||
* @param stream [IN] stream handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Reset an event | |||||
* | |||||
* @par Function | |||||
* Users need to make sure to wait for the tasks in the Stream | |||||
* to complete before resetting the Event | |||||
* | |||||
* @param event [IN] event to reset | |||||
* @param stream [IN] stream handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Queries an event's status | |||||
* | |||||
* @param event [IN] event to query | |||||
* @param status [OUT] event status | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Block Host Running, wait event to be complete | |||||
* | |||||
* @param event [IN] event to wait | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief computes the elapsed time between events. | |||||
* | |||||
* @param ms [OUT] time between start and end in ms | |||||
* @param start [IN] starting event | |||||
* @param end [IN] ending event | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); | |||||
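A sketch of timing a stretch of stream work with an event pair, assuming a stream created earlier:

/* Sketch: measure elapsed time between two recorded events. */
aclrtEvent start = NULL, stop = NULL;
(void)aclrtCreateEvent(&start);
(void)aclrtCreateEvent(&stop);
(void)aclrtRecordEvent(start, stream);
/* ... enqueue kernels / async copies on stream ... */
(void)aclrtRecordEvent(stop, stream);
(void)aclrtSynchronizeEvent(stop);          /* wait until "stop" has completed */
float ms = 0.0f;
(void)aclrtEventElapsedTime(&ms, start, stop);
(void)aclrtDestroyEvent(start);
(void)aclrtDestroyEvent(stop);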
/** | |||||
* @ingroup AscendCL | |||||
* @brief alloc memory on device | |||||
* | |||||
* @par Function | |||||
 * allocate linear memory of the requested size on the device
 * and return a pointer to the allocated memory via *devPtr
* | |||||
* @par Restriction | |||||
* @li The memory requested by the aclrtMalloc interface needs to be released | |||||
* through the aclrtFree interface. | |||||
 * @li Before calling the media data processing interfaces,
 * if you need to allocate memory on the device to store input or output data,
 * you need to call acldvppMalloc to allocate it.
* | |||||
* @param devPtr [OUT] pointer to pointer to allocated memory on device | |||||
* @param size [IN] alloc memory size | |||||
* @param policy [IN] memory alloc policy | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtFree | acldvppMalloc | aclrtMallocCached | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief allocate memory on device with cache | |||||
* | |||||
* @par Function | |||||
 * allocate linear memory of the requested size on the device
 * and return a pointer to the allocated memory via *devPtr
* | |||||
* @par Restriction | |||||
* @li The memory requested by the aclrtMallocCached interface needs to be released | |||||
* through the aclrtFree interface. | |||||
* | |||||
* @param devPtr [OUT] pointer to pointer to allocated memory on device | |||||
* @param size [IN] alloc memory size | |||||
* @param policy [IN] memory alloc policy | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtFree | aclrtMalloc | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief flush cache data to ddr | |||||
* | |||||
* @param devPtr [IN] the pointer that flush data to ddr | |||||
* @param size [IN] flush size | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief invalidate cache data | |||||
* | |||||
* @param devPtr [IN] pointer to invalidate cache data | |||||
* @param size [IN] invalidate size | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief free device memory | |||||
* | |||||
* @par Function | |||||
 * can only free memory allocated through the aclrtMalloc or aclrtMallocCached interface
* | |||||
* @param devPtr [IN] Pointer to memory to be freed | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtMalloc | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief alloc memory on host | |||||
* | |||||
* @par Restriction | |||||
* @li The requested memory cannot be used in the Device | |||||
* and needs to be explicitly copied to the Device. | |||||
* @li The memory requested by the aclrtMallocHost interface | |||||
* needs to be released through the aclrtFreeHost interface. | |||||
* | |||||
* @param hostPtr [OUT] pointer to pointer to allocated memory on the host | |||||
* @param size [IN] alloc memory size | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtFreeHost | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief free host memory | |||||
* | |||||
* @par Function | |||||
* can only free memory allocated through the aclrtMallocHost interface | |||||
* | |||||
* @param hostPtr [IN] free memory pointer | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtMallocHost | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief synchronous memory replication between host and device | |||||
* | |||||
* @param dst [IN] destination address pointer | |||||
* @param destMax [IN] Max length of the destination address memory | |||||
* @param src [IN] source address pointer | |||||
 * @param count [IN] the number of bytes to copy
* @param kind [IN] memcpy type | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, | |||||
aclrtMemcpyKind kind); | |||||
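A minimal host/device round-trip sketch using the allocation, set, and copy interfaces above:

/* Sketch: round-trip one buffer through the device. */
size_t size = 1024;
void *host = NULL;
void *dev = NULL;
if (aclrtMallocHost(&host, size) == ACL_SUCCESS &&
    aclrtMalloc(&dev, size, ACL_MEM_MALLOC_HUGE_FIRST) == ACL_SUCCESS) {
    (void)aclrtMemset(dev, size, 0, size);
    (void)aclrtMemcpy(dev, size, host, size, ACL_MEMCPY_HOST_TO_DEVICE);
    (void)aclrtMemcpy(host, size, dev, size, ACL_MEMCPY_DEVICE_TO_HOST);
}
if (dev != NULL) { (void)aclrtFree(dev); }
if (host != NULL) { (void)aclrtFreeHost(host); }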
/** | |||||
* @ingroup AscendCL | |||||
* @brief Initialize memory and set contents of memory to specified value | |||||
* | |||||
* @par Function | |||||
* The memory to be initialized is on the Host or device side, | |||||
* and the system determines whether | |||||
* it is host or device according to the address | |||||
* | |||||
* @param devPtr [IN] Starting address of memory | |||||
* @param maxCount [IN] Max length of destination address memory | |||||
* @param value [IN] Set value | |||||
* @param count [IN] The length of memory | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Asynchronous memory replication between Host and Device | |||||
* | |||||
* @par Function | |||||
* After calling this interface, | |||||
* be sure to call the aclrtSynchronizeStream interface to ensure that | |||||
* the task of memory replication has been completed | |||||
* | |||||
* @par Restriction | |||||
* @li For on-chip Device-to-Device memory copy, | |||||
* both the source and destination addresses must be 64-byte aligned | |||||
* | |||||
* @param dst [IN] destination address pointer | |||||
* @param destMax [IN] Max length of destination address memory | |||||
* @param src [IN] source address pointer | |||||
 * @param count [IN] the number of bytes to copy
* @param kind [IN] memcpy type | |||||
* @param stream [IN] asynchronized task stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtSynchronizeStream | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, | |||||
aclrtMemcpyKind kind, aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
 * @brief Asynchronously initialize memory
 * and set its contents to the specified value
* | |||||
* @par Function | |||||
* The memory to be initialized is on the Host or device side, | |||||
* and the system determines whether | |||||
* it is host or device according to the address | |||||
* | |||||
* @param devPtr [IN] destination address pointer | |||||
* @param maxCount [IN] Max length of destination address memory | |||||
* @param value [IN] set value | |||||
 * @param count [IN] the number of bytes to set
* @param stream [IN] asynchronized task stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtSynchronizeStream | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, | |||||
aclrtStream stream); | |||||
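A sketch pairing the asynchronous interfaces with the required stream synchronization, reusing dev/host/size from the earlier round-trip sketch and a stream created earlier:

/* Sketch: async ops complete only after the stream is synchronized. */
(void)aclrtMemsetAsync(dev, size, 0, size, stream);
(void)aclrtMemcpyAsync(dev, size, host, size, ACL_MEMCPY_HOST_TO_DEVICE, stream);
(void)aclrtSynchronizeStream(stream);   /* device buffer is valid only after this */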
/** | |||||
* @ingroup AscendCL | |||||
* @brief create stream instance | |||||
* | |||||
* @param stream [OUT] the created stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief destroy stream instance | |||||
* | |||||
* @par Function | |||||
* Can only destroy streams created through the aclrtCreateStream interface | |||||
* | |||||
* @par Restriction | |||||
* Before calling the aclrtDestroyStream interface to destroy | |||||
* the specified Stream, you need to call the aclrtSynchronizeStream interface | |||||
* to ensure that the tasks in the Stream have been completed. | |||||
* | |||||
* @param stream [IN] the stream to destroy | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtCreateStream | aclrtSynchronizeStream | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief block the host until all tasks | |||||
* in the specified stream have completed | |||||
* | |||||
* @param stream [IN] the stream to wait | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Blocks the operation of the specified Stream until | |||||
* the specified Event is completed. | |||||
* Support for multiple streams waiting for the same event. | |||||
* | |||||
* @param stream [IN]  the stream to wait on. If using the default stream, set NULL
* @param event [IN] the event to wait | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event); | |||||
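/*
* A cross-stream ordering sketch: tasks queued on streamB after the wait do
* not start until the event recorded on streamA completes. aclrtCreateEvent,
* aclrtRecordEvent and aclrtDestroyEvent from the event section of this API
* are assumed; streamA and streamB are previously created streams:
*
*   aclrtEvent event = NULL;
*   (void)aclrtCreateEvent(&event);
*   // ... enqueue producer tasks on streamA ...
*   (void)aclrtRecordEvent(event, streamA);
*   (void)aclrtStreamWaitEvent(streamB, event);
*   // ... consumer tasks enqueued on streamB now run after the event ...
*   (void)aclrtDestroyEvent(event);
*/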
/** | |||||
* @ingroup AscendCL | |||||
* @brief set group | |||||
* | |||||
* @par Function | |||||
* assign subsequent tasks to the corresponding group
* | |||||
* @param groupId [IN] group id | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief get the number of groups
* | |||||
* @par Function | |||||
* get the number of groups. If the number of groups is zero,
* it means that groups are not supported or have not been created.
* | |||||
* @param count [OUT]   the number of groups
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief create group information | |||||
* | |||||
* @retval null for failed. | |||||
* @retval OtherValues success. | |||||
* | |||||
* @see aclrtDestroyGroupInfo | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo(); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief destroy group information | |||||
* | |||||
* @param groupInfo [IN] pointer to group information | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtCreateGroupInfo | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief get all group information | |||||
* | |||||
* @param groupInfo [OUT] pointer to group information | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtGetGroupCount | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief get detailed information of a group
* | |||||
* @param groupInfo [IN] pointer to group information | |||||
* @param groupIndex [IN] group index value | |||||
* @param attr [IN] group attribute | |||||
* @param attrValue [OUT] pointer to attribute value | |||||
* @param valueLen [IN] length of attribute value | |||||
* @param paramRetSize [OUT] pointer to real length of attribute value | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupIndex, | |||||
aclrtGroupAttr attr, void *attrValue, size_t valueLen, | |||||
size_t *paramRetSize); | |||||
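/*
* A minimal group-query sketch. ACL_GROUP_AICORE_INT is an assumed
* aclrtGroupAttr value; substitute the attribute actually required:
*
*   uint32_t count = 0;
*   if (aclrtGetGroupCount(&count) == ACL_SUCCESS && count > 0) {
*       aclrtGroupInfo *info = aclrtCreateGroupInfo();
*       (void)aclrtGetAllGroupInfo(info);
*       uint32_t aicoreNum = 0;
*       size_t retSize = 0;
*       (void)aclrtGetGroupInfoDetail(info, 0, ACL_GROUP_AICORE_INT,
*                                     &aicoreNum, sizeof(aicoreNum), &retSize);
*       (void)aclrtDestroyGroupInfo(info);
*   }
*/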
/** | |||||
* @ingroup AscendCL | |||||
* @brief check whether the current device and the peer device support the p2p feature
* | |||||
* @param canAccessPeer [OUT] pointer to save the checking result | |||||
* @param deviceId [IN] current device id | |||||
* @param peerDeviceId [IN] peer device id | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief enable p2p access between the current device and the peer device
* | |||||
* @param peerDeviceId [IN] the peer device id | |||||
* @param flags [IN] reserved field, now it must be zero | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief disable p2p access between the current device and the peer device
* | |||||
* @param peerDeviceId [IN] the peer device id | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId); | |||||
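/*
* A minimal peer-access sketch: query the capability first, then enable
* access to the peer (flags must be zero). Device ids 0 and 1 are
* placeholders:
*
*   int32_t canAccess = 0;
*   if (aclrtDeviceCanAccessPeer(&canAccess, 0, 1) == ACL_SUCCESS && canAccess == 1) {
*       (void)aclrtDeviceEnablePeerAccess(1, 0);
*       // ... peer-to-peer copies between device 0 and device 1 ...
*       (void)aclrtDeviceDisablePeerAccess(1);
*   }
*/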
/** | |||||
* @ingroup AscendCL | |||||
* @brief Obtain the free memory and total memory of the specified attribute.
* The reported memory includes both normal memory and huge memory.
* | |||||
* @param attr [IN] the memory attribute of specified device | |||||
* @param free [OUT] the free memory of specified device | |||||
* @param total [OUT] the total memory of specified device. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total); | |||||
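/*
* A one-call sketch for memory statistics; ACL_HBM_MEM is an assumed
* aclrtMemAttr value for on-chip HBM:
*
*   size_t freeMem = 0;
*   size_t totalMem = 0;
*   (void)aclrtGetMemInfo(ACL_HBM_MEM, &freeMem, &totalMem);
*/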
/** | |||||
* @ingroup AscendCL | |||||
* @brief Set the timeout interval for op waiting
* | |||||
* @param timeout [IN] op wait timeout | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif // INC_EXTERNAL_ACL_ACL_RT_H_ |
@@ -0,0 +1,276 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
#define INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
#include "acl/acl_base.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
enum acltdtTensorType { | |||||
ACL_TENSOR_DATA_UNDEFINED = -1, | |||||
ACL_TENSOR_DATA_TENSOR, | |||||
ACL_TENSOR_DATA_END_OF_SEQUENCE, | |||||
ACL_TENSOR_DATA_ABNORMAL | |||||
}; | |||||
typedef struct acltdtDataItem acltdtDataItem; | |||||
typedef struct acltdtDataset acltdtDataset; | |||||
typedef struct acltdtChannelHandle acltdtChannelHandle; | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get tensor type from item | |||||
* | |||||
* @param dataItem [IN] pointer to the data item | |||||
* | |||||
* @retval Tensor type. | |||||
* @retval ACL_TENSOR_DATA_UNDEFINED if dataItem is null
*/ | |||||
ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get data type from item | |||||
* | |||||
* @param dataItem [IN] pointer to the data item | |||||
* | |||||
* @retval Data type. | |||||
* @retval ACL_DT_UNDEFINED if dataItem is null | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get data address from item | |||||
* | |||||
* @param dataItem [IN] pointer to data item | |||||
* | |||||
* @retval null for failed | |||||
* @retval OtherValues success | |||||
*/ | |||||
ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get data size from item | |||||
* | |||||
* @param dataItem [IN] pointer to data item | |||||
* | |||||
* @retval 0 for failed | |||||
* @retval OtherValues success | |||||
*/ | |||||
ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get the number of dims from item
* | |||||
* @param dataItem [IN] pointer to data item | |||||
* | |||||
* @retval 0 for failed | |||||
* @retval OtherValues success | |||||
*/ | |||||
ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get dims from item | |||||
* | |||||
* @param dataItem [IN] the struct of data item | |||||
* @param dims [IN|OUT]  pointer to the dims of dataItem
* @param dimNum [IN] the size of the dims | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Create the struct of data item | |||||
* | |||||
* @param tdtType [IN] Tdt tensor type | |||||
* @param dims [IN] pointer of tdtDataItem's dims | |||||
* @param dimNum [IN] Dim number | |||||
* @param dataType [IN] Data type | |||||
* @param data [IN] Data pointer | |||||
* @param size [IN] Data size | |||||
* | |||||
* @retval null for failed | |||||
* @retval OtherValues success | |||||
* | |||||
* @see acltdtDestroyDataItem | |||||
*/ | |||||
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum, | |||||
aclDataType dataType, void *data, size_t size); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Destroy the struct of data item | |||||
* | |||||
* @param dataItem [IN] pointer to the data item | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see acltdtCreateDataItem | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Create the tdt dataset | |||||
* | |||||
* @retval null for failed | |||||
* @retval OtherValues success | |||||
* | |||||
* @see acltdtDestroyDataset | |||||
*/ | |||||
ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset(); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Destroy the tdt dataset | |||||
* | |||||
* @param dataset [IN] pointer to the dataset | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see acltdtCreateDataset | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get the data item | |||||
* | |||||
* @param dataset [IN] pointer to the dataset | |||||
* @param index [IN] index of the dataset | |||||
* | |||||
* @retval null for failed | |||||
* @retval OtherValues success | |||||
* | |||||
* @see acltdtAddDataItem | |||||
*/ | |||||
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Add the data item to the dataset
* | |||||
* @param dataset [OUT] pointer to the dataset | |||||
* @param dataItem [IN] pointer to the data item | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see acltdtGetDataItem | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Get the size of dataset | |||||
* | |||||
* @param dataset [IN] pointer to the dataset | |||||
* | |||||
* @retval 0 for failed | |||||
* @retval OtherValues success | |||||
*/ | |||||
ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Stop the channel | |||||
* | |||||
* @param handle [IN] pointer to the channel handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see acltdtCreateChannel | acltdtDestroyChannel | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Create the channel | |||||
* | |||||
* @param deviceId [IN] the device id | |||||
* @param name [IN] the channel's name | |||||
* | |||||
* @retval null for failed | |||||
* @retval OtherValues success | |||||
* | |||||
* @see acltdtStopChannel | acltdtDestroyChannel | |||||
*/ | |||||
ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Destroy the channel | |||||
* | |||||
* @param handle [IN] pointer to the channel handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see acltdtCreateChannel | acltdtStopChannel | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Send tensor to device | |||||
* | |||||
* @param handle [IN] pointer to the channel handle | |||||
* @param dataset [IN] pointer to the dataset | |||||
* @param timeout [IN]  reserved; currently it must be -1
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see acltdtReceiveTensor | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, | |||||
int32_t timeout); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Receive tensor from device | |||||
* | |||||
* @param handle [IN] pointer to the channel handle | |||||
* @param dataset [OUT] pointer to the dataset | |||||
* @param timeout [IN]  reserved; currently it must be -1
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see acltdtSendTensor | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, | |||||
int32_t timeout); | |||||
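/*
* A minimal send-side TDT sketch: wrap a host buffer in a data item, add it
* to a dataset, and send it over a named channel. The channel name, device
* id and tensor shape are placeholders; timeout must currently be -1:
*
*   int64_t dims[2] = {2, 3};
*   float buf[6] = {0};
*   acltdtDataItem *item = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR, dims, 2,
*                                               ACL_FLOAT, buf, sizeof(buf));
*   acltdtDataset *ds = acltdtCreateDataset();
*   (void)acltdtAddDataItem(ds, item);
*   acltdtChannelHandle *ch = acltdtCreateChannel(0, "example_channel");
*   (void)acltdtSendTensor(ch, ds, -1);
*   (void)acltdtDestroyChannel(ch);
*   (void)acltdtDestroyDataset(ds);
*   (void)acltdtDestroyDataItem(item);
*/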
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif // INC_EXTERNAL_ACL_ACL_TDT_H_ |
@@ -0,0 +1,75 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||||
#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __declspec(dllexport)
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <stdint.h>
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009; | |||||
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011; | |||||
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012; | |||||
static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013; | |||||
static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014; | |||||
static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; | |||||
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; | |||||
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; | |||||
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; | |||||
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; | |||||
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020; | |||||
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021; | |||||
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022; | |||||
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; | |||||
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; | |||||
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; | |||||
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005; | |||||
static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; | |||||
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; | |||||
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; | |||||
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; | |||||
#ifdef __cplusplus | |||||
}  // extern "C"
#endif | |||||
#endif // INC_EXTERNAL_GE_GE_ERROR_CODES_H_ |
@@ -0,0 +1,109 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
#define __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
#include <stdint.h>
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | |||||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore overflow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||||
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -0,0 +1,334 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
#define INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
#include "acl/acl.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; | |||||
typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief perform the matrix-vector multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param m [IN] number of rows of matrix A | |||||
* @param n [IN] number of columns of matrix A | |||||
* @param alpha [IN]      pointer to scalar used for multiplication,
*                        of the same type as dataTypeY
* @param a [IN] pointer to matrix A | |||||
* @param lda [IN] leading dimension used to store the matrix A | |||||
* @param dataTypeA [IN] datatype of matrix A | |||||
* @param x [IN] pointer to vector x | |||||
* @param incx [IN] stride between consecutive elements of vector x | |||||
* @param dataTypeX [IN] datatype of vector x | |||||
* @param beta [IN]       pointer to scalar used for multiplication,
*                        of the same type as dataTypeY. If beta == 0,
*                        then y does not have to be a valid input
* @param y [IN|OUT] pointer to vector y | |||||
* @param incy [IN] stride between consecutive elements of vector y | |||||
* @param dataTypeY [IN] datatype of vector y | |||||
* @param type [IN] computation type | |||||
* @param stream [IN] stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, | |||||
aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, | |||||
const void *beta, void *y, int incy, aclDataType dataTypeY, | |||||
aclComputeType type, aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief create a handle for performing the matrix-vector multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param m [IN] number of rows of matrix A | |||||
* @param n [IN] number of columns of matrix A | |||||
* @param dataTypeA [IN] datatype of matrix A | |||||
* @param dataTypeX [IN] datatype of vector x | |||||
* @param dataTypeY [IN] datatype of vector y | |||||
* @param type [IN] computation type | |||||
* @param handle [OUT] pointer to the pointer to the handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA, | |||||
aclDataType dataTypeX, aclDataType dataTypeY, | |||||
aclComputeType type, aclopHandle **handle); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief perform the matrix-vector multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param m [IN] number of rows of matrix A | |||||
* @param n [IN] number of columns of matrix A | |||||
* @param alpha [IN] pointer to scalar used for multiplication | |||||
* @param a [IN] pointer to matrix A | |||||
* @param lda [IN] leading dimension used to store the matrix A | |||||
* @param x [IN] pointer to vector x | |||||
* @param incx [IN] stride between consecutive elements of vector x | |||||
* @param beta [IN] pointer to scalar used for multiplication. | |||||
* If beta value == 0, | |||||
* then y does not have to be a valid input | |||||
* @param y [IN|OUT] pointer to vector y | |||||
* @param incy [IN] stride between consecutive elements of vector y | |||||
* @param type [IN] computation type | |||||
* @param stream [IN] stream | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha, | |||||
const aclFloat16 *a, int lda, const aclFloat16 *x, int incx, | |||||
const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type, | |||||
aclrtStream stream); | |||||
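/*
* A minimal half-precision gemv sketch computing y = alpha * A * x + beta * y
* for a 4x3 matrix A. The buffers a, x and y are assumed to be device
* allocations filled beforehand, and the leading dimension shown is
* illustrative only:
*
*   aclFloat16 alpha = aclFloatToFloat16(1.0f);
*   aclFloat16 beta = aclFloatToFloat16(0.0f);
*   (void)aclblasHgemv(ACL_TRANS_N, 4, 3, &alpha, a, 3, x, 1, &beta, y, 1,
*                      ACL_COMPUTE_HIGH_PRECISION, stream);
*   (void)aclrtSynchronizeStream(stream);
*/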
/** | |||||
* @ingroup AscendCL | |||||
* @brief create a handle for performing the matrix-vector multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param m [IN] number of rows of matrix A | |||||
* @param n [IN] number of columns of matrix A | |||||
* @param type [IN] computation type | |||||
* @param handle [OUT] pointer to the pointer to the handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type, | |||||
aclopHandle **handle); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief perform the matrix-vector multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param m [IN] number of rows of matrix A | |||||
* @param n [IN] number of columns of matrix A | |||||
* @param alpha [IN] pointer to scalar used for multiplication | |||||
* @param a [IN] pointer to matrix A | |||||
* @param lda [IN] leading dimension used to store the matrix A | |||||
* @param x [IN] pointer to vector x | |||||
* @param incx [IN] stride between consecutive elements of vector x | |||||
* @param beta [IN] pointer to scalar used for multiplication. | |||||
* If beta value == 0, | |||||
* then y does not have to be a valid input | |||||
* @param y [IN|OUT] pointer to vector y | |||||
* @param incy [IN] stride between consecutive elements of vector y | |||||
* @param type [IN] computation type | |||||
* @param stream [IN] stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a, | |||||
int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y, | |||||
int incy, aclComputeType type, aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief create a handle for performing the matrix-vector multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param m [IN] number of rows of matrix A | |||||
* @param n [IN] number of columns of matrix A | |||||
* @param handle [OUT] pointer to the pointer to the handle | |||||
* @param type [IN] computation type | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type, | |||||
aclopHandle **handle); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief perform the matrix-matrix multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param transB [IN] transpose type of matrix B | |||||
* @param transC [IN] transpose type of matrix C | |||||
* @param m [IN] number of rows of matrix A and matrix C | |||||
* @param n [IN] number of columns of matrix B and matrix C | |||||
* @param k [IN] number of columns of matrix A and rows of matrix B | |||||
* @param alpha [IN]      pointer to scalar used for multiplication, of the same type as dataTypeC
* @param matrixA [IN] pointer to matrix A | |||||
* @param lda [IN] leading dimension array used to store matrix A | |||||
* @param dataTypeA [IN] datatype of matrix A | |||||
* @param matrixB [IN] pointer to matrix B | |||||
* @param ldb [IN] leading dimension array used to store matrix B | |||||
* @param dataTypeB [IN] datatype of matrix B | |||||
* @param beta [IN]       pointer to scalar used for multiplication,
*                        of the same type as dataTypeC. If beta == 0,
*                        then matrixC does not have to be a valid input
* @param matrixC [IN|OUT] pointer to matrix C | |||||
* @param ldc [IN] leading dimension array used to store matrix C | |||||
* @param dataTypeC [IN] datatype of matrix C | |||||
* @param type [IN] computation type | |||||
* @param stream [IN] stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
int k, const void *alpha, const void *matrixA, int lda, | |||||
aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB, | |||||
const void *beta, void *matrixC, int ldc, aclDataType dataTypeC, | |||||
aclComputeType type, aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief create a handle for performing the matrix-matrix multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param transB [IN] transpose type of matrix B | |||||
* @param transC [IN] transpose type of matrix C | |||||
* @param m [IN] number of rows of matrix A and matrix C | |||||
* @param n [IN] number of columns of matrix B and matrix C | |||||
* @param k [IN] number of columns of matrix A and rows of matrix B | |||||
* @param dataTypeA [IN] datatype of matrix A | |||||
* @param dataTypeB [IN] datatype of matrix B | |||||
* @param dataTypeC [IN] datatype of matrix C | |||||
* @param type [IN] computation type | |||||
* @param handle [OUT] pointer to the pointer to the handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
int m, int n, int k, aclDataType dataTypeA, | |||||
aclDataType dataTypeB, aclDataType dataTypeC, | |||||
aclComputeType type, aclopHandle **handle); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief perform the matrix-matrix multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param transB [IN] transpose type of matrix B | |||||
* @param transC [IN] transpose type of matrix C | |||||
* @param m [IN] number of rows of matrix A and matrix C | |||||
* @param n [IN] number of columns of matrix B and matrix C | |||||
* @param k [IN] number of columns of matrix A and rows of matrix B | |||||
* @param alpha [IN] pointer to scalar used for multiplication | |||||
* @param matrixA [IN] pointer to matrix A | |||||
* @param lda [IN] leading dimension used to store the matrix A | |||||
* @param matrixB [IN] pointer to matrix B | |||||
* @param ldb [IN] leading dimension used to store the matrix B | |||||
* @param beta [IN] pointer to scalar used for multiplication. | |||||
* If beta value == 0, | |||||
* then matrixC does not have to be a valid input | |||||
* @param matrixC [IN|OUT] pointer to matrix C | |||||
* @param ldc [IN] leading dimension used to store the matrix C | |||||
* @param type [IN] computation type | |||||
* @param stream [IN] stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda, | |||||
const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta, | |||||
aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream); | |||||
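/*
* A minimal half-precision gemm sketch for C = alpha * A(2x4) * B(4x3) + beta * C(2x3).
* matA, matB and matC are assumed device allocations; the leading dimensions
* shown are illustrative only:
*
*   aclFloat16 alpha = aclFloatToFloat16(1.0f);
*   aclFloat16 beta = aclFloatToFloat16(0.0f);
*   (void)aclblasHgemm(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N, 2, 3, 4,
*                      &alpha, matA, 4, matB, 3, &beta, matC, 3,
*                      ACL_COMPUTE_HIGH_PRECISION, stream);
*   (void)aclrtSynchronizeStream(stream);
*/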
/** | |||||
* @ingroup AscendCL | |||||
* @brief create a handle for performing the matrix-matrix multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param transB [IN] transpose type of matrix B | |||||
* @param transC [IN] transpose type of matrix C | |||||
* @param m [IN] number of rows of matrix A and matrix C | |||||
* @param n [IN] number of columns of matrix B and matrix C | |||||
* @param k [IN] number of columns of matrix A and rows of matrix B | |||||
* @param type [IN] computation type | |||||
* @param handle [OUT] pointer to the pointer to the handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
int m, int n, int k, aclComputeType type, | |||||
aclopHandle **handle); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief perform the matrix-matrix multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param transB [IN] transpose type of matrix B | |||||
* @param transC [IN] transpose type of matrix C | |||||
* @param m [IN] number of rows of matrix A and matrix C | |||||
* @param n [IN] number of columns of matrix B and matrix C | |||||
* @param k [IN] number of columns of matrix A and rows of matrix B | |||||
* @param alpha [IN] pointer to scalar used for multiplication | |||||
* @param matrixA [IN] pointer to matrix A | |||||
* @param lda [IN] leading dimension used to store the matrix A | |||||
* @param matrixB [IN] pointer to matrix B | |||||
* @param ldb [IN] leading dimension used to store the matrix B | |||||
* @param beta [IN] pointer to scalar used for multiplication. | |||||
* If beta value == 0, | |||||
* then matrixC does not have to be a valid input | |||||
* @param matrixC [IN|OUT] pointer to matrix C | |||||
* @param ldc [IN] leading dimension used to store the matrix C | |||||
* @param type [IN] computation type | |||||
* @param stream [IN] stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
int k, const int32_t *alpha, const int8_t *matrixA, int lda, | |||||
const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC, | |||||
int ldc, aclComputeType type, aclrtStream stream); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief create a handle for performing the matrix-matrix multiplication | |||||
* | |||||
* @param transA [IN] transpose type of matrix A | |||||
* @param transB [IN] transpose type of matrix B | |||||
* @param transC [IN] transpose type of matrix C | |||||
* @param m [IN] number of rows of matrix A and matrix C | |||||
* @param n [IN] number of columns of matrix B and matrix C | |||||
* @param k [IN] number of columns of matrix A and rows of matrix B | |||||
* @param type [IN] computation type | |||||
* @param handle [OUT] pointer to the pointer to the handle | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
int m, int n, int k, aclComputeType type, | |||||
aclopHandle **handle); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ |
@@ -0,0 +1,348 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
#define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
#include "acl/acl.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
typedef struct aclfvInitPara aclfvInitPara; | |||||
typedef struct aclfvFeatureInfo aclfvFeatureInfo; | |||||
typedef struct aclfvRepoRange aclfvRepoRange; | |||||
typedef struct aclfvQueryTable aclfvQueryTable; | |||||
typedef struct aclfvSearchInput aclfvSearchInput; | |||||
typedef struct aclfvSearchResult aclfvSearchResult; | |||||
// search operation type | |||||
enum aclfvSearchType { | |||||
SEARCH_1_N, // 1:N operation type | |||||
SEARCH_N_M // N:M operation type | |||||
}; | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Create fv init param. | |||||
* | |||||
* @param fsNum [IN] The feature num | |||||
* | |||||
* @retval null for failed. | |||||
* @retval OtherValues success. | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclfvInitPara *aclfvCreateInitPara(uint64_t fsNum); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Destroy fv init param. | |||||
* | |||||
* @par Function | |||||
* Can only destroy fv init param information created | |||||
* through aclfvCreateInitPara interface. | |||||
* | |||||
* @param initPara [IN] fv init param. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclfvCreateInitPara | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvDestroyInitPara(aclfvInitPara *initPara); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief set the value of maxTopNumFor1N in the fv init param.
* | |||||
* @param initPara [IN|OUT] fv init param. | |||||
* @param maxTopNumFor1N [IN] maxTopNumFor1N value for init param. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvSet1NTopNum(aclfvInitPara *initPara, uint32_t maxTopNumFor1N); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief set the value of maxTopNumForNM in the fv init param.
* | |||||
* @param initPara [IN|OUT] fv init param. | |||||
* @param maxTopNumForNM [IN] maxTopNumForNM value for init param. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t maxTopNumForNM); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Create fv feature info. | |||||
* | |||||
* @param id0 [IN] The first level library id0 | |||||
* @param id1 [IN] Secondary library id1 | |||||
* @param offset [IN] The offset of the first feature in the library | |||||
* @param featureLen [IN] Single feature length | |||||
* @param featureCount [IN] Single feature count | |||||
* @param featureData [IN] Feature value list | |||||
* @param featureDataLen [IN] Feature value list length | |||||
* | |||||
* @retval null for failed. | |||||
* @retval OtherValues success. | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, | |||||
uint32_t featureLen, uint32_t featureCount, | |||||
uint8_t *featureData, uint32_t featureDataLen); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Destroy fv feature info. | |||||
* | |||||
* @par Function | |||||
* Can only destroy fv feature info information created | |||||
* through aclfvCreateFeatureInfo interface. | |||||
* | |||||
* @param featureInfo [IN] fv feature info. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclfvCreateFeatureInfo | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvDestroyFeatureInfo(aclfvFeatureInfo *featureInfo); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Create fv repo range. | |||||
* | |||||
* @param id0Min [IN] id0 start value
* @param id0Max [IN] id0 max value
* @param id1Min [IN] id1 start value
* @param id1Max [IN] id1 max value
* | |||||
* @retval null for failed. OtherValues success | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclfvRepoRange *aclfvCreateRepoRange(uint32_t id0Min, uint32_t id0Max, uint32_t id1Min, | |||||
uint32_t id1Max); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Destroy fv repo range. | |||||
* | |||||
* @par Function | |||||
* Can only destroy fv repo range information created | |||||
* through aclfvCreateRepoRange interface. | |||||
* | |||||
* @param repoRange [IN] fv repo range. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclfvCreateRepoRange | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvDestroyRepoRange(aclfvRepoRange *repoRange); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Create query table. | |||||
* | |||||
* @param queryCnt [IN] Number of tables, the maximum number is 6 | |||||
* @param tableLen [IN] Single table length; each table is 32KB
* @param tableData [IN] Feature value list
* @param tableDataLen [IN] The length of memory requested by the tableData pointer
* | |||||
* @retval null for failed. OtherValues success | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclfvQueryTable *aclfvCreateQueryTable(uint32_t queryCnt, uint32_t tableLen, uint8_t *tableData, | |||||
uint32_t tableDataLen); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Destroy query table. | |||||
* | |||||
* @par Function | |||||
* Can only destroy query table information created | |||||
* through aclfvCreateQueryTable interface. | |||||
* | |||||
* @param queryTable [IN] query table. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclfvCreateQueryTable | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvDestroyQueryTable(aclfvQueryTable *queryTable); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Create search input. | |||||
* | |||||
* @param queryTable [IN] query table | |||||
* @param repoRange [IN] query repo range | |||||
* @param topk [IN] query topk | |||||
* | |||||
* @retval null for failed. OtherValues success | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclfvSearchInput *aclfvCreateSearchInput(aclfvQueryTable *queryTable, aclfvRepoRange *repoRange, | |||||
uint32_t topk); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Destroy search input. | |||||
* | |||||
* @par Function | |||||
* Can only destroy search input information created | |||||
* through aclfvCreateSearchInput interface. | |||||
* | |||||
* @param searchInput [IN] search input. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclfvCreateSearchInput | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInput); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Create search result. | |||||
* | |||||
* @param queryCnt [IN] Retrieve the number of features | |||||
* @param resultNum [IN] The number of search results for each feature; the array length is queryCnt
* @param resultNumDataLen [IN] resultNum memory length | |||||
* @param id0 [IN] Level 1 library id0 | |||||
* @param id1 [IN] Secondary library id1 | |||||
* @param resultOffset [IN] The offset of the bottom library corresponding | |||||
* to each feature retrieval result, total length topK * queryCnt | |||||
* @param resultDistance [IN] Distance, total length topK * queryCnt | |||||
* @param dataLen [IN] The memory size requested by
* id0/id1/resultOffset/resultDistance
* | |||||
* @retval null for failed. OtherValues success | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, | |||||
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, | |||||
uint32_t *resultOffset, float *resultDistance, | |||||
uint32_t dataLen); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief Destroy search result. | |||||
* | |||||
* @par Function | |||||
* Can only destroy search result information created | |||||
* through aclfvCreateSearchResult interface. | |||||
* | |||||
* @param searchResult [IN] search result. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclfvCreateSearchResult | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvDestroySearchResult(aclfvSearchResult *searchResult); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief fv IP initialize. | |||||
* | |||||
* @param initPara [IN] fv init param. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure. | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvInit(aclfvInitPara *initPara); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief release fv resources. | |||||
* | |||||
* @par Function | |||||
* Can only release fv resources created | |||||
* through aclfvInit interface. | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure. | |||||
* | |||||
* @see aclfvInit | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvRelease(); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief fv repo add. | |||||
* | |||||
* @param type [IN] repo add type | |||||
* @param featureInfo [IN] add feature information | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure. | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief fv repo del. | |||||
* | |||||
* @param type [IN] repo delete type | |||||
* @param repoRange [IN] repo range information | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure. | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief fv accurate del. | |||||
* | |||||
* @param featureInfo [IN] accurate delete feature information | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure. | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief fv accurate modify. | |||||
* | |||||
* @param featureInfo [IN] accurate modify feature information | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure. | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo); | |||||
/** | |||||
* @ingroup AscendCL | |||||
* @brief fv search. | |||||
* | |||||
* @param type [IN] search type | |||||
* @param searchInput [IN] search input | |||||
* @param searchRst [OUT] search result | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure. | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput *searchInput, | |||||
aclfvSearchResult *searchRst); | |||||
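/*
* A skeleton of the 1:N retrieval flow: initialize once, add features, then
* search. featureNum, featureInfo, searchInput and searchResult are
* placeholders created via the corresponding aclfvCreate* calls above;
* error handling is elided:
*
*   aclfvInitPara *para = aclfvCreateInitPara(featureNum);
*   (void)aclfvSet1NTopNum(para, 32);
*   (void)aclfvInit(para);
*   (void)aclfvRepoAdd(SEARCH_1_N, featureInfo);
*   (void)aclfvSearch(SEARCH_1_N, searchInput, searchResult);
*   (void)aclfvRelease();
*   (void)aclfvDestroyInitPara(para);
*/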
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ |
@@ -0,0 +1,159 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/** | |||||
* @file hccl.h | |||||
* @brief HCCL API | |||||
*/ | |||||
#ifndef HCCL_H_ | |||||
#define HCCL_H_ | |||||
#include <hccl/hccl_types.h> | |||||
#include <acl/acl.h> | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif // __cplusplus | |||||
/** | |||||
* @brief Initialize HCCL. | |||||
* | |||||
* @param clusterInfo A string identifying the cluster info file path, including the file name. | |||||
* @param rank An integer identifying the rank. | |||||
* @param comm A pointer identifying the initialized communication resource. | |||||
* @return HcclResult | |||||
* @see HcclCommDestroy() | |||||
*/ | |||||
extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); | |||||
/** | |||||
* @brief Get hccl root info. | |||||
* | |||||
* @param rootInfo A pointer identifying the hccl root info. | |||||
* @return HcclResult | |||||
*/ | |||||
extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); | |||||
/** | |||||
* @brief Initialize HCCL with root info. | |||||
* | |||||
* @param nRanks An integer identifying the rank size of the cluster. | |||||
* @param rootInfo A struct identifying the hccl root info. | |||||
* @param rank An integer identifying the rank. | |||||
* @param comm A pointer identifying the initialized communication resource. | |||||
* @return HcclResult | |||||
* @see HcclCommDestroy() | |||||
*/ | |||||
extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); | |||||
/** | |||||
* @brief AllReduce operator. | |||||
* | |||||
* @param sendBuf A pointer identifying the input data address of the operator. | |||||
* @param recvBuf A pointer identifying the output data address of the operator. | |||||
* @param count An integer(u64) identifying the number of the output data. | |||||
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, | |||||
* float32. | |||||
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||||
* @param comm A pointer identifying the communication resource based on. | |||||
* @param stream A pointer identifying the stream information. | |||||
* @return HcclResult | |||||
*/ | |||||
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, | |||||
HcclComm comm, aclrtStream stream); | |||||
/** | |||||
* @brief Broadcast operator. | |||||
* | |||||
* @param buf A pointer identifying the data address of the operator. | |||||
* @param count An integer(u64) identifying the number of the data. | |||||
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
* @param root An integer(u32) identifying the root rank in the operator. | |||||
* @param comm A pointer identifying the communication resource based on. | |||||
* @param stream A pointer identifying the stream information. | |||||
* @return HcclResult | |||||
*/ | |||||
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||||
aclrtStream stream); | |||||
/** | |||||
* @brief ReduceScatter operator. | |||||
* | |||||
* @param sendBuf A pointer identifying the input data address of the operator. | |||||
* @param recvBuf A pointer identifying the output data address of the operator. | |||||
* @param recvCount An integer(u64) identifying the number of the output data. | |||||
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||||
* @param comm A pointer identifying the communication resource based on. | |||||
* @param stream A pointer identifying the stream information. | |||||
* @return HcclResult | |||||
*/ | |||||
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
/** | |||||
* @brief AllGather operator. | |||||
* | |||||
* @param sendBuf A pointer identifying the input data address of the operator. | |||||
* @param recvBuf A pointer identifying the output data address of the operator. | |||||
* @param sendCount An integer(u64) identifying the number of the input data. | |||||
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
* @param comm A pointer identifying the communication resource based on. | |||||
* @param stream A pointer identifying the stream information. | |||||
* @return HcclResult | |||||
*/ | |||||
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||||
aclrtStream stream); | |||||
/** | |||||
* @brief Get the rank size of this comm. | |||||
* | |||||
* @param comm A pointer identifying the communication resource based on. | |||||
* @param rankSize A pointer identifying the rank size. | |||||
* @return HcclResult | |||||
*/ | |||||
extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize); | |||||
/** | |||||
* @brief Get the rank id of this comm. | |||||
* | |||||
* @param comm A pointer identifying the communication resource based on. | |||||
* @param rank A pointer identifying the rank id. | |||||
* @return HcclResult | |||||
*/ | |||||
extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank); | |||||
/** | |||||
* @brief Barrier operator. | |||||
* | |||||
* @param comm A pointer identifying the communication resource based on. | |||||
* @param stream A pointer identifying the stream information. | |||||
* @return HcclResult | |||||
*/ | |||||
extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream); | |||||
/** | |||||
* @brief Destroy HCCL comm | |||||
* | |||||
* @param comm A pointer identifying the communication resource to be destroyed. | |||||
* @return HcclResult | |||||
* @see HcclCommInitClusterInfo() | |||||
*/ | |||||
extern HcclResult HcclCommDestroy(HcclComm comm); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif // __cplusplus | |||||
#endif // HCCL_H_ |
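Taken together, these functions form the usual single-communicator lifecycle: initialize, launch collectives on a stream, synchronize, destroy. The following is a minimal sketch of that flow; the rank-table path, rank number, and buffer handling are illustrative assumptions, not values mandated by the API.

#include <hccl/hccl.h>
#include <acl/acl.h>

// Sketch: sum-AllReduce across all ranks, then tear the communicator down.
// "rank_table.json" and rank 0 are placeholder values.
int RunAllReduce(void *sendBuf, void *recvBuf, uint64_t count, aclrtStream stream) {
  HcclComm comm = NULL;
  if (HcclCommInitClusterInfo("rank_table.json", 0, &comm) != HCCL_SUCCESS) {
    return -1;
  }
  // Asynchronous on the stream; the reduced result lands in recvBuf on every rank.
  if (HcclAllReduce(sendBuf, recvBuf, count, HCCL_DATA_TYPE_FP32, HCCL_REDUCE_SUM,
                    comm, stream) != HCCL_SUCCESS) {
    (void)HcclCommDestroy(comm);
    return -1;
  }
  (void)aclrtSynchronizeStream(stream);  // wait before reading recvBuf
  return HcclCommDestroy(comm) == HCCL_SUCCESS ? 0 : -1;
}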
@@ -0,0 +1,101 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/** | |||||
* @file hccl_types.h | |||||
* @brief HCCL data type definition | |||||
* | |||||
*/ | |||||
#ifndef HCCL_TYPES_H_ | |||||
#define HCCL_TYPES_H_ | |||||
#include <stdint.h> | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif // __cplusplus | |||||
/** | |||||
* @brief HCCL functions return value definition | |||||
*/ | |||||
typedef enum { | |||||
HCCL_SUCCESS = 0, /**< success */ | |||||
HCCL_E_PARA = 1, /**< parameter error */ | |||||
HCCL_E_PTR = 2, /**< empty pointer */ | |||||
HCCL_E_MEMORY = 3, /**< memory error */ | |||||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
HCCL_E_RESERVED /**< reserved */ | |||||
} HcclResult; | |||||
/** | |||||
* @brief handle to HCCL communicator | |||||
*/ | |||||
typedef void *HcclComm; | |||||
/** | |||||
* @brief HCCL reduction operation | |||||
*/ | |||||
typedef enum { | |||||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
HCCL_REDUCE_MAX = 2, /**< max */ | |||||
HCCL_REDUCE_MIN = 3, /**< min */ | |||||
HCCL_REDUCE_RESERVED /**< reserved */ | |||||
} HcclReduceOp; | |||||
/** | |||||
* @brief HCCL data type | |||||
*/ | |||||
typedef enum { | |||||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||||
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
} HcclDataType; | |||||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
/** | |||||
* @brief HCCL root info | |||||
*/ | |||||
typedef struct HcclRootInfoDef { | |||||
char internal[HCCL_ROOT_INFO_BYTES]; | |||||
} HcclRootInfo; | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif // __cplusplus | |||||
#endif // HCCL_TYPES_H_ |
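Because every HCCL entry point returns an HcclResult, call sites typically funnel through a single checking helper. A minimal sketch follows, assuming only this header and stdio; the macro name is an invention for illustration, not part of the API.

#include <hccl/hccl_types.h>
#include <stdio.h>

// Hypothetical helper: log the failing expression and propagate its HcclResult.
#define HCCL_CHECK(expr)                                                  \
  do {                                                                    \
    HcclResult rc_ = (expr);                                              \
    if (rc_ != HCCL_SUCCESS) {                                            \
      (void)fprintf(stderr, "hccl error %d at: %s\n", (int)rc_, #expr);   \
      return rc_;                                                         \
    }                                                                     \
  } while (0)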
@@ -0,0 +1,109 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
#define __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
#include <stddef.h> | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | |||||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore overflow | |||||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||||
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
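The numbering is range-based: 107xxx codes flag invalid parameters or API misuse, 207xxx codes flag missing features or exhausted resources, and 507xxx codes flag runtime-internal failures. A coarse classifier built on that observation (a sketch, not part of the header; assumes rt_error_codes.h is included):

#include <stdint.h>

// Buckets follow the numeric ranges of the constants above.
static const char *RtErrorCategory(int32_t code) {
  if (code == ACL_RT_SUCCESS) return "success";
  if (code >= 107000 && code <= 107019) return "invalid parameter or usage";
  if (code >= 207000 && code <= 207011) return "feature or resource unavailable";
  if (code >= 507000) return "runtime internal failure";
  return "unknown";
}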
@@ -271,13 +271,14 @@ class FusionEndTaskInfo : public TaskInfo { | |||||
class HcclTaskInfo : public TaskInfo { | class HcclTaskInfo : public TaskInfo { | ||||
public: | public: | ||||
HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, | HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, | ||||
void *output_data_addr, int64_t workspace_size, int64_t hccl_stream_num, | |||||
void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, | |||||
const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, | const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, | ||||
int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag) | int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag) | ||||
: TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), | : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), | ||||
hccl_type_(hccl_type), | hccl_type_(hccl_type), | ||||
input_data_addr_(input_data_addr), | input_data_addr_(input_data_addr), | ||||
output_data_addr_(output_data_addr), | output_data_addr_(output_data_addr), | ||||
workspace_addr_(workspace_addr), | |||||
workspace_size_(workspace_size), | workspace_size_(workspace_size), | ||||
hccl_stream_num_(hccl_stream_num), | hccl_stream_num_(hccl_stream_num), | ||||
private_def_(private_def), | private_def_(private_def), | ||||
@@ -292,6 +293,7 @@ class HcclTaskInfo : public TaskInfo { | |||||
const std::string &hccl_type() const { return hccl_type_; } | const std::string &hccl_type() const { return hccl_type_; } | ||||
void *input_data_addr() const { return input_data_addr_; } | void *input_data_addr() const { return input_data_addr_; } | ||||
void *output_data_addr() const { return output_data_addr_; } | void *output_data_addr() const { return output_data_addr_; } | ||||
void *workspace_addr() const { return workspace_addr_; } | |||||
int64_t workspace_size() const { return workspace_size_; } | int64_t workspace_size() const { return workspace_size_; } | ||||
int64_t hccl_stream_num() const { return hccl_stream_num_; } | int64_t hccl_stream_num() const { return hccl_stream_num_; } | ||||
const std::vector<uint8_t> &private_def() const { return private_def_; } | const std::vector<uint8_t> &private_def() const { return private_def_; } | ||||
@@ -306,6 +308,7 @@ class HcclTaskInfo : public TaskInfo { | |||||
std::string hccl_type_; | std::string hccl_type_; | ||||
void *input_data_addr_; | void *input_data_addr_; | ||||
void *output_data_addr_; | void *output_data_addr_; | ||||
void *workspace_addr_; | |||||
int64_t workspace_size_; | int64_t workspace_size_; | ||||
int64_t hccl_stream_num_; | int64_t hccl_stream_num_; | ||||
std::vector<uint8_t> private_def_; | std::vector<uint8_t> private_def_; | ||||
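The added workspace_addr parameter moves workspace ownership out of the task: the address is injected at construction instead of being rtMalloc'd and rtFree'd inside HcclTask, matching the removal of the task-owned allocation elsewhere in this change. A hypothetical construction site, with every value a placeholder (namespace qualification omitted):

#include <string>
#include <vector>

// Sketch only: real addresses come from the model's memory planner.
std::vector<uint8_t> private_def;    // serialized HCCL private definition
void *ops_kernel_store = nullptr;
void *input_addr = nullptr;
void *output_addr = nullptr;
void *workspace_addr = nullptr;      // caller-owned now, not allocated by the task
HcclTaskInfo task("hccl_node", /*stream_id=*/0, "HcomAllReduce",
                  input_addr, output_addr, workspace_addr, /*workspace_size=*/4096,
                  /*hccl_stream_num=*/1, private_def, ops_kernel_store,
                  /*count=*/1, /*root_id=*/0, /*op_type=*/0, /*data_type=*/0,
                  /*group=*/"hccl_world_group", /*dump_flag=*/false);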
@@ -1 +1 @@ | |||||
Subproject commit a725349b65aef2940555af2ddb7b9461fbe0d5fd | |||||
Subproject commit 211788997dcc9aa63527541a44d511388c06bce5 |
@@ -0,0 +1,107 @@ | |||||
#!/bin/bash | |||||
# Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
# | |||||
# Licensed under the Apache License, Version 2.0 (the "License"); | |||||
# you may not use this file except in compliance with the License. | |||||
# You may obtain a copy of the License at | |||||
# | |||||
# http://www.apache.org/licenses/LICENSE-2.0 | |||||
# | |||||
# Unless required by applicable law or agreed to in writing, software | |||||
# distributed under the License is distributed on an "AS IS" BASIS, | |||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
# See the License for the specific language governing permissions and | |||||
# limitations under the License. | |||||
# ============================================================================ | |||||
set -e | |||||
CLANG_FORMAT=$(which clang-format) || { echo "Please install 'clang-format' tool first"; exit 1; } | |||||
version=$("${CLANG_FORMAT}" --version | sed -n "s/.*\ \([0-9]*\)\.[0-9]*\.[0-9]*.*/\1/p") | |||||
if [[ "${version}" -lt "8" ]]; then | |||||
echo "clang-format's version must be at least 8.0.0" | |||||
exit 1 | |||||
fi | |||||
CURRENT_PATH=$(pwd) | |||||
SCRIPTS_PATH=$(dirname "$0") | |||||
echo "CURRENT_PATH=${CURRENT_PATH}" | |||||
echo "SCRIPTS_PATH=${SCRIPTS_PATH}" | |||||
# print usage message | |||||
function usage() | |||||
{ | |||||
echo "Format the specified source files to conform the code style." | |||||
echo "Usage:" | |||||
echo "bash $0 [-a] [-c] [-l] [-h]" | |||||
echo "e.g. $0 -c" | |||||
echo "" | |||||
echo "Options:" | |||||
echo " -a format of all files" | |||||
echo " -c format of the files changed compared to last commit, default case" | |||||
echo " -l format of the files changed in last commit" | |||||
echo " -h Print usage" | |||||
} | |||||
# check and set options | |||||
function checkopts() | |||||
{ | |||||
# init variable | |||||
mode="changed" # default format changed files | |||||
# Process the options | |||||
while getopts 'aclh' opt | |||||
do | |||||
case "${opt}" in | |||||
a) | |||||
mode="all" | |||||
;; | |||||
c) | |||||
mode="changed" | |||||
;; | |||||
l) | |||||
mode="lastcommit" | |||||
;; | |||||
h) | |||||
usage | |||||
exit 0 | |||||
;; | |||||
*) | |||||
echo "Unknown option ${opt}!" | |||||
usage | |||||
exit 1 | |||||
esac | |||||
done | |||||
} | |||||
# init variable | |||||
# check options | |||||
checkopts "$@" | |||||
# switch to project root path, which contains clang-format config file '.clang-format' | |||||
cd "${SCRIPTS_PATH}/.." || exit 1 | |||||
FMT_FILE_LIST='__format_files_list__' | |||||
if [[ "X${mode}" == "Xall" ]]; then | |||||
find src -type f -name "*" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true | |||||
find inc -type f -name "*" | grep "\.h$\|\.cc$" >> "${FMT_FILE_LIST}" || true | |||||
elif [[ "X${mode}" == "Xchanged" ]]; then | |||||
# --diff-filter=ACMRTUXB will ignore deleted files in commit | |||||
git diff --diff-filter=ACMRTUXB --name-only | grep "^inc\|^src" | grep "\.h$\|\.cc$" >> "${FMT_FILE_LIST}" || true | |||||
else # "X${mode}" == "Xlastcommit" | |||||
git diff --diff-filter=ACMRTUXB --name-only HEAD~ HEAD | grep "^inc\|^src" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true | |||||
fi | |||||
while read line; do | |||||
if [ -f "${line}" ]; then | |||||
${CLANG_FORMAT} -i "${line}" | |||||
fi | |||||
done < "${FMT_FILE_LIST}" | |||||
rm "${FMT_FILE_LIST}" | |||||
cd "${CURRENT_PATH}" || exit 1 | |||||
echo "Specified cpp source files have been format successfully." |
@@ -27,15 +27,16 @@ namespace cce { | |||||
#define CC_FUSION_OP_MAX 32 | #define CC_FUSION_OP_MAX 32 | ||||
typedef enum tagccKernelType { | typedef enum tagccKernelType { | ||||
CCE_AI_CORE = 0, /* cce aicore */ | |||||
CCE_AI_CPU = 1, /* cce aicpu */ | |||||
TE = 2, /* te operator*/ | |||||
CUSTOMIZED = 3, /* customized operator */ | |||||
TE_AI_CORE = 4, /* te aicore operator*/ | |||||
TE_AI_CPU = 5, /* te aicpu operator */ | |||||
AI_CPU = 6, /* aicpu */ | |||||
CUST_AI_CPU = 7, /* custom aicpu*/ | |||||
INVALID = 8, /* unknown kernel type */ | |||||
CCE_AI_CORE = 0, /* cce aicore */ | |||||
CCE_AI_CPU = 1, /* cce aicpu */ | |||||
TE = 2, /* te operator*/ | |||||
CUSTOMIZED = 3, /* customized operator */ | |||||
TE_AI_CORE = 4, /* te aicore operator*/ | |||||
TE_AI_CPU = 5, /* te aicpu operator */ | |||||
AI_CPU = 6, /* aicpu */ | |||||
CUST_AI_CPU = 7, /* custom aicpu*/ | |||||
HOST_CPU = 8, /* host cpu */ | |||||
INVALID = 10000 /* unknown kernel type */ | |||||
} ccKernelType; | } ccKernelType; | ||||
typedef struct tagOpContext { | typedef struct tagOpContext { | ||||
@@ -124,27 +124,27 @@ struct HcomRemoteAccessAddrInfo { | |||||
}; | }; | ||||
struct HcomAllToAllVParams { | struct HcomAllToAllVParams { | ||||
void *sendbuf; | |||||
void *sendcounts; | |||||
void *sdispls; | |||||
HcclDataType sendtype; | |||||
void *recvbuf; | |||||
void *recvcounts; | |||||
void *rdispls; | |||||
HcclDataType recvtype; | |||||
const char *group; | |||||
void *sendbuf; // device mem | |||||
void *sendcounts; // device mem; Type: uint64 | |||||
void *sdispls; // device mem; Type: uint64 | |||||
HcclDataType sendtype; | |||||
void *recvbuf; // device mem | |||||
void *recvcounts; // device mem; Type: uint64 | |||||
void *rdispls; // device mem; Type: uint64 | |||||
HcclDataType recvtype; | |||||
const char *group; // not used now | |||||
}; | }; | ||||
struct HcomGatherAllToAllVParams { | struct HcomGatherAllToAllVParams { | ||||
void *addrInfo; | |||||
void *addrInfoCountPerRank; | |||||
void *recvbuf; | |||||
void *recvcounts; | |||||
void *rdispls; | |||||
void *gatheredbuf; | |||||
s32 addrLength; | |||||
HcclDataType recvtype; | |||||
const char *group; | |||||
void *addrInfo; // device mem; contains host VAs (uint64): [addr, length, addr, length, ...] | |||||
void *addrInfoCountPerRank; // device mem; length: rank size; the addrInfo count for every rank | |||||
void *recvbuf; // device mem | |||||
void *recvcounts; // device mem; Type: uint64 | |||||
void *rdispls; // device mem; Type: uint64 | |||||
void *gatheredbuf; // device mem | |||||
s32 addrLength; | |||||
HcclDataType recvtype; | |||||
const char *group; // not used now | |||||
}; | }; | ||||
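The new comments pin down where each buffer must live, so preparing the params reduces to allocating device memory for both the payload and the uint64 count/displacement arrays. A sketch under those assumptions; the sizes, rank count, and use of aclrtMalloc are illustrative:

#include <acl/acl.h>

// Sketch: fill HcomAllToAllVParams per the field annotations above.
HcomAllToAllVParams params = {};
const uint64_t rankSize = 8;                           // example world size
const size_t metaBytes = rankSize * sizeof(uint64_t);  // one uint64 per rank
(void)aclrtMalloc(&params.sendbuf, 1 << 20, ACL_MEM_MALLOC_HUGE_FIRST);
(void)aclrtMalloc(&params.recvbuf, 1 << 20, ACL_MEM_MALLOC_HUGE_FIRST);
(void)aclrtMalloc(&params.sendcounts, metaBytes, ACL_MEM_MALLOC_HUGE_FIRST);
(void)aclrtMalloc(&params.sdispls, metaBytes, ACL_MEM_MALLOC_HUGE_FIRST);
(void)aclrtMalloc(&params.recvcounts, metaBytes, ACL_MEM_MALLOC_HUGE_FIRST);
(void)aclrtMalloc(&params.rdispls, metaBytes, ACL_MEM_MALLOC_HUGE_FIRST);
params.sendtype = HCCL_DATA_TYPE_FP32;
params.recvtype = HCCL_DATA_TYPE_FP32;
params.group = nullptr;                                // "not used now" per the comment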
#ifdef __cplusplus | #ifdef __cplusplus | ||||
@@ -1,101 +0,0 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/** | |||||
* @file hccl_types.h | |||||
* @brief HCCL data type definition | |||||
* | |||||
*/ | |||||
#ifndef HCCL_TYPES_H_ | |||||
#define HCCL_TYPES_H_ | |||||
#include <stdint.h> | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif // __cplusplus | |||||
/** | |||||
* @brief HCCL functions return value definition | |||||
*/ | |||||
typedef enum { | |||||
HCCL_SUCCESS = 0, /**< success */ | |||||
HCCL_E_PARA = 1, /**< parameter error */ | |||||
HCCL_E_PTR = 2, /**< empty pointer */ | |||||
HCCL_E_MEMORY = 3, /**< memory error */ | |||||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
HCCL_E_RESERVED /**< reserved */ | |||||
} HcclResult; | |||||
/** | |||||
* @brief handle to HCCL communicator | |||||
*/ | |||||
typedef void *HcclComm; | |||||
/** | |||||
* @brief HCCL Reduction opperation | |||||
*/ | |||||
typedef enum { | |||||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
HCCL_REDUCE_MAX = 2, /**< max */ | |||||
HCCL_REDUCE_MIN = 3, /**< min */ | |||||
HCCL_REDUCE_RESERVED /**< reserved */ | |||||
} HcclReduceOp; | |||||
/** | |||||
* @brief HCCL data type | |||||
*/ | |||||
typedef enum { | |||||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||||
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
} HcclDataType; | |||||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
/** | |||||
* @brief HCCL root info | |||||
*/ | |||||
typedef struct HcclRootInfoDef { | |||||
char internal[HCCL_ROOT_INFO_BYTES]; | |||||
} HcclRootInfo; | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif // __cplusplus | |||||
#endif // HCCL_TYPES_H_ |
@@ -164,8 +164,22 @@ HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, | |||||
const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, | const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, | ||||
std::function<void(HcclResult status)> callback); | std::function<void(HcclResult status)> callback); | ||||
/** | |||||
* @brief Put alltoallv communication operation into hcom executor. | |||||
* | |||||
* @param params information about alltoallv communication operation. | |||||
* @param callback callback after collective communication operation. | |||||
* @return HcclResult | |||||
*/ | |||||
HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function<void(HcclResult status)> callback); | HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function<void(HcclResult status)> callback); | ||||
/** | |||||
* @brief Put gather alltoallv communication operation into hcom executor. | |||||
* | |||||
* @param params information about gather alltoallv communication operation. | |||||
* @param callback callback after collective communication operation. | |||||
* @return HcclResult | |||||
*/ | |||||
HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params, | HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params, | ||||
std::function<void(HcclResult status)> callback); | std::function<void(HcclResult status)> callback); | ||||
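Both enqueue calls are asynchronous: they return once the operation is queued, and the callback fires with the final status. A minimal sketch that blocks on completion via a promise (the surrounding params setup is assumed):

#include <future>

// Sketch: enqueue an alltoallv op and wait for its callback.
std::promise<HcclResult> done;
HcclResult rc = HcomExecEnqueueAllToAllV(
    params, [&done](HcclResult status) { done.set_value(status); });
if (rc == HCCL_SUCCESS) {
  HcclResult opStatus = done.get_future().get();  // completion status of the op
}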
@@ -56,6 +56,7 @@ | |||||
#include <dirent.h> | #include <dirent.h> | ||||
#include <getopt.h> | #include <getopt.h> | ||||
#include <libgen.h> | #include <libgen.h> | ||||
#include <malloc.h> | |||||
#include <linux/types.h> | #include <linux/types.h> | ||||
#include <linux/hdreg.h> | #include <linux/hdreg.h> | ||||
@@ -550,6 +550,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod | |||||
MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); | MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); | ||||
MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); | MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); | ||||
MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); | MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); | ||||
MMPA_FUNC_VISIBILITY mmSize mmGetPageSize(); | |||||
MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize); | |||||
MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr); | |||||
#define MMPA_DLL_API | #define MMPA_DLL_API | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
@@ -557,6 +557,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod | |||||
MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); | MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); | ||||
MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); | MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); | ||||
MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); | MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); | ||||
MMPA_FUNC_VISIBILITY mmSize mmGetPageSize(); | |||||
MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize); | |||||
MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr); | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
#if __cplusplus | #if __cplusplus | ||||
} | } | ||||
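The three additions give portable page-size discovery plus aligned allocation. A usage sketch, under the assumption that mmAlignMalloc returns a buffer aligned to alignSize and that such buffers must be released with mmAlignFree rather than free():

// Sketch: allocate one page-aligned page, use it, release it.
// mmSize and VOID come from the mmpa headers above.
mmSize page = mmGetPageSize();
VOID *buf = mmAlignMalloc(/*mallocSize=*/page, /*alignSize=*/page);
if (buf != NULL) {
  /* ... use buf ... */
  mmAlignFree(buf);
}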
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -65,6 +65,8 @@ in aipp config file, framework will auto add one input node to graph at last. \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator AippData. | *Compatible with the TensorFlow operator AippData. | ||||
*@par Restrictions: | |||||
*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. | |||||
*/ | */ | ||||
REG_OP(AippData) | REG_OP(AippData) | ||||
.INPUT(data, TensorType::ALL()) | .INPUT(data, TensorType::ALL()) | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -39,6 +39,7 @@ | |||||
#include "image_ops.h" | #include "image_ops.h" | ||||
#include "internal_ops.h" | #include "internal_ops.h" | ||||
#include "linalg_ops.h" | #include "linalg_ops.h" | ||||
#include "list_ops.h" | |||||
#include "logging_ops.h" | #include "logging_ops.h" | ||||
#include "lookup_ops.h" | #include "lookup_ops.h" | ||||
#include "math_ops.h" | #include "math_ops.h" | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -626,7 +626,7 @@ REG_OP(StopGradient) | |||||
*x: A tensor. \n | *x: A tensor. \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A tensor. \n | |||||
*y: A tensor with the same shape and contents as input. \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator Identity. | *Compatible with the TensorFlow operator Identity. | ||||
@@ -666,7 +666,7 @@ REG_OP(IdentityN) | |||||
*@li axis: The dimension index at which to expand. \n | *@li axis: The dimension index at which to expand. \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A tensor. \n | |||||
*y: A tensor with the same data as input, with an additional dimension inserted at the index specified by axis. \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ExpandDims. | *Compatible with the TensorFlow operator ExpandDims. | ||||
@@ -713,7 +713,7 @@ REG_OP(Unsqueeze) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A tensor. \n | *y: A tensor. \n | ||||
*@par Attention: | |||||
*@attention Constraints: | |||||
*This operator cannot be directly called by the acllopExecute API. \n | *This operator cannot be directly called by the acllopExecute API. \n | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
@@ -1153,6 +1153,102 @@ REG_OP(EditDistance) | |||||
.OUTPUT(output, TensorType({DT_FLOAT})) | .OUTPUT(output, TensorType({DT_FLOAT})) | ||||
.OP_END_FACTORY_REG(EditDistance) | .OP_END_FACTORY_REG(EditDistance) | ||||
/** | |||||
* @brief SortV2: sorts the input tensor along the given axis. | |||||
* @par Inputs: | |||||
* @li x: An ND tensor. Must be one of the following types: float16, float32, double. | |||||
* @par Attributes: | |||||
* @li axis: An optional int. The dimension to sort along. This value defaults to -1. | |||||
* @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False. | |||||
* @par Outputs: | |||||
* @li y: An ND tensor. Has the same type as "x". | |||||
* @attention Constraints: | |||||
* @li Axis should select the last dim. | |||||
* @li When the amount of data to sort is less than 150K elements, it is recommended to use this TBE op, | |||||
and descending order performs better than ascending. | |||||
* @li On Ascend 910, the upper limit of the data volume is 2000K elements. | |||||
*/ | |||||
REG_OP(SortV2) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(axis, Int, -1) | |||||
.ATTR(descending, Bool, false) | |||||
.OP_END_FACTORY_REG(SortV2) | |||||
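For reference, driving the op from the generated graph IR looks roughly like this; the set_input_*/set_attr_* accessor names follow the usual REG_OP code generation and are assumptions here:

// Sketch: descending sort along the last axis.
auto sort = ge::op::SortV2("sort_v2")
                .set_input_x(input_tensor)   // input_tensor: some upstream operator output
                .set_attr_axis(-1)           // must select the last dim
                .set_attr_descending(true);  // descending is the faster mode per the note above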
/** | |||||
* @brief Expand the input tensor to a compatible shape. \n | |||||
* @par Inputs: | |||||
* Two inputs, including: | |||||
* @li x: A Tensor. Must be one of the following types: | |||||
* float16, float32, int32, int8, uint8. \n | |||||
* @li shape: A Tensor specifying the shape that the input tensor is expanded to. \n | |||||
* @par Outputs: | |||||
* @li y: A Tensor. Has the same type as "x", and the shape specified by the "shape" input. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the ONNX operator Expand. | |||||
*/ | |||||
REG_OP(Expand) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
.INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
.OP_END_FACTORY_REG(Expand) | |||||
/** | |||||
*@brief Returns a tensor containing the indices of all non-zero elements of the input. \n | |||||
*@par Inputs: | |||||
*@li x: A Tensor. Must be one of the following types: double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool. | |||||
*@par Attributes: | |||||
* transpose: the output tensor will be transposed if true. \n | |||||
*@par Outputs: | |||||
* y: A Tensor of type int64, containing the indices of the non-zero elements. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the PyTorch operator NonZero. | |||||
*/ | |||||
REG_OP(NonZero) | |||||
.INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ | |||||
DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) | |||||
.OUTPUT(y, TensorType({DT_INT64})) | |||||
.ATTR(transpose, Bool, false) | |||||
.OP_END_FACTORY_REG(NonZero) | |||||
/** | |||||
* @brief Expand the input tensor to a compatible shape. \n | |||||
* @par Inputs: | |||||
* One input, including: | |||||
* @li x: A Tensor. Must be one of the following types: | |||||
* float16, float32, int32, int8, uint8. \n | |||||
* @par Attributes: | |||||
* @li shape: A required listInt specifying the shape that the input tensor is expanded to. \n | |||||
* @par Outputs: | |||||
* @li y: A Tensor. Has the same type as "x", and the shape specified by the "shape" attribute. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the ONNX operator Expand. | |||||
*/ | |||||
REG_OP(ExpandD) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
.REQUIRED_ATTR(shape, ListInt) | |||||
.OP_END_FACTORY_REG(ExpandD) | |||||
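Expand and ExpandD differ only in where the target shape comes from: Expand reads it from the "shape" input tensor at graph-execution time, while ExpandD bakes it in as the required listInt attribute. A sketch of both (IR accessor names assumed, as above):

// Dynamic variant: the shape arrives as a tensor input.
auto expand = ge::op::Expand("expand")
                  .set_input_x(x)
                  .set_input_shape(shape_tensor);  // e.g. an int32 const holding {2, 3, 4}

// Static variant: the shape is fixed at build time.
auto expand_d = ge::op::ExpandD("expand_d")
                    .set_input_x(x)
                    .set_attr_shape({2, 3, 4});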
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -0,0 +1,58 @@ | |||||
/** | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/*! | |||||
* \file avg_pool_1d_ops.h | |||||
* \brief | |||||
*/ | |||||
#ifndef OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_ | |||||
#define OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_ | |||||
#include "graph/operator_reg.h" | |||||
namespace ge { | |||||
/** | |||||
*@brief Generate an auxiliary matrix . \n | |||||
*@par Inputs: | |||||
* @li x: A tensor. Must be one of the following types: uint8, int8, int16, int32, | |||||
int64, float16, float, double. The format must be NHWC, NCHW or NC1HWC0. | |||||
*@par Attributes: | |||||
*@li ksize: Kernel size. Input type is int. | |||||
*@li strides: Input type is int. | |||||
*@li pads: Input type is listInt. | |||||
*@li ceil_mode: Bool, default value is false. | |||||
*@li count_include_pad: Bool, default value is false. \n | |||||
*@par Outputs: | |||||
*y: A tensor with the same type as "x". \n | |||||
*@par Third-party framework compatibility | |||||
*It is a custom operator. It has no corresponding framework operator. | |||||
*/ | |||||
REG_OP(AvgPool1DAvgMatrix) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8, | |||||
DT_INT32, DT_INT64, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8, | |||||
DT_INT32, DT_INT64, DT_DOUBLE})) | |||||
.REQUIRED_ATTR(ksize, Int) | |||||
.REQUIRED_ATTR(strides, Int) | |||||
.REQUIRED_ATTR(pads, ListInt) | |||||
.ATTR(ceil_mode, Bool, false) | |||||
.ATTR(count_include_pad, Bool, false) | |||||
.OP_END_FACTORY_REG(AvgPool1DAvgMatrix) | |||||
} | |||||
#endif |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -64,10 +64,10 @@ the same types as "x_tensors" . It's a dynamic output. \n | |||||
REG_OP(Batch) | REG_OP(Batch) | ||||
.DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \ | .DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \ | ||||
DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE})) | DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE})) | ||||
.OUTPUT(y_index, TensorType({ DT_INT64 })) | |||||
.OUTPUT(y_id, TensorType({ DT_INT64 })) | |||||
.DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \ | .DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \ | ||||
DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL})) | DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL})) | ||||
.OUTPUT(y_index, TensorType({ DT_INT64 })) | |||||
.OUTPUT(y_id, TensorType({ DT_INT64 })) | |||||
.REQUIRED_ATTR(num_batch_threads, Int) | .REQUIRED_ATTR(num_batch_threads, Int) | ||||
.REQUIRED_ATTR(max_batch_size, Int) | .REQUIRED_ATTR(max_batch_size, Int) | ||||
.ATTR(max_enqueued_batches, Int, 10) | .ATTR(max_enqueued_batches, Int, 10) | ||||
@@ -107,11 +107,13 @@ across multiple sessions . \n | |||||
REG_OP(Unbatch) | REG_OP(Unbatch) | ||||
.INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
DT_COMPLEX64, DT_COMPLEX128})) | |||||
.INPUT(index, TensorType({DT_INT64})) | .INPUT(index, TensorType({DT_INT64})) | ||||
.INPUT(id, TensorType({DT_INT64})) | .INPUT(id, TensorType({DT_INT64})) | ||||
.OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
DT_COMPLEX64, DT_COMPLEX128})) | |||||
.REQUIRED_ATTR(timeout_micros, Int) | .REQUIRED_ATTR(timeout_micros, Int) | ||||
.ATTR(container, String, "") | .ATTR(container, String, "") | ||||
.ATTR(shared_name, String, "") | .ATTR(shared_name, String, "") | ||||
@@ -146,13 +148,16 @@ across multiple sessions . \n | |||||
REG_OP(UnbatchGrad) | REG_OP(UnbatchGrad) | ||||
.INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
DT_COMPLEX64, DT_COMPLEX128})) | |||||
.INPUT(index, TensorType({DT_INT64})) | .INPUT(index, TensorType({DT_INT64})) | ||||
.INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
DT_COMPLEX64, DT_COMPLEX128})) | |||||
.INPUT(id, TensorType({DT_INT64})) | .INPUT(id, TensorType({DT_INT64})) | ||||
.OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
DT_COMPLEX64, DT_COMPLEX128})) | |||||
.ATTR(container, String, "") | .ATTR(container, String, "") | ||||
.ATTR(shared_name, String, "") | .ATTR(shared_name, String, "") | ||||
.OP_END_FACTORY_REG(UnbatchGrad) | .OP_END_FACTORY_REG(UnbatchGrad) | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -25,6 +25,35 @@ | |||||
namespace ge { | namespace ge { | ||||
/** | |||||
*@brief Element-wise computes the bitwise left-shift of x and y . \n | |||||
*@par Inputs: | |||||
*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" | |||||
are 0D scalars. | |||||
* @li x: A Tensor. Must be one of the following types: int8, int16, int32, | |||||
int64, uint8, uint16, uint32, uint64. | |||||
* @li y: A Tensor. Has the same type as "x". \n | |||||
*@par Outputs: | |||||
* z: A Tensor. Has the same type as "x". \n | |||||
*@attention Constraints: | |||||
*LeftShift runs on the Ascend AI CPU, which delivers poor performance. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the TensorFlow operator LeftShift. | |||||
*/ | |||||
REG_OP(LeftShift) | |||||
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \ | |||||
DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64})) | |||||
.INPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \ | |||||
DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64})) | |||||
.OUTPUT(z, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \ | |||||
DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64})) | |||||
.OP_END_FACTORY_REG(LeftShift) | |||||
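The semantics are the elementwise C shift on the common type, z[i] = x[i] << y[i]. A host-side reference for checking kernel output (a sketch, not the device implementation):

#include <cstdint>
#include <vector>

// Reference: elementwise left shift, same dtype as "x".
std::vector<int32_t> LeftShiftRef(const std::vector<int32_t> &x,
                                  const std::vector<int32_t> &y) {
  std::vector<int32_t> z(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    z[i] = x[i] << y[i];
  }
  return z;
}
// Example: x = {1, 2, 3}, y = {1, 2, 3}  ->  z = {2, 8, 24}.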
/** | /** | ||||
*@brief Element-wise computes the bitwise right-shift of x and y . \n | *@brief Element-wise computes the bitwise right-shift of x and y . \n | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -96,7 +96,7 @@ REG_OP(RefMerge) | |||||
* Otherwise, the data is forwarded to "output_false" . \n | * Otherwise, the data is forwarded to "output_false" . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li data: The tensor to be forwarded. \ n | |||||
*@li data: The tensor to be forwarded. \n | |||||
* Must be one of the following types: float16, float32, float64, | * Must be one of the following types: float16, float32, float64, | ||||
* int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool. | * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool. | ||||
*@li pred: A boolean scalar. The output port that will receive data . \n | *@li pred: A boolean scalar. The output port that will receive data . \n | ||||
@@ -387,12 +387,12 @@ REG_OP(ControlTrigger) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li x: One dimensional tensore of type int32, specifying queried shape, max size is 8. | |||||
*@li data_seq: One dimensional tensore of type int32, specifying the mapped table is queried. | |||||
*@li level_index: One dimensional tensore of type int32, specifying secondary index. \n | |||||
*@li x: One dimensional tensor of type int32, specifying queried shape, max size is 128. | |||||
*@li data_seq: One dimensional tensor of type int32, specifying the mapped table is queried. | |||||
*@li level_index: One dimensional tensor of type int32, specifying secondary index. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li y: A Tensor with shape [batch, 8], of type int32, specifying index of shape in the map. | |||||
*@li y: A Tensor with shape [8], of type int32, specifying index of shape in the map. | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
@@ -0,0 +1,52 @@ | |||||
/** | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/*! | |||||
* \file correlation.h | |||||
* \brief | |||||
*/ | |||||
#ifndef GE_OP_CORRELATION_OPS_H | |||||
#define GE_OP_CORRELATION_OPS_H | |||||
#include "graph/operator_reg.h" | |||||
namespace ge { | |||||
/** | |||||
*@brief Computes a 2D Correlation given 4D "x" and "filter" tensors. | |||||
* | |||||
*@par Inputs: | |||||
* @li filter: A 4D tensor of filters. | |||||
* @li x: A 4D tensor of input images; its batch number must be equal to the batch | |||||
* number of "filter", and its channel count must be equal to that of "filter". | |||||
* | |||||
*@par Attributes: | |||||
* @li groups: sets the correlation mode; must be 1 or the channel count. | |||||
* | |||||
*@par Outputs: | |||||
*y: A Tensor. Has the same type as "x". | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the Caffe Correlation custom operator. | |||||
*/ | |||||
REG_OP(Correlation) | |||||
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) | |||||
.ATTR(groups, Int, 1) | |||||
.OP_END_FACTORY_REG(Correlation) | |||||
} // namespace ge | |||||
#endif  // GE_OP_CORRELATION_OPS_H
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -137,6 +137,87 @@ REG_OP(CTCBeamSearchDecoder) | |||||
.OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE})) | .OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE})) | ||||
.OP_END_FACTORY_REG(CTCBeamSearchDecoder) | .OP_END_FACTORY_REG(CTCBeamSearchDecoder) | ||||
/** | |||||
*@brief The Connectionist Temporal Classification loss. | |||||
*@par Inputs: | |||||
*@li log_probs: Tensor of size (T, N, C), where T = input length, N = batch size,
and C = number of classes (including blank).
It represents the logarithmized probabilities of the outputs.
*@li targets: Tensor of size (N, S), where S = max target length.
It represents the target sequences.
*@li input_lengths: Tuple or tensor of size (N). It represents the lengths of the inputs.
*@li target_lengths: Tuple or tensor of size (N). It represents the lengths of the targets.
*@par Outputs: | |||||
*@li neg_log_likelihood: A loss value which is differentiable with respect to each input node. | |||||
*@li log_alpha: The probability of possible traces from input to target.
*@par Attributes: | |||||
*@li blank: Blank label. Defaults to 0.
*@li reduction: Specifies the reduction to apply to the output. Defaults to "mean".
*@li zero_infinity: Whether to zero infinite losses and the associated gradients.
*@par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator CTCLoss.
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(CTCLossV2) | |||||
.INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(targets, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(input_lengths, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(target_lengths, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(neg_log_likelihood, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(log_alpha, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(blank, Int, 0) | |||||
.ATTR(reduction, String, "mean") | |||||
.ATTR(zero_infinity, Bool, false) | |||||
.OP_END_FACTORY_REG(CTCLossV2) | |||||
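// Illustrative sketch only, not part of this op registration: a standalone
// check of the shape contract the CTCLossV2 doc above describes. The struct
// and function names are ours, not part of the GE API.
#include <cstdint>
#include <vector>

struct CtcLossV2Shapes {
  int64_t T;  // input (time) length
  int64_t N;  // batch size
  int64_t C;  // number of classes, including blank
  int64_t S;  // max target length
};

// True when the inputs match the documented layout: log_probs (T, N, C),
// targets (N, S), input_lengths (N), target_lengths (N).
bool ShapesMatchCtcLossV2Doc(const std::vector<int64_t> &log_probs,
                             const std::vector<int64_t> &targets,
                             const std::vector<int64_t> &input_lengths,
                             const std::vector<int64_t> &target_lengths,
                             const CtcLossV2Shapes &s) {
  return log_probs == std::vector<int64_t>{s.T, s.N, s.C} &&
         targets == std::vector<int64_t>{s.N, s.S} &&
         input_lengths == std::vector<int64_t>{s.N} &&
         target_lengths == std::vector<int64_t>{s.N};
}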
/** | |||||
*@brief The Connectionist Temporal Classification loss grad. | |||||
*@par Inputs: | |||||
*@li grad_out: Gradient renewal coefficient. Tensor of size (N), where N = batch size. | |||||
*@li log_probs: Tensor of size (T, N, C), where T = input length, N = batch size,
and C = number of classes (including blank).
It represents the logarithmized probabilities of the outputs.
*@li targets: Tensor of size (N, S), where S = max target length.
It represents the target sequences.
*@li input_lengths: Tuple or tensor of size (N). It represents the lengths of the inputs.
*@li target_lengths: Tuple or tensor of size (N). It represents the lengths of the targets.
*@li neg_log_likelihood: A loss value which is differentiable with respect to each input node.
*@li log_alpha: The probability of possible traces from input to target.
*@par Outputs: | |||||
*@li grad: Tensor of size (T, N, C). The gradient of the Connectionist Temporal Classification loss.
*@par Attributes: | |||||
*@li blank: Blank label. Defaults to 0.
*@li reduction: Specifies the reduction to apply to the output. Defaults to "mean".
*@li zero_infinity: Whether to zero infinite losses and the associated gradients.
*@par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator CTCLoss.
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(CTCLossV2Grad) | |||||
.INPUT(grad_out, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(targets, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(input_lengths, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(target_lengths, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(neg_log_likelihood, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(log_alpha, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(grad, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(blank, Int, 0) | |||||
.ATTR(reduction, String, "mean") | |||||
.ATTR(zero_infinity, Bool, false) | |||||
.OP_END_FACTORY_REG(CTCLossV2Grad) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -908,7 +908,7 @@ REG_OP(TensorArray) | |||||
.OUTPUT(handle, TensorType({DT_RESOURCE})) | .OUTPUT(handle, TensorType({DT_RESOURCE})) | ||||
.OUTPUT(flow, TensorType({DT_FLOAT})) | .OUTPUT(flow, TensorType({DT_FLOAT})) | ||||
.REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
.ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE) | |||||
.ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) | |||||
.ATTR(dynamic_size, Bool, false) | .ATTR(dynamic_size, Bool, false) | ||||
.ATTR(clear_after_read, Bool, true) | .ATTR(clear_after_read, Bool, true) | ||||
.ATTR(identical_element_shapes, Bool, false) | .ATTR(identical_element_shapes, Bool, false) | ||||
@@ -963,7 +963,7 @@ REG_OP(TensorArrayConcat) | |||||
DT_QUINT8, DT_QINT32})) | DT_QUINT8, DT_QINT32})) | ||||
.OUTPUT(lengths, TensorType({DT_INT64})) | .OUTPUT(lengths, TensorType({DT_INT64})) | ||||
.REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
.ATTR(element_shape_except0, ListInt, ge::UNKNOWN_SHAPE) | |||||
.ATTR(element_shape_except0, ListInt, ge::UNKNOWN_RANK) | |||||
.OP_END_FACTORY_REG(TensorArrayConcat) | .OP_END_FACTORY_REG(TensorArrayConcat) | ||||
/** | /** | ||||
@@ -999,7 +999,7 @@ REG_OP(TensorArrayGather) | |||||
DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, | DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, | ||||
DT_QUINT8, DT_QINT32})) | DT_QUINT8, DT_QINT32})) | ||||
.REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
.ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE) | |||||
.ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) | |||||
.OP_END_FACTORY_REG(TensorArrayGather) | .OP_END_FACTORY_REG(TensorArrayGather) | ||||
/** | /** | ||||
@@ -1430,6 +1430,24 @@ REG_OP(OrderedMapClear) | |||||
.ATTR(shared_name, String, "") | .ATTR(shared_name, String, "") | ||||
.OP_END_FACTORY_REG(OrderedMapClear) | .OP_END_FACTORY_REG(OrderedMapClear) | ||||
/** | |||||
*@brief FakeQueue, which supports the TensorFlow API FixedLengthRecordReader. \n
*@par Inputs: | |||||
*Including: | |||||
* @li resource: A Tensor of type DT_RESOURCE. | |||||
*@par Outputs: | |||||
*handle: A Tensor of type DT_STRING ref. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the TensorFlow operator FakeQueue. | |||||
*/ | |||||
REG_OP(FakeQueue) | |||||
.INPUT(resource, TensorType({DT_RESOURCE})) | |||||
.OUTPUT(handle, TensorType({DT_STRING})) | |||||
.OP_END_FACTORY_REG(FakeQueue) | |||||
/** | /** | ||||
*@brief Returns the number of incomplete elements in the underlying container. \n | *@brief Returns the number of incomplete elements in the underlying container. \n | ||||
@@ -2258,6 +2276,7 @@ REG_OP(LruCache) | |||||
.ATTR(shared_name, String, "LruCache") | .ATTR(shared_name, String, "LruCache") | ||||
.ATTR(cache_size, Int, 100000) | .ATTR(cache_size, Int, 100000) | ||||
.ATTR(load_factor, Float, 1) | .ATTR(load_factor, Float, 1) | ||||
.REQUIRED_ATTR(dtype, Type) | |||||
.OP_END_FACTORY_REG(LruCache) | .OP_END_FACTORY_REG(LruCache) | ||||
/** | /** | ||||
@@ -2277,9 +2296,9 @@ REG_OP(CacheAdd) | |||||
.INPUT(cache, TensorType({DT_RESOURCE})) | .INPUT(cache, TensorType({DT_RESOURCE})) | ||||
.INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
.OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
.OUTPUT(swap_in_idx, TensorType({DT_INT64})) | |||||
.OUTPUT(swap_in_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
.OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
.OUTPUT(swap_out_idx, TensorType({DT_INT64})) | |||||
.OUTPUT(swap_out_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
.OP_END_FACTORY_REG(CacheAdd) | .OP_END_FACTORY_REG(CacheAdd) | ||||
/** | /** | ||||
@@ -2295,9 +2314,65 @@ REG_OP(CacheAdd) | |||||
REG_OP(CacheRemoteIndexToLocal) | REG_OP(CacheRemoteIndexToLocal) | ||||
.INPUT(cache, TensorType({DT_RESOURCE})) | .INPUT(cache, TensorType({DT_RESOURCE})) | ||||
.INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
.OUTPUT(local_idx, TensorType({DT_INT64})) | |||||
.OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
.OP_END_FACTORY_REG(CacheRemoteIndexToLocal) | .OP_END_FACTORY_REG(CacheRemoteIndexToLocal) | ||||
/** | |||||
*@brief CacheAllIndexToLocal, get all ids in cache
*@par Inputs:
*cache: resource data
*@par Outputs:
*local_idx: ids in cache.
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(CacheAllIndexToLocal) | |||||
.INPUT(cache, TensorType({DT_RESOURCE})) | |||||
.OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
.REQUIRED_ATTR(dtype, Type) | |||||
.OP_END_FACTORY_REG(CacheAllIndexToLocal) | |||||
/** | |||||
*@brief DynamicGetNext, dynamically gets the next data
*@par Inputs: | |||||
*x: the iterator, all types are available | |||||
*@par Outputs: | |||||
*y: the data in the iterator, all types are available
*@par Attributes: | |||||
*output_types: types of all outputs | |||||
*output_shapes: shapes of all outputs | |||||
*_dynamic_graph_execute_mode: dynamic graph execution mode, | |||||
value is one of lazy_recompile and dynamic_execute | |||||
*_getnext_inputs_shape_range: shape ranges of outputs, | |||||
it takes effect when _dynamic_graph_execute_mode is dynamic_execute
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(DynamicGetNext) | |||||
.INPUT(x, TensorType::ALL()) | |||||
.DYNAMIC_OUTPUT(y, TensorType::ALL()) | |||||
.ATTR(output_types, ListType, {}) | |||||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||||
.ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile") | |||||
.ATTR(_getnext_inputs_shape_range, String, "") | |||||
.OP_END_FACTORY_REG(DynamicGetNext) | |||||
/** | |||||
*@brief AdpGetNext | |||||
*@par Outputs: | |||||
*y: the data in the iterator, all types are available
*@par Attributes: | |||||
*output_types: types of all outputs | |||||
*output_shapes: shapes of all outputs | |||||
*queue_name: cdqm queue name | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(AdpGetNext) | |||||
.DYNAMIC_OUTPUT(y, TensorType::ALL()) | |||||
.ATTR(output_types, ListType, {}) | |||||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||||
.ATTR(queue_name, String, "") | |||||
.OP_END_FACTORY_REG(AdpGetNext) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -28,10 +28,13 @@ namespace ge { | |||||
*@par Inputs: | *@par Inputs: | ||||
*Dynamic inputs, including: | *Dynamic inputs, including: | ||||
* @li x: A list of Tensor objects, each with same shape and type. The supported types are: | |||||
*x: A list of Tensor objects, each with same shape and type. The supported types are: | |||||
* float16, float32, double, int32, uint8, int16, int8, complex64, int64, | * float16, float32, double, int32, uint8, int16, int8, complex64, int64, | ||||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n | * qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n | ||||
*@par Attributes: | |||||
*N: A required attribute of type int32, specifying the number of inputs. \n
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same shape and type as the elements of "x". \n | *y: A Tensor. Has the same shape and type as the elements of "x". \n | ||||
@@ -122,7 +125,8 @@ REG_OP(MinimumGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
*One input: | *One input: | ||||
*x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | ||||
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n | |||||
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | |||||
For float32 type, the actual calculation on the chip is based on float16. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*dst_type: An required attribute of type int32, specifying the dst data type. \n | *dst_type: An required attribute of type int32, specifying the dst data type. \n | ||||
@@ -142,6 +146,8 @@ REG_OP(Cast) | |||||
/** | /** | ||||
*@brief Returns the truth value of (x1 >= x2) element-wise. \n | *@brief Returns the truth value of (x1 >= x2) element-wise. \n | ||||
*When the input is int32 and (x2 - x1) > 2**31 or < -2**31,
*AI Core accuracy is not guaranteed \n
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: | *Two inputs, including: | ||||
@@ -163,6 +169,8 @@ REG_OP(GreaterEqual) | |||||
/** | /** | ||||
*@brief Returns the truth value of (x1 < x2) element-wise. \n | *@brief Returns the truth value of (x1 < x2) element-wise. \n | ||||
*When the input is int32 and (x2 - x1) > 2**31 or < -2**31,
*AI Core accuracy is not guaranteed \n
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: | *Two inputs, including: | ||||
@@ -322,8 +330,8 @@ REG_OP(Sub) | |||||
*@brief computes the absolute value of a tensor. \n | *@brief computes the absolute value of a tensor. \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*One inputs, including: | |||||
* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n | |||||
*One input, including: \n | |||||
*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x". \n | *y: A Tensor. Has the same type as "x". \n | ||||
@@ -563,6 +571,8 @@ REG_OP(InvGrad) | |||||
/** | /** | ||||
*@brief: Returns the truth value of (x <= y) element-wise. \n | *@brief: Returns the truth value of (x <= y) element-wise. \n | ||||
*When the input is int32 and (x2 - x1) > 2**31 or < -2**31,
*AI Core accuracy is not guaranteed \n
*@par Inputs: | *@par Inputs: | ||||
* Two inputs, including: | * Two inputs, including: | ||||
@@ -611,6 +621,15 @@ REG_OP(Log1p) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x1". | *y: A Tensor. Has the same type as "x1". | ||||
*@attention Constraints: | |||||
*@li x2: The input data must not contain 0
*@li When the element count exceeds 2048, the accuracy of the operator cannot meet the
*requirement of two thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent
*@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator Mod. | *Compatible with the TensorFlow operator Mod. | ||||
*/ | */ | ||||
@@ -1020,7 +1039,7 @@ REG_OP(BesselI1e) | |||||
* y = log_base(shift + scale * x), with "base" > 0. \n | * y = log_base(shift + scale * x), with "base" > 0. \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* @li x: A Tensor of type complex64, complex128, float16, float32 or double. \n | |||||
* x: A Tensor of type complex64, complex128, float16, float32 or double. \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li base: An optional float32, specifying the base "e". Defaults to "-1.0" | * @li base: An optional float32, specifying the base "e". Defaults to "-1.0" | ||||
@@ -1065,7 +1084,7 @@ REG_OP(Log) | |||||
* uint8, int8, uint16, int16, int32, int64, complex64, complex128. \n | * uint8, int8, uint16, int16, int32, int64, complex64, complex128. \n | ||||
* @attention Constraints: | * @attention Constraints: | ||||
* @li "x1" and "x2" have incompatible shapes or types. \n | |||||
* "x1" and "x2" have incompatible shapes or types. \n | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator Multiply. | * Compatible with the TensorFlow operator Multiply. | ||||
@@ -1451,6 +1470,8 @@ REG_OP(ReciprocalGrad) | |||||
/** | /** | ||||
*@brief Returns the truth value of (x1 > x2) element-wise. \n | *@brief Returns the truth value of (x1 > x2) element-wise. \n | ||||
*When the input is int32 and (x2 - x1) > 2**31 or < -2**31,
*AI Core accuracy is not guaranteed \n
*@par Inputs: | *@par Inputs: | ||||
*@li x1: A Tensor of type float16, float32, double, int64, int32, int16, int8, | *@li x1: A Tensor of type float16, float32, double, int64, int32, int16, int8, | ||||
@@ -2042,6 +2063,15 @@ REG_OP(FloorDiv) | |||||
* | * | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: Result remainder. | *y: Result remainder. | ||||
*@attention Constraints: | |||||
*@li x2: The input data must not contain 0
*@li When the element count exceeds 2048, the accuracy of the operator cannot meet the
*requirement of two thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent
*@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator FloorMod. | * Compatible with the TensorFlow operator FloorMod. | ||||
*/ | */ | ||||
@@ -2168,6 +2198,14 @@ REG_OP(Tan) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x1". \n | *y: A Tensor. Has the same type as "x1". \n | ||||
*@attention Constraints: | |||||
*@li x2: The input data must not contain 0
*@li When the element count exceeds 2048, the accuracy of the operator cannot meet the
*requirement of two thousandths in the mini form
*@li Due to different architectures, the calculation results of this operator
*on NPU and CPU may be inconsistent
*@li If the shape is expressed as (D1, D2, ..., Dn), then D1*D2*...*Dn <= 1000000 and n <= 8
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*@li Compatible with the TensorFlow operator TruncateMod. | *@li Compatible with the TensorFlow operator TruncateMod. | ||||
*/ | */ | ||||
@@ -2424,6 +2462,25 @@ REG_OP(Eltwise) | |||||
.ATTR(coeff, ListFloat, {}) | .ATTR(coeff, ListFloat, {}) | ||||
.OP_END_FACTORY_REG(Eltwise) | .OP_END_FACTORY_REG(Eltwise) | ||||
/** | |||||
*@brief Computes the inverse error function of each element of input. \n | |||||
*@par Inputs: | |||||
*One input, including:
* @li input_x: A tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
*@par Outputs: | |||||
*y: A Tensor with the same type and shape as "input_x". \n
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator Erfinv. \n | |||||
*/ | |||||
REG_OP(Erfinv) | |||||
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OP_END_FACTORY_REG(Erfinv) | |||||
/** | /** | ||||
*@brief Computes element-wise population count. \n | *@brief Computes element-wise population count. \n | ||||
@@ -2829,9 +2886,9 @@ REG_OP(AdamApplyOneAssign) | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
*/ | */ | ||||
REG_OP(LambApplyOptimizerAssign) | REG_OP(LambApplyOptimizerAssign) | ||||
.INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
.INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
.INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
@@ -2842,6 +2899,8 @@ REG_OP(LambApplyOptimizerAssign) | |||||
.INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
.INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
.OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) | .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
.OUTPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(LambApplyOptimizerAssign) | .OP_END_FACTORY_REG(LambApplyOptimizerAssign) | ||||
/** | /** | ||||
@@ -2873,7 +2932,8 @@ REG_OP(LambApplyWeightAssign) | |||||
.INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
.INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
.INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
.INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(LambApplyWeightAssign) | .OP_END_FACTORY_REG(LambApplyWeightAssign) | ||||
/** | /** | ||||
@@ -3183,12 +3243,14 @@ REG_OP(Fills) | |||||
*@brief Add tensor with scale. \n | *@brief Add tensor with scale. \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*Five inputs, including: | |||||
* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. | |||||
* @li x2: A scale. Must be float. \n | |||||
*One input, including: \n | |||||
*x: A Tensor. Must be one of the following types: int32, int16, float16, float32. \n
*@par Attributes: | |||||
*value: A scale. Must be float. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li y: A Tensor. Has the same type and shape as "x1". \n | |||||
*y: A Tensor. Has the same type and shape as "x1". \n | |||||
*@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
* Compatible with the Pytorch operator adds. | * Compatible with the Pytorch operator adds. | ||||
@@ -3329,8 +3391,441 @@ REG_OP(TensorRedirect) | |||||
.OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | ||||
DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) | DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) | ||||
.OP_END_FACTORY_REG(TensorRedirect) | .OP_END_FACTORY_REG(TensorRedirect) | ||||
} // namespace ge | |||||
/** | |||||
* @brief Performs the element-wise division of tensor x1 by tensor x2,
* multiplies the result by the scalar value and adds it to tensor input_data
* @par Inputs:
* Four inputs, including:
* @li input_data: A mutable input Tensor. Must be one of the following types:
* float16, float32.
* @li x1: A mutable input Tensor of the same type as input_data.
* @li x2: A mutable input Tensor of the same type as input_data.
* @li value: A mutable input Tensor. Must be one of the following types:
* float16, float32, int32. \n
* @par Outputs:
* y: A mutable Tensor. Has the same type as "input_data". \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator Addcdiv. | |||||
*/ | |||||
REG_OP(Addcdiv) | |||||
.INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(Addcdiv) | |||||
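// Illustrative sketch only, not part of this op registration: the element-wise
// semantics described above, shown standalone, assuming equal shapes, a scalar
// value and no broadcasting. The function name is ours.
#include <cstddef>
#include <vector>

std::vector<float> AddcdivRef(const std::vector<float> &input_data,
                              const std::vector<float> &x1,
                              const std::vector<float> &x2, float value) {
  std::vector<float> y(input_data.size());
  for (std::size_t i = 0; i < y.size(); ++i) {
    // y = input_data + value * (x1 / x2); x2 must not contain 0.
    y[i] = input_data[i] + value * (x1[i] / x2[i]);
  }
  return y;
}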
/** | |||||
* @brief Performs the element-wise multiplication of tensor x1 by tensor x2,
* multiplies the result by the scalar value and adds it to tensor input_data
* @par Inputs:
* Four inputs, including:
* @li input_data: A mutable input Tensor. Must be one of the following types:
* float16, float32, int8, int32, uint8.
* @li x1: A mutable input Tensor of the same type as input_data.
* @li x2: A mutable input Tensor of the same type as input_data.
* @li value: A tensor which includes only one element of the same type as input_data. \n
* @par Outputs:
* y: A mutable output Tensor. Has the same type as "input_data". \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator Addcmul. | |||||
*/ | |||||
REG_OP(Addcmul) | |||||
.INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
.INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
.INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
.INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
.OP_END_FACTORY_REG(Addcmul) | |||||
/** | |||||
* @brief Computes the result of x2 * alpha + x1. | |||||
* @par Inputs: | |||||
* @li x1: An ND tensor of type float16, float32, int32. | |||||
* @li x2: An ND tensor of type float16, float32, int32. | |||||
* @li alpha: A scalar tensor of type float16, float32. \n | |||||
* @par Outputs: | |||||
* y: An ND tensor with the same shape and type as "x1". \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator Axpy. | |||||
*/ | |||||
REG_OP(AxpyV2) | |||||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.OP_END_FACTORY_REG(AxpyV2) | |||||
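// Illustrative sketch only, not part of this op registration: the documented
// formula y = x2 * alpha + x1, shown standalone for flat float buffers of equal
// shape with a scalar alpha. The function name is ours.
#include <cstddef>
#include <vector>

std::vector<float> AxpyV2Ref(const std::vector<float> &x1,
                             const std::vector<float> &x2, float alpha) {
  std::vector<float> y(x1.size());
  for (std::size_t i = 0; i < y.size(); ++i) {
    y[i] = x2[i] * alpha + x1[i];
  }
  return y;
}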
/** | |||||
* @brief Computes the result of x1 - x2. | |||||
* @par Inputs: | |||||
* @li x1: An ND tensor of type float16, float, int32. | |||||
* @li x2: An ND tensor of type float16, float, int32. \n | |||||
* @par Outputs: | |||||
* y: An ND tensor with the same type as "x1". \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator Sub. | |||||
*/ | |||||
REG_OP(PtSub) | |||||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.OP_END_FACTORY_REG(PtSub) | |||||
/** | |||||
* @brief Add the partial values of two tensors in format NC1HWC0. | |||||
* @par Inputs: | |||||
* @li x1: A Tensor in 5HD, and must be one of the following types: float16, | |||||
* float32. \n | |||||
* @li x2: A Tensor of the same type as "x1", and the same shape as "x1", | |||||
* except for the C1 value. \n | |||||
* @par Attributes: | |||||
* @li x1_c1_offset: A required int. Offset value of C1 in "x1". \n | |||||
* @li x2_c1_offset: A required int. Offset value of C1 in "x2". \n | |||||
* @li c1_len: A required int. C1 len of "y". The value must be less than | |||||
* the difference between C1 and offset in "x1" and "x2". \n | |||||
* @par Outputs: | |||||
* @li y: A Tensor of the same type as "x1", and the same shape as "x1", | |||||
* except for the C1 value. Record the result after adding. \n | |||||
*/ | |||||
REG_OP(StrideAdd) | |||||
.INPUT(x1, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.INPUT(x2, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.REQUIRED_ATTR(x1_c1_offset, Int) | |||||
.REQUIRED_ATTR(x2_c1_offset, Int) | |||||
.REQUIRED_ATTR(c1_len, Int) | |||||
.OP_END_FACTORY_REG(StrideAdd) | |||||
/** | |||||
* @brief Compares whether two tensors are totally equal, outputting only a bool value
* @par Inputs: | |||||
* Two inputs, including: | |||||
* @li input_x: A Tensor. the first tensor. \n | |||||
* @li input_y: A Tensor. the second tensor. \n | |||||
* @par Outputs: | |||||
* @li output_z: A Tensor. Bool type, compare result of the two inputs. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch equal operator. \n | |||||
*/ | |||||
REG_OP(TensorEqual) | |||||
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
.INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
.OUTPUT(output_z, TensorType({DT_BOOL})) | |||||
.OP_END_FACTORY_REG(TensorEqual) | |||||
/** | |||||
* @brief Element-wise max of each of the input tensors (with Numpy-style broadcasting support).
* All inputs and outputs must have the same data type. This operator supports multidirectional
* (i.e., Numpy-style) broadcasting
*
* @par Inputs:
* One input, including:
* x: A dynamic input Tensor. Must be one of the following types: float32, float16, double, int32, int64
*
* @par Outputs:
* One output, including:
* y: A Tensor of the same type as x
* | |||||
*/ | |||||
REG_OP(MaxN) | |||||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) | |||||
.OP_END_FACTORY_REG(MaxN) | |||||
/** | |||||
* @brief Calculates x * mask * value.
*
* @par Inputs:
* @li x: A tensor of type float16 or float32, specifying the input to the data layer.
* @li mask: A tensor of type int8, float16 or float32, with the same shape as x. \n
*
* @par Attributes:
* value: A required float. \n
*
* @par Outputs:
* y: A Tensor of the same type and shape as x, of type float16 or float32.
*
* | |||||
*/ | |||||
REG_OP(MaskedScale) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||||
.INPUT(mask, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||||
.REQUIRED_ATTR(value, Float) | |||||
.OP_END_FACTORY_REG(MaskedScale) | |||||
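// Illustrative sketch only, not part of this op registration: MaskedScale as
// the doc above describes it, y = x * mask * value, shown standalone with an
// int8 mask and equal shapes assumed. The function name is ours.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<float> MaskedScaleRef(const std::vector<float> &x,
                                  const std::vector<int8_t> &mask,
                                  float value) {
  std::vector<float> y(x.size());
  for (std::size_t i = 0; i < y.size(); ++i) {
    y[i] = x[i] * static_cast<float>(mask[i]) * value;
  }
  return y;
}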
/** | |||||
* @brief Calculate the lerp function. \n | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li start: A tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @li end: A tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @li weight: A tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @par Outputs: | |||||
* y: A Tensor with the same type and shape as "start". \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator Lerp. \n | |||||
*/ | |||||
REG_OP(Lerp) | |||||
.INPUT(start, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(end, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(Lerp) | |||||
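// Illustrative sketch only, not part of this op registration: the usual lerp
// formula, y = start + weight * (end - start), element-wise, shown standalone
// with equal shapes assumed. The function name is ours.
#include <cstddef>
#include <vector>

std::vector<float> LerpRef(const std::vector<float> &start,
                           const std::vector<float> &end,
                           const std::vector<float> &weight) {
  std::vector<float> y(start.size());
  for (std::size_t i = 0; i < y.size(); ++i) {
    y[i] = start[i] + weight[i] * (end[i] - start[i]);
  }
  return y;
}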
/** | |||||
*@brief Returns the number of elements for which abs(x1-x2) > atol+rtol*abs(x2), element-wise. \n
* | |||||
*@par Inputs: | |||||
*@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16 | |||||
*@li x2: A tensor of the same type as "x1". | |||||
* | |||||
*@par Attributes: | |||||
* atol: Defaults to "1e-05". | |||||
* rtol: Defaults to "1e-03". | |||||
* | |||||
*@par Outputs: | |||||
* num: A tensor of type float32. | |||||
* | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
* | |||||
*/ | |||||
REG_OP(DataCompare) | |||||
.INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 })) | |||||
.INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 })) | |||||
.OUTPUT(num, TensorType({DT_FLOAT})) | |||||
.ATTR(atol, Float, 1e-5) | |||||
.ATTR(rtol, Float, 1e-3) | |||||
.OP_END_FACTORY_REG(DataCompare) | |||||
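// Illustrative sketch only, not part of this op registration: counting the
// elements that violate the documented tolerance abs(x1-x2) > atol + rtol*abs(x2),
// shown standalone; the function name is ours.
#include <cmath>
#include <cstddef>
#include <vector>

float DataCompareRef(const std::vector<float> &x1,
                     const std::vector<float> &x2,
                     float atol = 1e-5f, float rtol = 1e-3f) {
  float num = 0.0f;
  for (std::size_t i = 0; i < x1.size(); ++i) {
    if (std::fabs(x1[i] - x2[i]) > atol + rtol * std::fabs(x2[i])) {
      num += 1.0f;
    }
  }
  return num;  // the "num" output is a float32 count
}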
/** | |||||
*@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0
*otherwise. The input does not need to explicitly be a 2D vector. The "axis" attribute indicates the dimension along
*which Hardmax will be performed. The output tensor has the same shape and contains the Hardmax values of the
*corresponding input.
* | |||||
*@par Inputs:
*One input, including:
*x: An input Tensor. Must be one of the following types: float32, float16
*
*@par Attributes:
*axis: An optional int attribute that decides which dimension will be used to compute the hardmax. Defaults to -1
*
*@par Outputs:
*One output, including:
*y: A Tensor of the same type as x
* | |||||
*/ | |||||
REG_OP(HardMax) | |||||
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(axis, Int, -1) | |||||
.OP_END_FACTORY_REG(HardMax) | |||||
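// Illustrative sketch only, not part of this op registration: hardmax over the
// last axis of a row-major (rows, cols) buffer; the first maximum in each row
// gets 1, everything else 0, matching the description above. Names are ours.
#include <cstddef>
#include <vector>

std::vector<float> HardMaxLastAxisRef(const std::vector<float> &x,
                                      std::size_t rows, std::size_t cols) {
  std::vector<float> y(x.size(), 0.0f);
  for (std::size_t r = 0; r < rows; ++r) {
    std::size_t best = 0;
    for (std::size_t c = 1; c < cols; ++c) {
      // strict ">" keeps the first occurrence of the maximum on ties
      if (x[r * cols + c] > x[r * cols + best]) best = c;
    }
    y[r * cols + best] = 1.0f;
  }
  return y;
}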
/** | |||||
* @brief Computes the dot product (inner product) of two tensors. This function does not broadcast. | |||||
* @par Inputs: | |||||
* Two inputs, including: | |||||
* @li input_x: A Tensor. The first tensor must be 1D. \n
* @li input_y: A Tensor. The second tensor must be 1D. \n
* @par Outputs:
* output: A Tensor. The dot-product result of the two inputs, must be 1D. \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch dot operator. \n | |||||
*/ | |||||
REG_OP(Dot) | |||||
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||||
.INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||||
.OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||||
.OP_END_FACTORY_REG(Dot) | |||||
/** | |||||
*@brief Returns a new tensor with boolean elements representing \n | |||||
*if each element of input is "close" to the corresponding element of other \n
*@par Inputs: | |||||
*Two inputs, including: | |||||
* @li x1: A tensor. Must be one of the following types: | |||||
* float16, float32, int32. \n | |||||
* @li x2: A tensor with the same type and shape as "x1". \n
*@par Attributes: | |||||
*@li rtol: An optional float. Defaults to 1e-05. \n
*@li atol: An optional float. Defaults to 1e-08. \n
*@li equal_nan: An optional bool. Defaults to false. \n
*@par Outputs: | |||||
*y: A bool Tensor with the same shape as "x1". \n
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator isclose. \n | |||||
*/ | |||||
REG_OP(IsClose) | |||||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_BOOL})) | |||||
.ATTR(rtol, Float, 1e-05) | |||||
.ATTR(atol, Float, 1e-08) | |||||
.ATTR(equal_nan, Bool, false) | |||||
.OP_END_FACTORY_REG(IsClose) | |||||
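// Illustrative sketch only, not part of this op registration: the element-wise
// test described above, abs(x1-x2) <= atol + rtol*abs(x2), with the optional
// equal_nan treatment; shown standalone, the function name is ours.
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<bool> IsCloseRef(const std::vector<float> &x1,
                             const std::vector<float> &x2,
                             float rtol = 1e-5f, float atol = 1e-8f,
                             bool equal_nan = false) {
  std::vector<bool> y(x1.size());
  for (std::size_t i = 0; i < x1.size(); ++i) {
    if (std::isnan(x1[i]) || std::isnan(x2[i])) {
      // NaNs compare close only when equal_nan is set and both sides are NaN
      y[i] = equal_nan && std::isnan(x1[i]) && std::isnan(x2[i]);
    } else {
      y[i] = std::fabs(x1[i] - x2[i]) <= atol + rtol * std::fabs(x2[i]);
    }
  }
  return y;
}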
/** | |||||
* @brief Returns the reverse tensor of the ArgMax operator of a tensor. \n | |||||
* @par Inputs: | |||||
* Three inputs, including:
* var: A Tensor of type float16, float32, int32 or int8. \n | |||||
* indices: A Tensor of type int32. \n | |||||
* updates: A Tensor of type float16, float32, int32 or int8. \n | |||||
* @par Attributes: | |||||
* @li dimension: An integer of type int, specifying the axis information of the index with the maximum value.\n | |||||
* @par Outputs: | |||||
* y: A Tensor of type float16, float32, int32 or int8. \n | |||||
* | |||||
*@attention Constraints: | |||||
*@li indices: only supports int32, and its shape must be the same as "updates"
*@li The value range of "dimension" is [-dims, dims - 1], where "dims" is the dimension length of "var".
*@li y: A Tensor whose type and shape are the same as "var" \n
*@par Third-party framework compatibility | |||||
* Does not support all scenes of the Pytorch operator scatter.
* For example:
* if var.shape = [2,3,4,5] and dim = 2, the shape of indices and updates should be [2,3,5];
* the shape [2,3,2,5] of indices and updates, as in the Pytorch operator scatter, is not supported. \n
*/ | |||||
REG_OP(ArgMaxGrad) | |||||
.INPUT(var, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
.REQUIRED_ATTR(dimension, Int) | |||||
.OP_END_FACTORY_REG(ArgMaxGrad) | |||||
/** | |||||
* @brief Returns the reverse tensor of the ArgMax operator of a tensor. \n | |||||
* @par Inputs: | |||||
* Four inputs, including:
* var: A Tensor of type float16, float32, int32 or int8. \n | |||||
* indices: A Tensor of type int32. \n | |||||
* updates: A Tensor of type float16, float32, int32 or int8. \n | |||||
* assist: A Tensor of type int32; an assist matrix whose shape must match the shape of var \n
* @par Attributes: | |||||
* @li dimension: An integer of type int, specifying the axis information of the index with the maximum value.\n | |||||
* @par Outputs: | |||||
* y: A Tensor of type float16, float32, int32 or int8. \n | |||||
*@attention Constraints: | |||||
*@li indices: only supports int32, and its shape must be the same as "updates"
*@li The value range of "dimension" is [-dims, dims - 1], where "dims" is the dimension length of "var".
*@li y: A Tensor whose type and shape are the same as "var" \n
*@par Third-party framework compatibility | |||||
* Does not support all scenes of the Pytorch operator scatter.
* For example:
* if var.shape = [2,3,4,5] and dim = 2, the shape of indices and updates should be [2,3,5];
* the shape [2,3,2,5] of indices and updates, as in the Pytorch operator scatter, is not supported. \n
*/ | |||||
REG_OP(ArgMaxGradD) | |||||
.INPUT(var, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
.INPUT(assist, TensorType({DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
.REQUIRED_ATTR(dimension, Int) | |||||
.OP_END_FACTORY_REG(ArgMaxGradD) | |||||
/** | |||||
*@brief Calculates the reversed outputs of the function "AddMatMatElements" | |||||
* c = c * beta + alpha * a * b | |||||
*@par Inputs: | |||||
*Five inputs, including:
* @li c: A mutable Tensor. Must be one of the following types: | |||||
* float16, float32. | |||||
* @li a: A mutable Tensor of the same type as "c". | |||||
* @li b: A mutable Tensor of the same type as "c". | |||||
* @li beta: A mutable scalar of the same type as "c". | |||||
* @li alpha: A mutable scalar of the same type as "c". \n | |||||
*@par Outputs: | |||||
* @li c: A mutable Tensor. Has the same type as "c". \n | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the TensorFlow operator AddMatMatElements. | |||||
*/ | |||||
REG_OP(AddMatMatElements) | |||||
.INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(a, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(b, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(c, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OP_END_FACTORY_REG(AddMatMatElements) | |||||
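// Illustrative sketch only, not part of this op registration: the element-wise
// update c = c * beta + alpha * a * b from the doc above, shown standalone with
// scalar alpha and beta. The function name is ours.
#include <cstddef>
#include <vector>

void AddMatMatElementsRef(std::vector<float> &c, const std::vector<float> &a,
                          const std::vector<float> &b, float beta,
                          float alpha) {
  for (std::size_t i = 0; i < c.size(); ++i) {
    c[i] = c[i] * beta + alpha * a[i] * b[i];
  }
}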
/** | |||||
*@brief Returns the cosine similarity between x1 and x2, computed along dim. \n
*@par Inputs:
*Two inputs, including:
* @li input_x1: A tensor. Must be one of the following types:
* float32. \n
* @li input_x2: A tensor. Must be one of the following types:
* float32. \n
*@par Outputs:
*output_y: A Tensor with the same type as the inputs. \n
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator CosineSimilarity. \n | |||||
*/ | |||||
REG_OP(CosineSimilarity) | |||||
.INPUT(input_x1, TensorType({DT_FLOAT})) /* "First operand." */ | |||||
.INPUT(input_x2, TensorType({DT_FLOAT})) /* "Second operand." */ | |||||
.OUTPUT(output_y, TensorType({DT_FLOAT})) /* "Result, has same element type as two inputs" */ | |||||
.ATTR(dim, Int, 1) | |||||
.ATTR(eps, Float, 1e-8) | |||||
.OP_END_FACTORY_REG(CosineSimilarity) | |||||
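// Illustrative sketch only, not part of this op registration: cosine
// similarity of two 1-D float32 vectors with the documented eps guard;
// dim handling for N-D tensors is omitted. The function name is ours.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

float CosineSimilarityRef(const std::vector<float> &x1,
                          const std::vector<float> &x2, float eps = 1e-8f) {
  float dot = 0.0f, n1 = 0.0f, n2 = 0.0f;
  for (std::size_t i = 0; i < x1.size(); ++i) {
    dot += x1[i] * x2[i];
    n1 += x1[i] * x1[i];
    n2 += x2[i] * x2[i];
  }
  // eps keeps the denominator away from zero, as in the Pytorch operator
  const float denom = std::sqrt(n1) * std::sqrt(n2);
  return dot / std::max(denom, eps);
}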
} // namespace ge | |||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -0,0 +1,49 @@ | |||||
/** | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/*! | |||||
* \file globalavgpool.h | |||||
* \brief | |||||
*/ | |||||
#ifndef OPS_BUILT_IN_OP_PROTO_INC_GLOBALAVERAGEPOOL_H_ | |||||
#define OPS_BUILT_IN_OP_PROTO_INC_GLOBALAVERAGEPOOL_H_ | |||||
#include "graph/operator_reg.h" | |||||
namespace ge { | |||||
/** | |||||
*@brief GlobalAveragePool consumes an input tensor X and applies average pooling across the values in the same channel. | |||||
This is equivalent to AveragePool with kernel size equal to the spatial dimension of input tensor \n | |||||
*@par Inputs: | |||||
*@li x: Input data tensor from the previous operator; dimensions for image case are (N x C x H x W), | |||||
where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. | |||||
For non image case, the dimensions are in the form of (N x C x D1 x D2 ... Dn), where N is the batch size. | |||||
*@par Outputs: | |||||
*y: Output data tensor from pooling across the input tensor. The output tensor has the same rank as the input. | |||||
The first two dimensions of output shape are the same as the input (N x C), while the other dimensions are all 1 | |||||
*@par Restrictions: | |||||
*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. | |||||
*/ | |||||
REG_OP(GlobalAveragePool) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OP_END_FACTORY_REG(GlobalAveragePool) | |||||
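// Illustrative sketch only, not part of this op registration: global average
// pooling over an NCHW float buffer, producing a (N, C, 1, 1) result as the
// doc above describes; shown standalone, the function name is ours.
#include <cstddef>
#include <vector>

std::vector<float> GlobalAveragePoolRef(const std::vector<float> &x,
                                        std::size_t n, std::size_t c,
                                        std::size_t h, std::size_t w) {
  std::vector<float> y(n * c, 0.0f);
  const std::size_t hw = h * w;
  for (std::size_t i = 0; i < n * c; ++i) {
    for (std::size_t j = 0; j < hw; ++j) {
      y[i] += x[i * hw + j];
    }
    y[i] /= static_cast<float>(hw);  // mean over the spatial dimensions
  }
  return y;  // logically shaped (N, C, 1, 1)
}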
} // namespace ge | |||||
#endif  // OPS_BUILT_IN_OP_PROTO_INC_GLOBALAVERAGEPOOL_H_
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -45,8 +45,6 @@ REG_OP(HcomAllGather) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | ||||
.REQUIRED_ATTR(rank_size, Int) | .REQUIRED_ATTR(rank_size, Int) | ||||
.REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
.ATTR(alpha, Float, 1.0) | |||||
.ATTR(beta, Float, 0.0) | |||||
.OP_END_FACTORY_REG(HcomAllGather) | .OP_END_FACTORY_REG(HcomAllGather) | ||||
/** | /** | ||||
@@ -77,8 +75,6 @@ REG_OP(HcomAllReduce) | |||||
.REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
.ATTR(fusion, Int, 1) | .ATTR(fusion, Int, 1) | ||||
.ATTR(fusion_id, Int, -1) | .ATTR(fusion_id, Int, -1) | ||||
.ATTR(alpha, Float, 1.0) | |||||
.ATTR(beta, Float, 0.0) | |||||
.OP_END_FACTORY_REG(HcomAllReduce) | .OP_END_FACTORY_REG(HcomAllReduce) | ||||
/** | /** | ||||
@@ -91,7 +87,7 @@ REG_OP(HcomAllReduce) | |||||
input of this rank will be broadcast to other ranks. | input of this rank will be broadcast to other ranks. | ||||
* @li fusion: A required integer identifying if the op need to fusion,the | * @li fusion: A required integer identifying if the op need to fusion,the | ||||
default value is none fusion | default value is none fusion | ||||
* @li fusion: A required integer identifying the fusion id if para fusion | |||||
* @li fusion_id: A required integer identifying the fusion id if para fusion | |||||
is set. | is set. | ||||
* @li group: A required string identifying the group name of ranks | * @li group: A required string identifying the group name of ranks | ||||
participating in the op. | participating in the op. | ||||
@@ -109,10 +105,39 @@ REG_OP(HcomBroadcast) | |||||
.REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
.ATTR(fusion, Int, 0) | .ATTR(fusion, Int, 0) | ||||
.ATTR(fusion_id, Int, -1) | .ATTR(fusion_id, Int, -1) | ||||
.ATTR(alpha, Float, 1.0) | |||||
.ATTR(beta, Float, 0.0) | |||||
.OP_END_FACTORY_REG(HcomBroadcast) | .OP_END_FACTORY_REG(HcomBroadcast) | ||||
/** | |||||
* @brief Performs reduction from other ranks to the root rank
* @par Inputs:
* x: A tensor. Must be one of the following types: int8, int16, int32, float16,
float32.
* @par Attributes:
* @li root_rank: A required integer identifying the root rank in the op;
the reduction result will be on this root rank.
* @li reduction: A required string identifying the reduction operation to
perform. The supported operations are: "sum", "max", "min", "prod".
* @li group: A required string identifying the group name of ranks
participating in the op.
* @li fusion: An optional integer identifying the fusion flag of the op.
0 (default): no fusion; 1: fusion; 2: fuse the ops by fusion id.
* @li fusion_id: An optional integer identifying the fusion id of the op. | |||||
* The HcomReduce ops with the same fusion id will be fused. | |||||
* @par Outputs: | |||||
* y: A Tensor. Has the same type as "x". | |||||
* @attention Constraints: | |||||
*"group" is limited to 128 characters. Use "hccl_world_group" | |||||
as the name of a world group. | |||||
*/ | |||||
REG_OP(HcomReduce) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) | |||||
.REQUIRED_ATTR(root_rank, Int) | |||||
.REQUIRED_ATTR(reduction, String) | |||||
.REQUIRED_ATTR(group, String) | |||||
.ATTR(fusion, Int, 0) | |||||
.ATTR(fusion_id, Int, -1) | |||||
.OP_END_FACTORY_REG(HcomReduce) | |||||
/** | /** | ||||
* @brief Performs reduction across all input tensors, scattering in equal | * @brief Performs reduction across all input tensors, scattering in equal | ||||
blocks among ranks, each rank getting a chunk of data based on its rank | blocks among ranks, each rank getting a chunk of data based on its rank | ||||
@@ -139,8 +164,6 @@ REG_OP(HcomReduceScatter) | |||||
.REQUIRED_ATTR(reduction, String) | .REQUIRED_ATTR(reduction, String) | ||||
.REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
.REQUIRED_ATTR(rank_size, Int) | .REQUIRED_ATTR(rank_size, Int) | ||||
.ATTR(alpha, Float, 1.0) | |||||
.ATTR(beta, Float, 0.0) | |||||
.OP_END_FACTORY_REG(HcomReduceScatter) | .OP_END_FACTORY_REG(HcomReduceScatter) | ||||
/** | /** | ||||
@@ -167,8 +190,6 @@ REG_OP(HcomSend) | |||||
.REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
.REQUIRED_ATTR(sr_tag, Int) | .REQUIRED_ATTR(sr_tag, Int) | ||||
.REQUIRED_ATTR(dest_rank, Int) | .REQUIRED_ATTR(dest_rank, Int) | ||||
.ATTR(alpha, Float, 1.0) | |||||
.ATTR(beta, Float, 0.0) | |||||
.OP_END_FACTORY_REG(HcomSend) | .OP_END_FACTORY_REG(HcomSend) | ||||
/** | /** | ||||
@@ -202,8 +223,6 @@ REG_OP(HcomReceive) | |||||
.REQUIRED_ATTR(src_rank, Int) | .REQUIRED_ATTR(src_rank, Int) | ||||
.REQUIRED_ATTR(shape, ListInt) | .REQUIRED_ATTR(shape, ListInt) | ||||
.REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
.ATTR(alpha, Float, 1.0) | |||||
.ATTR(beta, Float, 0.0) | |||||
.OP_END_FACTORY_REG(HcomReceive) | .OP_END_FACTORY_REG(HcomReceive) | ||||
/** | /** | ||||
@@ -219,6 +238,15 @@ REG_OP(HcomRemoteRead) | |||||
.REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
.OP_END_FACTORY_REG(HcomRemoteRead) | .OP_END_FACTORY_REG(HcomRemoteRead) | ||||
/** | |||||
* @brief Performs Remote Ref Read of input tensors | |||||
* @par Inputs: | |||||
* remote: A tensor describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length
* cache_var: The local base address | |||||
* local_offset: Skip step length | |||||
* @par Outputs: | |||||
* cache_var: The local base address | |||||
*/ | |||||
REG_OP(HcomRemoteRefRead) | REG_OP(HcomRemoteRefRead) | ||||
.INPUT(remote, TensorType({DT_UINT64})) | .INPUT(remote, TensorType({DT_UINT64})) | ||||
.INPUT(cache_var, TensorType({DT_UINT64})) | .INPUT(cache_var, TensorType({DT_UINT64})) | ||||
@@ -239,11 +267,90 @@ REG_OP(HcomRemoteWrite) | |||||
.INPUT(local, TensorType::ALL()) | .INPUT(local, TensorType::ALL()) | ||||
.OP_END_FACTORY_REG(HcomRemoteWrite) | .OP_END_FACTORY_REG(HcomRemoteWrite) | ||||
/** | |||||
* @brief Performs Remote Scatter Write of input tensors
* @par Inputs:
* @li remote: A tensor describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length
* @li local: A Tensor whose size is length / size_of(Type)
*/ | |||||
REG_OP(HcomRemoteScatterWrite) | REG_OP(HcomRemoteScatterWrite) | ||||
.INPUT(remote, TensorType({DT_INT64, DT_UINT64})) | .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) | ||||
.INPUT(local, TensorType::ALL()) | .INPUT(local, TensorType::ALL()) | ||||
.OPTIONAL_INPUT(local_offset, TensorType({DT_UINT64})) | .OPTIONAL_INPUT(local_offset, TensorType({DT_UINT64})) | ||||
.OP_END_FACTORY_REG(HcomRemoteScatterWrite) | .OP_END_FACTORY_REG(HcomRemoteScatterWrite) | ||||
/** | |||||
* @brief All ranks send different amounts of data to, and receive different
amounts of data from, all ranks.
* @par Inputs: | |||||
* Five inputs, including: | |||||
* @li send_data: A tensor. the memory to send. | |||||
* @li send_counts: A list, where entry i specifies the number of elements in | |||||
send_data to send to rank i. | |||||
* @li send_displacements: A list, where entry i specifies the displacement | |||||
(offset from sendbuf) from which to send data to rank i. | |||||
* @li recv_counts: A list, where entry i specifies the number of | |||||
elements to receive from rank i. | |||||
* @li recv_displacements: A list, where entry i specifies the displacement
(offset from recv_data) to which data from rank i should be written. | |||||
* @par Outputs: | |||||
* recv_data: A Tensor with the same element type as send_data.
* @par Attributes: | |||||
* @li group: A string identifying the group name of ranks participating in | |||||
the op. | |||||
* @attention All ranks participating in the op should be in full-mesh networking
using RDMA.
*/ | |||||
REG_OP(HcomAllToAllV) | |||||
.INPUT(send_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | |||||
.INPUT(send_counts, TensorType({DT_INT64})) | |||||
.INPUT(send_displacements, TensorType({DT_INT64})) | |||||
.INPUT(recv_counts, TensorType({DT_INT64})) | |||||
.INPUT(recv_displacements, TensorType({DT_INT64})) | |||||
.OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | |||||
.REQUIRED_ATTR(group, String) | |||||
.OP_END_FACTORY_REG(HcomAllToAllV) | |||||
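// Illustrative sketch only, not part of this op registration: how the
// send_displacements described above are typically derived from send_counts
// (an exclusive prefix sum), mirroring MPI-style alltoallv conventions; the
// same holds for the recv side. The function name is ours.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int64_t> DisplacementsFromCounts(
    const std::vector<int64_t> &counts) {
  std::vector<int64_t> displacements(counts.size(), 0);
  for (std::size_t i = 1; i < counts.size(); ++i) {
    // entry i is the offset at which rank i's chunk starts
    displacements[i] = displacements[i - 1] + counts[i - 1];
  }
  return displacements;
}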
/** | |||||
* @brief All ranks send different amounts of data to, and receive different
amounts of data from, all ranks; all the data described by addrinfo is also
concatenated together into the output "gathered".
* @par Inputs: | |||||
* Four inputs, including: | |||||
* @li addrinfo: A tensor describing the memory info (address, length) to send.
* @li addrinfo_count_per_rank: A list, where entry i specifies the number of
addrinfo entries to send to rank i.
* @li recv_counts: A list, where entry i specifies the number of | |||||
elements to receive from rank i. | |||||
* @li recv_displacements: A list, where entry i specifies the displacement
(offset from recv_data) to which data from rank i should be written. | |||||
* @par Outputs: | |||||
* Two outputs, including: | |||||
* @li recv_data: A Tensor whose element type is specified by dtype.
* @li gathered: A Tensor whose element type is specified by dtype.
* @par Attributes: | |||||
* @li group: A string identifying the group name of ranks participating in | |||||
the op. | |||||
* @li dtype: Datatype of send buffer elements. | |||||
* @li addr_length: Describes the memory length of the elements in addrinfo. | |||||
-2: all elements in addrinfo have the same memory length, but it is unknown. | |||||
-1: the element memory lengths are unknown and may differ. | |||||
>0: all elements in addrinfo have the same memory length, given by this attribute value. | |||||
* @attention All ranks participating in the op must be connected in a | |||||
full-mesh network using RDMA. | |||||
*/ | |||||
REG_OP(HcomGatherAllToAllV) | |||||
.INPUT(addrinfo, TensorType({DT_UINT64})) | |||||
.INPUT(addrinfo_count_per_rank, TensorType({DT_INT64})) | |||||
.INPUT(recv_counts, TensorType({DT_INT64})) | |||||
.INPUT(recv_displacements, TensorType({DT_INT64})) | |||||
.OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | |||||
.OUTPUT(gathered, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | |||||
.REQUIRED_ATTR(group, String) | |||||
.REQUIRED_ATTR(dtype, Type) | |||||
.REQUIRED_ATTR(addr_length, Int) | |||||
.OP_END_FACTORY_REG(HcomGatherAllToAllV) | |||||
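// Editor's note: an illustrative sketch (not part of this header) of the
// addr_length convention documented above; the helper is hypothetical.
#include <cstdint>
#include <vector>

int64_t TotalPayloadBytes(int64_t addr_length, const std::vector<int64_t> &lengths) {
  if (addr_length > 0) {
    // All addrinfo elements share this known length.
    return static_cast<int64_t>(lengths.size()) * addr_length;
  }
  // -2 (equal but unknown) and -1 (unknown, possibly different): the
  // lengths must be taken from the addrinfo entries themselves.
  int64_t total = 0;
  for (int64_t len : lengths) {
    total += len;
  }
  return total;
}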
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -24,6 +24,22 @@ | |||||
#include "graph/operator_reg.h" | #include "graph/operator_reg.h" | ||||
namespace ge { | namespace ge { | ||||
/** | |||||
*@brief Decode the frame(s) of a GIF-encoded image to a uint8 tensor . \n | |||||
*@par Inputs: | |||||
*@li contents:A Tensor of type string. 0-D. The GIF-encoded image. \n | |||||
*@par Outputs: | |||||
*image:A Tensor of type uint8. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with tensorflow DecodeGif operator. | |||||
*/ | |||||
REG_OP(DecodeGif) | |||||
.INPUT(contents, TensorType({DT_STRING})) | |||||
.OUTPUT(image, TensorType({DT_UINT8})) | |||||
.OP_END_FACTORY_REG(DecodeGif) | |||||
/** | /** | ||||
*@brief Adjust the hue of one or more images . \n | *@brief Adjust the hue of one or more images . \n | ||||
@@ -31,11 +47,12 @@ namespace ge { | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
interpreted as channels, and must be three. Inputs include: | interpreted as channels, and must be three. Inputs include: | ||||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||||
must be NHWC. | |||||
*@li delta:A Tensor of type float. A float delta to add to the hue . \n | *@li delta:A Tensor of type float. A float delta to add to the hue . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y:A Tensor of type float . \n | |||||
*y:A Tensor of type float. The format must be NHWC. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
@@ -57,11 +74,12 @@ REG_OP(AdjustHue) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
interpreted as channels, and must be three. Inputs include: | interpreted as channels, and must be three. Inputs include: | ||||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||||
must be NHWC. | |||||
*@li scale:A Tensor of type float. A float scale to add to the saturation . \n | *@li scale:A Tensor of type float. A float scale to add to the saturation . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y:A Tensor of type float . \n | |||||
*y:A Tensor of type float. The format must be NHWC. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
@@ -83,11 +101,12 @@ REG_OP(AdjustSaturation) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input images is a tensor of at least 3 dimensions. The last 3 dimensions are | *Input images is a tensor of at least 3 dimensions. The last 3 dimensions are | ||||
interpreted as '[height, width, channels]'. Inputs include: | interpreted as '[height, width, channels]'. Inputs include: | ||||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||||
*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||||
must be NHWC. | |||||
*@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n | *@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y:A Tensor of type float . \n | |||||
*y:A Tensor of type float. The format must be NHWC. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
@@ -112,7 +131,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n | |||||
*Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
*@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, | *@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, | ||||
int16, int32, int64, float16, float, double. A 4-D tensor of shape | int16, int32, int64, float16, float, double. A 4-D tensor of shape | ||||
[batch, image_height, image_width, depth]. | |||||
[batch, image_height, image_width, depth]. The format must be NHWC. | |||||
*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | ||||
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with | *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with | ||||
int32 values in [0, batch). | int32 values in [0, batch). | ||||
@@ -127,7 +146,7 @@ extrapolation, when applicable. | |||||
NearestNeighbor . \n | NearestNeighbor . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y:A Tensor of type float . \n | |||||
*y:A Tensor of type float. The format must be NHWC. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*Input images must be a 4-D tensor . \n | *Input images must be a 4-D tensor . \n | ||||
@@ -193,7 +212,9 @@ boxes tensor . \n | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input images and grads must be a 4-D tensor. Inputs include: | *Input images and grads must be a 4-D tensor. Inputs include: | ||||
*@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | ||||
The format must be NHWC. | |||||
*@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. | *@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. | ||||
The format must be NHWC. | |||||
Both image_height and image_width need to be positive. | Both image_height and image_width need to be positive. | ||||
*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | ||||
specifies the coordinates of a box in the box_ind[i] image and is specified in | specifies the coordinates of a box in the box_ind[i] image and is specified in | ||||
@@ -233,6 +254,7 @@ images tensor . \n | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
*@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | ||||
The format must be NHWC. | |||||
*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | ||||
specifies the coordinates of a box in the box_ind[i] image and is specified | specifies the coordinates of a box in the box_ind[i] image and is specified | ||||
in normalized coordinates [y1, x1, y2, x2]. | in normalized coordinates [y1, x1, y2, x2]. | ||||
@@ -248,7 +270,8 @@ method: A string specifying the interpolation method. Only 'bilinear' is | |||||
supported for now . \n | supported for now . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y:A 4-D tensor of shape [batch, image_height, image_width, depth] . \n | |||||
*y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format | |||||
must be NHWC. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*Input grads must be a 4-D tensor . \n | *Input grads must be a 4-D tensor . \n | ||||
@@ -273,6 +296,7 @@ REG_OP(CropAndResizeGradImage) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input x must be a 4-D tensor. Inputs include: | *Input x must be a 4-D tensor. Inputs include: | ||||
*@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. | *@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. | ||||
The format must be NHWC. | |||||
*@li size: A 1-D tensor of 2 elements containing the size of the glimpses to | *@li size: A 1-D tensor of 2 elements containing the size of the glimpses to | ||||
extract. The glimpse height must be specified first, following by the glimpse | extract. The glimpse height must be specified first, following by the glimpse | ||||
width. | width. | ||||
@@ -293,7 +317,7 @@ uniform_noise . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y:A tensor representing the glimpses [batch_size, glimpse_height, | *y:A tensor representing the glimpses [batch_size, glimpse_height, | ||||
glimpse_width, channels] . \n | |||||
glimpse_width, channels]. The format must be NHWC. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*Input x must be a 4-D tensor . \n | *Input x must be a 4-D tensor . \n | ||||
@@ -340,7 +364,8 @@ REG_OP(HSVToRGB) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
*@li images: 4-D with shape [batch, height, width, channels]. | |||||
*@li images: 4-D with shape [batch, height, width, channels]. The format must | |||||
be NHWC. | |||||
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | ||||
size for the images. | size for the images. | ||||
*@li min: A Tensor of type float. | *@li min: A Tensor of type float. | ||||
@@ -354,6 +379,7 @@ the values at the corner pixels. Defaults to false. | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. | *@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. | ||||
The format must be NHWC. | |||||
*@li y_min: A Tensor of type float. | *@li y_min: A Tensor of type float. | ||||
*@li y_max: A Tensor of type float . \n | *@li y_max: A Tensor of type float . \n | ||||
@@ -381,7 +407,8 @@ REG_OP(QuantizedResizeBilinear) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
*@li images: 4-D with shape [batch, height, width, channels]. | |||||
*@li images: 4-D with shape [batch, height, width, channels]. The format must | |||||
be NHWC. | |||||
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | ||||
The new size for the images . \n | The new size for the images . \n | ||||
@@ -391,7 +418,8 @@ output tensors are aligned, preserving the values at the corner pixels. | |||||
Defaults to false . \n | Defaults to false . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||||
*y: 4-D with shape [batch, new_height, new_width, channels]. The format must | |||||
be NHWC. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*Input images can be of different types but output images are always float . \n | *Input images can be of different types but output images are always float . \n | ||||
@@ -414,10 +442,10 @@ REG_OP(ResizeArea) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
*@li grads: A Tensor of type float. 4-D with shape [batch, height, width, | *@li grads: A Tensor of type float. 4-D with shape [batch, height, width, | ||||
channels]. | |||||
channels]. The format must be NHWC. | |||||
*@li original_image: A Tensor. Must be one of the following types: float, | *@li original_image: A Tensor. Must be one of the following types: float, | ||||
double. 4-D with shape [batch, orig_height, orig_width, channels], The image | double. 4-D with shape [batch, orig_height, orig_width, channels], The image | ||||
tensor that was resized . \n | |||||
tensor that was resized. The format must be NHWC. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li align_corners: An optional bool. Defaults to False. If true, the centers | *@li align_corners: An optional bool. Defaults to False. If true, the centers | ||||
@@ -426,10 +454,10 @@ false. | |||||
*@li half_pixel_centers: An optional bool. Defaults to False . \n | *@li half_pixel_centers: An optional bool. Defaults to False . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as original_image . \n | |||||
*y: A Tensor. Has the same type as original_image. The format must be NHWC. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*Input images can be of different types but output images are always float . \n | |||||
*Input images can be of different types but output images are always float . | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with tensorflow ResizeBicubicGrad operator. | *Compatible with tensorflow ResizeBicubicGrad operator. | ||||
@@ -448,7 +476,8 @@ REG_OP(ResizeBicubicGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
*@li images: 4-D with shape [batch, height, width, channels]. | |||||
*@li images: 4-D with shape [batch, height, width, channels]. The format | |||||
must be NHWC. | |||||
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | ||||
size for the images . \n | size for the images . \n | ||||
@@ -459,10 +488,11 @@ Defaults to false. | |||||
*@li half_pixel_centers: An optional bool. Defaults to False . \n | *@li half_pixel_centers: An optional bool. Defaults to False . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||||
*y: 4-D with shape [batch, new_height, new_width, channels]. The format | |||||
must be NHWC. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*Input images can be of different types but output images are always float . \n | |||||
*Input images can be of different types but output images are always float . | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with tensorflow ResizeBicubic operator. | *Compatible with tensorflow ResizeBicubic operator. | ||||
@@ -483,7 +513,7 @@ REG_OP(ResizeBicubic) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
*@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, | *@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, | ||||
float16, float, double. 4-D with shape [batch, height, width, channels]. | |||||
float16, float, double. The format must be set; supported formats are "NCHW" and "NHWC". | |||||
*@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. | *@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. | ||||
The original input size . \n | The original input size . \n | ||||
@@ -550,9 +580,8 @@ REG_OP(ResizeNearestNeighborV2GradD) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
*@li grads: A Tensor of type float32. 4-D with shape [batch, height, width, | |||||
channels]. | |||||
*@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width, | |||||
*@li grads: A 4-D Tensor of type float32. The format must be set; supported formats are "NCHW" and "NHWC". | |||||
*@li original_image: A Tensor in "NCHW" or "NHWC" format (the format must be set), 4-D with shape [batch, orig_height, orig_width, | |||||
channels], The image tensor that was resized . \n | channels], The image tensor that was resized . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -583,7 +612,7 @@ REG_OP(ResizeBilinearV2Grad) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
*@li x: 4-D with shape [batch, height, width, channels]. | |||||
*@li x: A 4-D tensor. The format must be set; supported formats are "NCHW" and "NHWC". | |||||
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | ||||
size for the images . \n | size for the images . \n | ||||
@@ -639,6 +668,62 @@ REG_OP(RGBToHSV) | |||||
/** | /** | ||||
*@brief Generate a single randomly distorted bounding box for an image . \n | *@brief Generate a single randomly distorted bounding box for an image . \n | ||||
*@par Inputs: | |||||
*Inputs include: | |||||
*@li image_size: 1-D, containing [height, width, channels]. | |||||
*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding | |||||
boxes associated with the image. \n | |||||
*@par Attributes: | |||||
*@li seed: If either seed or seed2 are set to non-zero, the random number | |||||
generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||||
*@li seed2: A second seed to avoid seed collision. | |||||
*@li min_object_covered: The cropped area of the image must contain at least | |||||
this fraction of any bounding box supplied. The value of this parameter should | |||||
be non-negative. In the case of 0, the cropped area does not need to overlap | |||||
any of the bounding boxes supplied . | |||||
*@li aspect_ratio_range: The cropped area of the image must have an aspect | |||||
ratio = width / height within this range. | |||||
*@li area_range: The cropped area of the image must contain a fraction of the | |||||
supplied image within this range. | |||||
*@li max_attempts: Number of attempts at generating a cropped region of the | |||||
image of the specified constraints. After max_attempts failures, return the | |||||
entire image. | |||||
*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes | |||||
supplied. If true, assume an implicit bounding box covering the whole input. | |||||
If false, raise an error . \n | |||||
*@par Outputs: | |||||
*@li begin: 1-D, containing [offset_height, offset_width, 0]. | |||||
*@li size: 1-D, containing [target_height, target_width, -1]. | |||||
*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n | |||||
*@attention Constraints: | |||||
*The begin and size outputs have the same type as image_size; bboxes is of type float . \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with tensorflow SampleDistortedBoundingBox operator. | |||||
*/ | |||||
REG_OP(SampleDistortedBoundingBox) | |||||
.INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
DT_INT32, DT_INT64 })) | |||||
.INPUT(bounding_boxes, TensorType({ DT_FLOAT })) | |||||
.OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
DT_INT32, DT_INT64 })) | |||||
.OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
DT_INT32, DT_INT64 })) | |||||
.OUTPUT(bboxes, TensorType({ DT_FLOAT })) | |||||
.ATTR(seed, Int, 0) | |||||
.ATTR(seed2, Int, 0) | |||||
.ATTR(min_object_covered, Float, 0.1f) | |||||
.ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) | |||||
.ATTR(area_range, ListFloat, { 0.05f, 1.0f }) | |||||
.ATTR(max_attempts, Int, 100) | |||||
.ATTR(use_image_if_no_bounding_boxes, Bool, false) | |||||
.OP_END_FACTORY_REG(SampleDistortedBoundingBox) | |||||
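// Editor's note: an illustrative sketch (not part of this header) of how the
// begin/size outputs crop an HWC image, mirroring how they would feed a
// Slice op; the helper is hypothetical.
#include <cstdint>
#include <vector>

std::vector<uint8_t> CropHWC(const std::vector<uint8_t> &image, int64_t width,
                             int64_t channels, const int64_t begin[3],
                             const int64_t size[3]) {
  // begin = [offset_height, offset_width, 0]; size = [target_height,
  // target_width, -1], where -1 means "all remaining channels".
  int64_t out_c = (size[2] == -1) ? channels : size[2];
  std::vector<uint8_t> out;
  out.reserve(static_cast<size_t>(size[0] * size[1] * out_c));
  for (int64_t h = 0; h < size[0]; ++h) {
    for (int64_t w = 0; w < size[1]; ++w) {
      for (int64_t c = 0; c < out_c; ++c) {
        out.push_back(image[((begin[0] + h) * width + (begin[1] + w)) * channels + c]);
      }
    }
  }
  return out;
}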
/** | |||||
*@brief Generate a single randomly distorted bounding box for an image . \n | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
*@li image_size: 1-D, containing [height, width, channels]. | *@li image_size: 1-D, containing [height, width, channels]. | ||||
@@ -697,7 +782,7 @@ REG_OP(SampleDistortedBoundingBoxExt2) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input x must be a 4-D tensor. Inputs include: | *Input x must be a 4-D tensor. Inputs include: | ||||
*@li x: 4-D with shape [batch, height, width, channels]. | |||||
*@li x: A 4-D tensor. The format must be set; supported formats are "NCHW" and "NHWC". | |||||
*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | ||||
The new size for the images . \n | The new size for the images . \n | ||||
@@ -729,12 +814,12 @@ REG_OP(ResizeNearestNeighborV2) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
*@li images: A Tensor. Must be one of the following types: float. 4-D with | *@li images: A Tensor. Must be one of the following types: float. 4-D with | ||||
shape [batch, height, width, depth]. A batch of images. | |||||
shape [batch, height, width, depth]. A batch of images. The format must be NHWC. | |||||
*@li boxes: A Tensor of type float32. 3-D with shape [batch, | *@li boxes: A Tensor of type float32. 3-D with shape [batch, | ||||
num_bounding_boxes, 4] containing bounding boxes . \n | num_bounding_boxes, 4] containing bounding boxes . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*A Tensor. Has the same type as images . \n | |||||
*A Tensor. Has the same type as images. The format must be NHWC. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*Input images must be a 4-D tensor . \n | *Input images must be a 4-D tensor . \n | ||||
@@ -1002,6 +1087,88 @@ REG_OP(EncodePng) | |||||
.ATTR(compression, Int, -1) | .ATTR(compression, Int, -1) | ||||
.OP_END_FACTORY_REG(EncodePng) | .OP_END_FACTORY_REG(EncodePng) | ||||
/** | |||||
*@brief Decode a PNG-encoded image. | |||||
*@par Inputs: | |||||
*contents: A Tensor of type string. 0-D. The PNG-encoded image . | |||||
*@par Attributes: | |||||
*channels: Number of color channels for the decoded image. Defaults to 0. \n | |||||
*dtype: The data type of the decoded image, DT_UINT8 (default) or DT_UINT16. | |||||
*@par Outputs: | |||||
*image: A 3-D uint8 or uint16 Tensor of shape [height, width, channels], | |||||
where channels is 1 for grayscale, 2 for grayscale + alpha, 3 for RGB, | |||||
or 4 for RGBA . \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with tensorflow DecodePng operator. | |||||
*/ | |||||
REG_OP(DecodePng) | |||||
.INPUT(contents, TensorType({DT_STRING})) | |||||
.OUTPUT(image, TensorType({DT_UINT8, DT_UINT16})) | |||||
.ATTR(dtype, Type, DT_UINT8) | |||||
.ATTR(channels, Int, 0) | |||||
.OP_END_FACTORY_REG(DecodePng) | |||||
/** | |||||
*@brief Decode a BMP-encoded image. \n | |||||
*@par Inputs: | |||||
*@li contents: A Tensor of type string. 0-D. The BMP-encoded image. \n | |||||
*@par Attributes: | |||||
*@li channels: Decode the desired number of color channels of the image. \n | |||||
*@par Outputs: | |||||
*image: A Tensor of type uint8. | |||||
* @par Third-party framework compatibility | |||||
* Compatible with tensorflow DecodeBmp operator. | |||||
*/ | |||||
REG_OP(DecodeBmp) | |||||
.INPUT(contents, TensorType({DT_STRING})) | |||||
.OUTPUT(image, TensorType({DT_UINT8})) | |||||
.ATTR(channels, Int, 0) | |||||
.OP_END_FACTORY_REG(DecodeBmp) | |||||
/** | |||||
*@brief Decode and crop a JPEG-encoded image. \n | |||||
*@par Inputs: | |||||
*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n | |||||
*@li crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. \n | |||||
*@par Attributes: | |||||
*@li channels: An optional int. Defaults to 0. Number of color channels for the | |||||
*decoded image. | |||||
*@li ratio: An optional int. Defaults to 1. Downscaling ratio. | |||||
*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower | |||||
*but nicer upscaling of the chroma planes. | |||||
*@li try_recover_truncated: An optional bool. Defaults to False. If true try to | |||||
*recover an image from truncated input. | |||||
*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required | |||||
fraction of lines before a truncated input is accepted. | |||||
*@li dct_method: An optional string. Defaults to "". A string specifying a hint | |||||
*about the algorithm used for decompression. \n | |||||
*@par Outputs: | |||||
*image: A Tensor of type uint8. | |||||
*/ | |||||
REG_OP(DecodeAndCropJpeg) | |||||
.INPUT(contents, TensorType({DT_STRING})) | |||||
.INPUT(crop_window, TensorType({DT_INT32})) | |||||
.OUTPUT(image, TensorType({DT_UINT8})) | |||||
.ATTR(channels, Int, 0) | |||||
.ATTR(ratio, Int, 1) | |||||
.ATTR(fancy_upscaling, Bool, true) | |||||
.ATTR(try_recover_truncated, Bool, false) | |||||
.ATTR(acceptable_fraction, Float, 1.0) | |||||
.ATTR(dct_method, String, "") | |||||
.OP_END_FACTORY_REG(DecodeAndCropJpeg) | |||||
/** | /** | ||||
*@brief Resizes "images" to "size" using bilinear interpolation . \n | *@brief Resizes "images" to "size" using bilinear interpolation . \n | ||||
@@ -1316,6 +1483,55 @@ REG_OP(CombinedNonMaxSuppression) | |||||
.ATTR(clip_boxes, Bool, true) | .ATTR(clip_boxes, Bool, true) | ||||
.OP_END_FACTORY_REG(CombinedNonMaxSuppression) | .OP_END_FACTORY_REG(CombinedNonMaxSuppression) | ||||
/** | |||||
*@brief Resizes "images" with "offset" using bilinear interpolation. \n | |||||
*@par Inputs: | |||||
*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`. | |||||
*@li warp_offset: the warp offset, a 4-D float tensor of shape `[n, h, w, 2]`, where 2 means (x, y) for the offset point. | |||||
*@par Outputs: | |||||
*warp_img: A Tensor after resize. \n | |||||
*/ | |||||
REG_OP(IMGWarp) | |||||
.INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) | |||||
.INPUT(warp_offset, TensorType({DT_FLOAT32})) | |||||
.OUTPUT(warp_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) | |||||
.OP_END_FACTORY_REG(IMGWarp) | |||||
/** | |||||
*@brief Resizes "images" with "offset" using bilinear interpolation. \n | |||||
*@par Inputs: | |||||
*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`. | |||||
*@li map_offset: the resize offset, a 4-D float tensor of shape `[n, h, w, 2]`, where 2 means (x, y) for the resize point. | |||||
*@par Outputs: | |||||
*map_img: A Tensor after resize. \n | |||||
*/ | |||||
REG_OP(Remap) | |||||
.INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) | |||||
.INPUT(map_offset, TensorType({DT_FLOAT32})) | |||||
.OUTPUT(map_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) | |||||
.OP_END_FACTORY_REG(Remap) | |||||
/** | |||||
*@brief Resizes "images" with "offset" using bilinear interpolation. \n | |||||
*@par Inputs: | |||||
*@li img: input image, a 5-D tensor of shape `[n, 4, c, h, w]`, | |||||
where 4 means the four corner inputs [(h_top, w_left), (h_top, w_right), (h_bottom, w_left), (h_bottom, w_right)]. | |||||
*@li warp_index: the resize offset, a 4-D float tensor of shape `[n, 2, h, w]`, where 2 means (x, y) for the resize point. | |||||
*@par Outputs: | |||||
*warp_img: A Tensor after ResizeBilinear, a 4-D tensor of shape `[n, c, h, w]`. \n | |||||
*/ | |||||
REG_OP(IMGWarpResize) | |||||
.INPUT(img, TensorType({DT_FLOAT32})) | |||||
.INPUT(warp_index, TensorType({DT_FLOAT32})) | |||||
.OUTPUT(warp_img, TensorType({DT_FLOAT32})) | |||||
.OP_END_FACTORY_REG(IMGWarpResize) | |||||
/** | /** | ||||
*@brief Function spatial transformer . \n | *@brief Function spatial transformer . \n | ||||
@@ -1342,6 +1558,383 @@ REG_OP(SpatialTransformerD) | |||||
.ATTR(use_default_theta, ListBool, {}) | .ATTR(use_default_theta, ListBool, {}) | ||||
.OP_END_FACTORY_REG(SpatialTransformerD) | .OP_END_FACTORY_REG(SpatialTransformerD) | ||||
} // namespace ge | |||||
/** | |||||
* @brief Resize the input tensor. \n | |||||
Currently, only resizing image tensors using nearest-neighbor and linear interpolation is supported. | |||||
* @par Inputs: | |||||
* Input x must be a 4-D tensor. Inputs include: \n | |||||
* @li x: A Tensor. Must be one of the following types: uint8, int8, int16, \n | |||||
int32, int64, float16, float, double. 4-D with shape [batch, height, width, channels] \n | |||||
or shape [batch, channels, height, width]. | |||||
* @li roi: A 1-D float Tensor. Only takes effect when attr coordinate_transformation_mode \n | |||||
is "tf_crop_and_resize". | |||||
* @li scales: A 1-D float Tensor, the scale array along each dimension. Only one of \n | |||||
'scales' and 'sizes' can be specified. | |||||
* @li sizes: A 1-D int64 Tensor, the size of the output tensor. Only one of \n | |||||
'scales' and 'sizes' can be specified. If 'sizes' is specified, then set scales \n | |||||
to empty data (zero shape) in this operator's input list. | |||||
* @par Attributes: | |||||
* @li coordinate_transformation_mode: String. Defaults to half_pixel. Specifies how to transform \n | |||||
the coordinate in the resized tensor to the coordinate in the original tensor. \n | |||||
Other options: pytorch_half_pixel, align_corners, asymmetric, tf_half_pixel_for_nn, \n | |||||
tf_crop_and_resize. | |||||
* @li cubic_coeff_a: Float. Defaults to -0.75. Only used in cubic interpolation. \n | |||||
Another supported value: -0.5. | |||||
* @li exclude_outside: Int. Defaults to 0. If set to 1, the weights of sampling \n | |||||
locations outside the tensor will be set to 0 and the remaining weights \n | |||||
renormalized so that their sum is 1.0. | |||||
* @li extrapolation_value: Float. Defaults to 0.0f. When coordinate_transformation_mode \n | |||||
is "tf_crop_and_resize" and x_original is outside the range [0, length_original - 1], \n | |||||
this value is used as the corresponding output value. | |||||
* @li mode: String. Defaults to nearest. Three interpolation modes: nearest (default), \n | |||||
linear and cubic. | |||||
* @li nearest_mode: String. Defaults to round_prefer_floor. Four modes: round_prefer_floor, \n | |||||
round_prefer_ceil, floor, ceil. Only used by nearest interpolation. | |||||
* @par Outputs: | |||||
* y: A Tensor. Has the same type as x. | |||||
* @attention Constraints: \n | |||||
* Input x must be a 4-D tensor. | |||||
* @par Third-party framework compatibility | |||||
* Compatible with tensorflow ResizeNearestNeighborV2 operator. | |||||
*/ | |||||
REG_OP(Resize) | |||||
.INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, | |||||
DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(scales, TensorType({DT_FLOAT})) | |||||
.OPTIONAL_INPUT(sizes, TensorType({DT_INT64})) | |||||
.OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, | |||||
DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(coordinate_transformation_mode, String, "half_pixel") | |||||
.ATTR(cubic_coeff_a, Float, -0.75) | |||||
.ATTR(exclude_outside, Int, 0) | |||||
.ATTR(extrapolation_value, Float, 0) | |||||
.ATTR(mode, String, "nearest") | |||||
.ATTR(nearest_mode, String, "round_prefer_floor") | |||||
.OP_END_FACTORY_REG(Resize) | |||||
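// Editor's note: an illustrative sketch (not part of this header) of the
// coordinate mappings the modes above denote, following the ONNX Resize
// convention that this attribute list mirrors; the helper is hypothetical.
#include <cstdint>
#include <string>

float OriginalCoordinate(const std::string &mode, float x_resized, float scale,
                         int64_t length_original, int64_t length_resized) {
  if (mode == "align_corners") {
    return length_resized > 1
               ? x_resized * (length_original - 1) / static_cast<float>(length_resized - 1)
               : 0.0f;
  }
  if (mode == "asymmetric") {
    return x_resized / scale;
  }
  if (mode == "pytorch_half_pixel") {
    return length_resized > 1 ? (x_resized + 0.5f) / scale - 0.5f : 0.0f;
  }
  // Default "half_pixel": pixels are unit cells centred at +0.5.
  return (x_resized + 0.5f) / scale - 0.5f;
}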
/** | |||||
*@brief Decode a JPEG-encoded image. \n | |||||
*@par Inputs: | |||||
*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n | |||||
*@par Attributes: | |||||
*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. | |||||
*@li ratio: An optional int. Defaults to 1. Downscaling ratio. | |||||
*@li fancy_upscaling: An optional bool. Defaults to True. If true, use a slower but nicer upscaling of the chroma planes. | |||||
*@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input. | |||||
*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted. | |||||
*@li dct_method: An optional string. Defaults to "". A string specifying a hint about the algorithm used for decompression. \n | |||||
*@par Outputs: | |||||
*image: A Tensor of type uint8. | |||||
*/ | |||||
REG_OP(DecodeJpeg) | |||||
.INPUT(contents, TensorType({DT_STRING})) | |||||
.OUTPUT(image, TensorType({DT_UINT8})) | |||||
.ATTR(channels, Int, 0) | |||||
.ATTR(ratio, Int, 1) | |||||
.ATTR(fancy_upscaling, Bool, true) | |||||
.ATTR(try_recover_truncated, Bool, false) | |||||
.ATTR(acceptable_fraction, Float, 1.0) | |||||
.ATTR(dct_method, String, "") | |||||
.OP_END_FACTORY_REG(DecodeJpeg) | |||||
/** | |||||
*@brief Image warping using per-pixel flow vectors. \n | |||||
*@par Inputs: | |||||
*@li image: 4-D Tensor with shape `[batch, height, width, channels]`. | |||||
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n | |||||
*@par Outputs: | |||||
*y: Returns 4-D with the same shape and dtype as `image`. \n | |||||
*/ | |||||
REG_OP(DenseImageWarp) | |||||
.INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OP_END_FACTORY_REG(DenseImageWarp) | |||||
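// Editor's note: an illustrative sketch (not part of this header) of the
// per-pixel sampling rule, matching the TensorFlow dense_image_warp
// definition this op follows: y[j, i, c] is the bilinear sample of image at
// (j - flow[j, i, 0], i - flow[j, i, 1]); a single batch is assumed.
#include <algorithm>
#include <cmath>
#include <cstdint>

float SampleWarped(const float *image, const float *flow, int64_t H,
                   int64_t W, int64_t C, int64_t j, int64_t i, int64_t c) {
  float qy = j - flow[(j * W + i) * 2 + 0];  // query row
  float qx = i - flow[(j * W + i) * 2 + 1];  // query column
  int64_t y0 = static_cast<int64_t>(std::floor(qy));
  int64_t x0 = static_cast<int64_t>(std::floor(qx));
  float dy = qy - y0, dx = qx - x0;
  auto at = [&](int64_t y, int64_t x) {
    y = std::min(std::max(y, int64_t{0}), H - 1);  // clamp to the border
    x = std::min(std::max(x, int64_t{0}), W - 1);
    return image[(y * W + x) * C + c];
  };
  return (1 - dy) * ((1 - dx) * at(y0, x0) + dx * at(y0, x0 + 1)) +
         dy * ((1 - dx) * at(y0 + 1, x0) + dx * at(y0 + 1, x0 + 1));
}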
/** | |||||
*@brief Calculate the resize_d function. \n | |||||
*@par Inputs: | |||||
*One inputs, including: | |||||
* @li x: A tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
*@par Attributes: | |||||
*@li sizes: A required listInt. \n | |||||
*@li scales: An optional listFloat. | |||||
Defaults to none. \n | |||||
*@li roi: An optional listInt. | |||||
Defaults to none. \n | |||||
*@li coordinate_transformation_mode: An optional String. | |||||
Defaults to "half_pixel". \n | |||||
*@li cubic_coeff_a: An optional float. | |||||
Defaults to -0.75. \n | |||||
*@li exclude_outside: An optional int. | |||||
Defaults to 0. \n | |||||
*@li extrapolation_value: An optional float. | |||||
Defaults to 0.0. \n | |||||
*@li mode: An optional String. | |||||
Defaults to "nearest". \n | |||||
*@li nearest_mode: An optional String. | |||||
Defaults to "round_prefer_floor". \n | |||||
*@par Outputs: | |||||
*y: A Tensor with the same type as x, | |||||
shape depends on x and sizes. \n | |||||
*/ | |||||
REG_OP(ResizeD) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(sizes, ListInt) | |||||
.ATTR(scales, ListFloat, {}) | |||||
.ATTR(roi, ListInt, {}) | |||||
.ATTR(coordinate_transformation_mode, String, "half_pixel") | |||||
.ATTR(cubic_coeff_a, Float, -0.75) | |||||
.ATTR(exclude_outside, Int, 0) | |||||
.ATTR(extrapolation_value, Float, 0.0) | |||||
.ATTR(mode, String, "nearest") | |||||
.ATTR(nearest_mode, String, "round_prefer_floor") | |||||
.OP_END_FACTORY_REG(ResizeD) | |||||
/** | |||||
*@brief Calculate the resize_grad_d function. \n | |||||
*@par Inputs: | |||||
*One inputs, including: | |||||
* @li grads: A tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
*@par Attributes: | |||||
*@li original_size: A required listInt. \n | |||||
*@li roi: An optional listInt. | |||||
Defaults to none. \n | |||||
*@li scales: An optional listFloat. | |||||
Defaults to none. \n | |||||
*@li coordinate_transformation_mode: An optional String. | |||||
Defaults to "half_pixel". \n | |||||
*@li cubic_coeff_a: An optional float. | |||||
Defaults to -0.75. \n | |||||
*@li exclude_outside: An optional int. | |||||
Defaults to 0. \n | |||||
*@li extrapolation_value: An optional float. | |||||
Defaults to 0.0. \n | |||||
*@li mode: An optional String. | |||||
Defaults to "nearest". \n | |||||
*@li nearest_mode: An optional String. | |||||
Defaults to "round_prefer_floor". \n | |||||
*@par Outputs: | |||||
*y: A Tensor with the same type as grads, | |||||
shape depends on grads and original_size. \n | |||||
*/ | |||||
REG_OP(ResizeGradD) | |||||
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(original_size, ListInt) | |||||
.ATTR(roi, ListInt, {}) | |||||
.ATTR(scales, ListFloat, {}) | |||||
.ATTR(coordinate_transformation_mode, String, "half_pixel") | |||||
.ATTR(cubic_coeff_a, Float, -0.75) | |||||
.ATTR(exclude_outside, Int, 0) | |||||
.ATTR(extrapolation_value, Float, 0.0) | |||||
.ATTR(mode, String, "nearest") | |||||
.ATTR(nearest_mode, String, "round_prefer_floor") | |||||
.OP_END_FACTORY_REG(ResizeGradD) | |||||
/** | |||||
*@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n | |||||
*@par Inputs: | |||||
*@li grad: gradients with respect to DenseImageWarp output. | |||||
*@li image: 4-D Tensor with shape `[batch, height, width, channels]`. | |||||
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n | |||||
*@par Outputs: | |||||
*grad_image: Returns 4-D with the same shape and dtype as `image`. | |||||
*grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n | |||||
*/ | |||||
REG_OP(DenseImageWarpGrad) | |||||
.INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OP_END_FACTORY_REG(DenseImageWarpGrad) | |||||
/** | |||||
*@brief This operation samples input x by using interpolation based on a flow field grid, | |||||
which is usually generated by affine_grid. The grid of shape [N, H, W, 2] is the concatenation of | |||||
(x, y) coordinates with shape [N, H, W] each, where x indexes the 4th dimension (the width dimension) of | |||||
the input data x and y indexes the 3rd dimension (the height dimension); the final result is | |||||
the interpolation value of the 4 nearest corner points. The output tensor shape will be [N, C, H, W]. | |||||
*@par Inputs: | |||||
*@li x: 4-D Tensor with shape `[batch, channels, height, width]`. | |||||
*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`. | |||||
*@par Attributes: | |||||
*@li interpolation_mode: An optional string specifying the interpolation method. Only 'bilinear' is | |||||
supported for now . | |||||
*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . | |||||
*@li align_corners: An optional bool. If "true", the centers of the corner | |||||
pixels of the input and output tensors are aligned. Defaults to "false" . | |||||
*@par Outputs: | |||||
*y: Returns 4-D Tensor with the same dtype as `X`. | |||||
*@par Third-party framework compatibility | |||||
*Compatible with pytorch GridSampler2D operator. | |||||
*@par Restrictions: | |||||
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(GridSampler2D) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(interpolation_mode, String, "bilinear") | |||||
.ATTR(padding_mode, String, "zeros") | |||||
.ATTR(align_corners, Bool, false) | |||||
.OP_END_FACTORY_REG(GridSampler2D) | |||||
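// Editor's note: an illustrative sketch (not part of this header) of how a
// normalized grid coordinate in [-1, 1] maps to pixel space under the
// PyTorch grid_sample convention this op is compatible with; the helper is
// hypothetical.
#include <cstdint>

float UnnormalizeCoord(float coord, int64_t size, bool align_corners) {
  if (align_corners) {
    // -1 and +1 refer to the centres of the corner pixels.
    return (coord + 1.0f) / 2.0f * (size - 1);
  }
  // -1 and +1 refer to the outer edges of the corner pixels.
  return ((coord + 1.0f) * size - 1.0f) / 2.0f;
}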
/** | |||||
*@brief This operation unnormalizes the input grid, which is usually generated by affine_grid. | |||||
*@par Inputs: | |||||
*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`. | |||||
*@li assist: Assist matrix, a 4-D tensor of type float16. | |||||
*@par Attributes: | |||||
*@li align_corners: An optional bool. If "true", the centers of the corner | |||||
pixels of the input and output tensors are aligned. Defaults to "false" . | |||||
*@par Outputs: | |||||
*diff: Returns 4-D Tensor with the same shape and dtype as `grid`. | |||||
*position: Returns 4-D Tensor with the same shape as `grid`. | |||||
*/ | |||||
REG_OP(GridUnnormal) | |||||
.INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(assist, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(diff, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(position, TensorType({DT_INT32})) | |||||
.ATTR(align_corners, Bool, false) | |||||
.OP_END_FACTORY_REG(GridUnnormal) | |||||
/** | |||||
*@brief This operation unfolds input x based on the unnormalized grid generated by GridUnnormal. | |||||
*@par Inputs: | |||||
*@li x: 4-D Tensor with shape `[batch, channels, height, width]`. | |||||
*@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`. | |||||
*@par Attributes: | |||||
*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . | |||||
*@par Outputs: | |||||
*y: Returns 4-D Tensor with the same dtype as `x`. | |||||
*/ | |||||
REG_OP(ImageUnfold) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(position, TensorType({DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(padding_mode, String, "zeros") | |||||
.OP_END_FACTORY_REG(ImageUnfold) | |||||
/** | |||||
*@brief This operation gathers pixels from images into warp_images according to offsets. | |||||
*@par Inputs: | |||||
*@li images: 4-D Tensor with shape `[batch, height, width, 3]`. | |||||
*@li offsets: 4-D Tensor with shape `[batch, 4, new_height, new_width]`. | |||||
*@par Outputs: | |||||
*warp_images: Returns 5-D Tensor with shape | |||||
`[batch, 4, new_height, new_width, 3]` and the same dtype as `images`. | |||||
*/ | |||||
REG_OP(IMGWarpOffsets) | |||||
.INPUT(images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(offsets, TensorType({DT_FLOAT, DT_INT32})) | |||||
.OUTPUT(warp_images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(IMGWarpOffsets) | |||||
/** | |||||
*@brief This operation samples the 3-D input x by using interpolation based on a flow field grid, | |||||
which is usually generated by affine_grid. | |||||
*@par Inputs: | |||||
*@li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`. | |||||
*@li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 3]`. | |||||
*@par Attributes: | |||||
*@li interpolation_mode: An optional string specifying the interpolation method. | |||||
*@li padding_mode: An optional string specifying the pad method. | |||||
*@li align_corners: An optional bool. If "true", the centers of the corner | |||||
pixels of the input and output tensors are aligned. Defaults to "false" . | |||||
*@par Outputs: | |||||
*y: Returns 5-D Tensor with the same dtype as `x`. | |||||
*@par Third-party framework compatibility | |||||
*Compatible with pytorch GridSampler3D operator. | |||||
*@par Restrictions: | |||||
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(GridSampler3D) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(interpolation_mode, String, "bilinear") | |||||
.ATTR(padding_mode, String, "zeros") | |||||
.ATTR(align_corners, Bool, false) | |||||
.OP_END_FACTORY_REG(GridSampler3D) | |||||
/** | |||||
*@brief Computes the gradients of GridSampler3D. | |||||
*@par Inputs: | |||||
*@li grad: 5-D Tensor with shape `[batch, channels, depth, height, width]`. | |||||
*@li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`. | |||||
*@li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 3]`. | |||||
*@par Attributes: | |||||
*@li interpolation_mode: An optional string specifying the interpolation method. | |||||
*@li padding_mode: An optional string specifying the pad method. | |||||
*@li align_corners: An optional bool. If "true", the centers of the corner | |||||
pixels of the input and output tensors are aligned. Defaults to "false" . | |||||
*@par Outputs: | |||||
*dx: Returns 5-D Tensor with the same dtype and shape as `x`. | |||||
*dgrid: Returns 5-D Tensor with the same dtype and shape as `grid`. | |||||
*@par Third-party framework compatibility | |||||
*Compatible with pytorch GridSampler3DGrad operator. | |||||
*@par Restrictions: | |||||
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(GridSampler3DGrad) | |||||
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(dgrid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(interpolation_mode, String, "bilinear") | |||||
.ATTR(padding_mode, String, "zeros") | |||||
.ATTR(align_corners, Bool, false) | |||||
.OP_END_FACTORY_REG(GridSampler3DGrad) | |||||
} // namespace ge | |||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -61,8 +61,8 @@ REG_OP(CholeskyGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
*The input x has to be symmetric and positive definite.Inputs include: | *The input x has to be symmetric and positive definite.Inputs include: | ||||
*x:A Tensor. Must be one of the following types: double, float32. Shape | |||||
is [..., M, M] . \n | |||||
*x:A Tensor. Must be one of the following types: double, float32, float16, | |||||
complex64, complex128. Shape is [..., M, M] . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y:A Tensor. Has the same type as x . \n | *y:A Tensor. Has the same type as x . \n | ||||
@@ -76,10 +76,31 @@ form square matrices. | |||||
*/ | */ | ||||
REG_OP(Cholesky) | REG_OP(Cholesky) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||||
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||||
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OP_END_FACTORY_REG(Cholesky) | .OP_END_FACTORY_REG(Cholesky) | ||||
/** | |||||
*@brief Computes the outer product of two 1D vectors . \n | |||||
*@par Inputs: | |||||
*The inputs x1 and x2 have to be 1-D vectors. Inputs include: | |||||
*@li x1:A Tensor. Must be one of the following types: float16, float32. | |||||
Shape is [N] . \n | |||||
*@li x2:A Tensor. Must have the same type as x1. Shape is [M] . \n | |||||
*@par Outputs: | |||||
*y:A Tensor of shape [N, M]. Has the same type as x1 . \n | |||||
*/ | |||||
REG_OP(Ger) | |||||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(Ger) | |||||
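// Editor's note: an illustrative sketch (not part of this header) of the
// outer product registered above: y[i][j] = x1[i] * x2[j], an [N, M] result.
#include <vector>

std::vector<std::vector<float>> OuterProduct(const std::vector<float> &x1,
                                             const std::vector<float> &x2) {
  std::vector<std::vector<float>> y(x1.size(), std::vector<float>(x2.size()));
  for (size_t i = 0; i < x1.size(); ++i) {
    for (size_t j = 0; j < x2.size(); ++j) {
      y[i][j] = x1[i] * x2[j];
    }
  }
  return y;
}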
/** | /** | ||||
*@brief Computes the sign and the log of the absolute value of the determinant | *@brief Computes the sign and the log of the absolute value of the determinant | ||||
of one or more square matrices . \n | of one or more square matrices . \n | ||||
@@ -87,8 +108,8 @@ of one or more square matrices . \n | |||||
*@par Inputs: | *@par Inputs: | ||||
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | ||||
form square matrices. Inputs include: | form square matrices. Inputs include: | ||||
*x:A Tensor. Must be one of the following types: double, float32. Shape is | |||||
[..., M, M] . \n | |||||
*x:A Tensor. Must be one of the following types: double, float32, | |||||
complex64, complex128. Shape is [..., M, M] . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li y:A Tensor. Has the same type as x. | *@li y:A Tensor. Has the same type as x. | ||||
@@ -103,9 +124,9 @@ form square matrices. \n | |||||
*/ | */ | ||||
REG_OP(LogMatrixDeterminant) | REG_OP(LogMatrixDeterminant) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OP_END_FACTORY_REG(LogMatrixDeterminant) | .OP_END_FACTORY_REG(LogMatrixDeterminant) | ||||
/** | /** | ||||
@@ -114,8 +135,8 @@ REG_OP(LogMatrixDeterminant) | |||||
*@par Inputs: | *@par Inputs: | ||||
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | ||||
form square matrices. Inputs include: | form square matrices. Inputs include: | ||||
*x:A Tensor. Must be one of the following types: double, float32. Shape is | |||||
[..., M, M] . \n | |||||
*x:A Tensor. Must be one of the following types: double, float32, complex64, | |||||
complex128. Shape is [..., M, M] . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y:A Tensor. Has the same type as x . \n | *y:A Tensor. Has the same type as x . \n | ||||
@@ -129,8 +150,8 @@ form square matrices. | |||||
*/ | */ | ||||
REG_OP(MatrixDeterminant) | REG_OP(MatrixDeterminant) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OP_END_FACTORY_REG(MatrixDeterminant) | .OP_END_FACTORY_REG(MatrixDeterminant) | ||||
/** | /** | ||||
@@ -140,8 +161,7 @@ their adjoints (conjugate transposes) . \n | |||||
*@par Inputs: | *@par Inputs: | ||||
*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions | *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions | ||||
form square matrices. Inputs include: | form square matrices. Inputs include: | ||||
*x:A Tensor. Must be one of the following types: double, float. Shape is | |||||
[..., M, M] . \n | |||||
*x:An input Tensor. Shape is [..., M, M] . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*adjoint:An optional bool. Defaults to False.Boolean indicating whether to | *adjoint:An optional bool. Defaults to False.Boolean indicating whether to | ||||
@@ -159,8 +179,8 @@ form square matrices. \n | |||||
*/ | */ | ||||
REG_OP(MatrixInverse) | REG_OP(MatrixInverse) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.ATTR(adjoint, Bool, false) | .ATTR(adjoint, Bool, false) | ||||
.OP_END_FACTORY_REG(MatrixInverse) | .OP_END_FACTORY_REG(MatrixInverse) | ||||
@@ -169,8 +189,7 @@ REG_OP(MatrixInverse) | |||||
*@par Inputs: | *@par Inputs: | ||||
*The input rhs must have the same type as matrix. Inputs include: | *The input rhs must have the same type as matrix. Inputs include: | ||||
*@li matrix:A Tensor. Must be one of the following types: double, float. | |||||
Shape is [..., M, M]. | |||||
*@li matrix:An input Tensor. Shape is [..., M, M]. | |||||
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -189,9 +208,9 @@ dimensions form square matrices. \n | |||||
*/ | */ | ||||
REG_OP(MatrixSolve) | REG_OP(MatrixSolve) | ||||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.ATTR(adjoint, Bool, false) | .ATTR(adjoint, Bool, false) | ||||
.OP_END_FACTORY_REG(MatrixSolve) | .OP_END_FACTORY_REG(MatrixSolve) | ||||
@@ -221,8 +240,8 @@ dimensions form square matrices. \n | |||||
*/ | */ | ||||
REG_OP(MatrixSolveLs) | REG_OP(MatrixSolveLs) | ||||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.INPUT(l2, TensorType({DT_DOUBLE})) | .INPUT(l2, TensorType({DT_DOUBLE})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | ||||
.ATTR(fast, Bool, true) | .ATTR(fast, Bool, true) | ||||
@@ -234,8 +253,7 @@ matrices by backsubstitution . \n | |||||
*@par Inputs: | *@par Inputs: | ||||
*The input rhs must have the same type as matrix. Inputs include: | *The input rhs must have the same type as matrix. Inputs include: | ||||
*@li matrix: A Tensor. Must be one of the following types: double, float. | |||||
Shape is [..., M, M]. | |||||
*@li matrix: A Tensor. Shape is [..., M, M]. | |||||
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -256,9 +274,9 @@ dimensions form square matrices. \n | |||||
*/ | */ | ||||
REG_OP(MatrixTriangularSolve) | REG_OP(MatrixTriangularSolve) | ||||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.ATTR(lower, Bool, true) | .ATTR(lower, Bool, true) | ||||
.ATTR(adjoint, Bool, false) | .ATTR(adjoint, Bool, false) | ||||
.OP_END_FACTORY_REG(MatrixTriangularSolve) | .OP_END_FACTORY_REG(MatrixTriangularSolve) | ||||
@@ -268,8 +286,7 @@ REG_OP(MatrixTriangularSolve) | |||||
*@par Inputs: | *@par Inputs: | ||||
*The input shape of x must be [..., M, N]. Inputs include: | *The input shape of x must be [..., M, N]. Inputs include: | ||||
*x:A Tensor whose shape is [..., M, N]. Must be one of the following types: | |||||
double, float . \n | |||||
*x:A Tensor whose shape is [..., M, N]. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*full_matrices: An optional bool. Defaults to False. If true, compute | *full_matrices: An optional bool. Defaults to False. If true, compute | ||||
@@ -289,9 +306,12 @@ dimensions form matrices of size [M, N]. \n | |||||
*/ | */ | ||||
REG_OP(Qr) | REG_OP(Qr) | ||||
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||||
.OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||||
.OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||||
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||||
DT_COMPLEX64, DT_COMPLEX128 })) | |||||
.OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||||
DT_COMPLEX64, DT_COMPLEX128 })) | |||||
.OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||||
DT_COMPLEX64, DT_COMPLEX128 })) | |||||
.ATTR(full_matrices, Bool, false) | .ATTR(full_matrices, Bool, false) | ||||
.OP_END_FACTORY_REG(Qr) | .OP_END_FACTORY_REG(Qr) | ||||
@@ -320,12 +340,40 @@ form square matrices. \n | |||||
*/ | */ | ||||
REG_OP(SelfAdjointEig) | REG_OP(SelfAdjointEig) | ||||
.INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT })) | |||||
.OUTPUT(eigen_value, TensorType({ DT_DOUBLE, DT_FLOAT })) | |||||
.OUTPUT(eigen_vector, TensorType({ DT_DOUBLE, DT_FLOAT })) | |||||
.INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) | |||||
.OUTPUT(eigen_value, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) | |||||
.OUTPUT(eigen_vector, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) | |||||
.ATTR(compute_v, Bool, true) | .ATTR(compute_v, Bool, true) | ||||
.OP_END_FACTORY_REG(SelfAdjointEig) | .OP_END_FACTORY_REG(SelfAdjointEig) | ||||
/** | |||||
*@brief Computes the sign and the log of the absolute value of the determinant | |||||
of one or more square matrices . \n | |||||
*@par Inputs: | |||||
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | |||||
form square matrices. Inputs include: | |||||
*x:A Tensor. Must be one of the following types: double, float32, float16. | |||||
Shape is [..., M, M] . \n | |||||
*@par Outputs: | |||||
*@li y:A Tensor. Has the same type as x. | |||||
*@li sign:A Tensor. Has the same type as x . \n | |||||
*@attention Constraints: | |||||
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | |||||
form square matrices. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with tensorflow LogMatrixDeterminant operator. | |||||
*/ | |||||
REG_OP(Slogdet) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(sign, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OP_END_FACTORY_REG(Slogdet) | |||||
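For intuition, the two outputs recombine as det(x) == sign * exp(y). A minimal single-matrix reference in C++ via Gaussian elimination with partial pivoting — a sketch only, with illustrative names and a plain std::vector layout rather than the operator's actual kernel:

#include <cmath>
#include <cstddef>
#include <utility>
#include <vector>

// Reference slogdet for one M x M real matrix: reduce to upper-triangular
// form, tracking the determinant's sign and accumulating log|pivot|.
void SlogdetRef(std::vector<std::vector<double>> m, double &sign, double &logabsdet) {
  const std::size_t n = m.size();
  sign = 1.0;
  logabsdet = 0.0;
  for (std::size_t k = 0; k < n; ++k) {
    std::size_t p = k;
    for (std::size_t i = k + 1; i < n; ++i) {  // partial pivoting
      if (std::fabs(m[i][k]) > std::fabs(m[p][k])) p = i;
    }
    if (p != k) { std::swap(m[p], m[k]); sign = -sign; }  // a row swap flips the sign
    const double pivot = m[k][k];
    if (pivot == 0.0) { sign = 0.0; logabsdet = -INFINITY; return; }  // singular matrix
    if (pivot < 0.0) sign = -sign;
    logabsdet += std::log(std::fabs(pivot));
    for (std::size_t i = k + 1; i < n; ++i) {  // eliminate column k below the pivot
      const double f = m[i][k] / pivot;
      for (std::size_t j = k; j < n; ++j) m[i][j] -= f * m[k][j];
    }
  }
}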
/** | /** | ||||
*@brief Computes the singular value decompositions of one or more matrices . \n | *@brief Computes the singular value decompositions of one or more matrices . \n | ||||
@@ -384,8 +432,8 @@ of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n | |||||
*/ | */ | ||||
REG_OP(Lu) | REG_OP(Lu) | ||||
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(p, TensorType({DT_INT32, DT_INT64})) | .OUTPUT(p, TensorType({DT_INT32, DT_INT64})) | ||||
.REQUIRED_ATTR(output_idx_type, Type) | .REQUIRED_ATTR(output_idx_type, Type) | ||||
.OP_END_FACTORY_REG(Lu) | .OP_END_FACTORY_REG(Lu) | ||||
@@ -404,8 +452,8 @@ y: Shape is `[..., M, M]` . \n | |||||
*/ | */ | ||||
REG_OP(MatrixSquareRoot) | REG_OP(MatrixSquareRoot) | ||||
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OP_END_FACTORY_REG(MatrixSquareRoot) | .OP_END_FACTORY_REG(MatrixSquareRoot) | ||||
/** | /** | ||||
@@ -424,9 +472,9 @@ y: Tensor of shape `[..., M, K]` containing the solutions \n | |||||
*/ | */ | ||||
REG_OP(TridiagonalSolve) | REG_OP(TridiagonalSolve) | ||||
.INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.ATTR(partial_pivoting, Bool, true) | .ATTR(partial_pivoting, Bool, true) | ||||
.OP_END_FACTORY_REG(TridiagonalSolve) | .OP_END_FACTORY_REG(TridiagonalSolve) | ||||
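When partial_pivoting is false, each system reduces to the classic Thomas algorithm; a sketch under that assumption (flat std::vector bands instead of the operator's packed diagonals layout, helper name hypothetical):

#include <cstddef>
#include <vector>

// Thomas algorithm for one tridiagonal system: a = sub-diagonal (a[0] unused),
// b = main diagonal, c = super-diagonal (c[n-1] unused), d = right-hand side.
std::vector<double> ThomasSolve(std::vector<double> a, std::vector<double> b,
                                std::vector<double> c, std::vector<double> d) {
  const std::size_t n = b.size();
  for (std::size_t i = 1; i < n; ++i) {    // forward elimination
    const double w = a[i] / b[i - 1];
    b[i] -= w * c[i - 1];
    d[i] -= w * d[i - 1];
  }
  std::vector<double> x(n);
  x[n - 1] = d[n - 1] / b[n - 1];
  for (std::size_t i = n - 1; i-- > 0;) {  // back substitution
    x[i] = (d[i] - c[i] * x[i + 1]) / b[i];
  }
  return x;
}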
@@ -0,0 +1,504 @@ | |||||
/** | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/*! | |||||
* \file list_ops.h | |||||
* \brief | |||||
*/ | |||||
#ifndef OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ | |||||
#define OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ | |||||
#include <algorithm> | |||||
#include "graph/operator_reg.h" | |||||
#include "graph/operator.h" | |||||
namespace ge { | |||||
/** | |||||
*@brief Creates and returns an empty tensor list. \n | |||||
*@par Inputs: | |||||
*@li element_shape: A shape compatible with that of elements in the list. | |||||
*@li max_num_elements: The maximum number of elements. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li handle: An empty tensor list . \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow EmptyTensorList operator. | |||||
*/ | |||||
REG_OP(EmptyTensorList) | |||||
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
.INPUT(max_num_elements, TensorType({DT_INT32})) | |||||
.OUTPUT(handle, TensorType({DT_VARIANT})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(EmptyTensorList) | |||||
/** | |||||
*@brief Returns a list which has the passed-in `Tensor` as last element | |||||
and the other elements of the given list in `input_handle`. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The old list. | |||||
*@li tensor: The tensor to put on the list. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li output_handle:A list with the elements of old list followed by tensor. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListPushBack operator. | |||||
*/ | |||||
REG_OP(TensorListPushBack) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListPushBack) | |||||
/** | |||||
*@brief Returns the last element of the input list as well as a | |||||
list with all but that element. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The input list. | |||||
*@li element_shape: A shape compatible with that of elements in the list. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li output_handle:A list with all elements of the old list except the last. | |||||
*@li tensor:The removed last element of the list. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListPopBack operator. | |||||
*/ | |||||
REG_OP(TensorListPopBack) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.INPUT(element_shape, TensorType({DT_INT32})) | |||||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
.OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListPopBack) | |||||
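Semantically these variant handles behave like a typed stack; a std::vector analogue of PushBack/PopBack for intuition (the Tensor struct below is a stand-in for the real element type, not GE's):

#include <utility>
#include <vector>

struct Tensor { /* element_dtype-typed payload */ };

// TensorListPushBack: output_handle = the input list with t appended.
std::vector<Tensor> PushBack(std::vector<Tensor> list, Tensor t) {
  list.push_back(std::move(t));
  return list;
}

// TensorListPopBack: returns {all but the last element, the last element}.
std::pair<std::vector<Tensor>, Tensor> PopBack(std::vector<Tensor> list) {
  Tensor last = list.back();
  list.pop_back();
  return {std::move(list), std::move(last)};
}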
/** | |||||
*@brief The number of tensors in the input tensor list. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The input list. \n | |||||
*@par Outputs: | |||||
*@li length:The number of tensors in the list. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListLength operator. | |||||
*/ | |||||
REG_OP(TensorListLength) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.OUTPUT(length, TensorType({DT_INT32})) | |||||
.OP_END_FACTORY_REG(TensorListLength) | |||||
/** | |||||
*@brief The shape of elements in the input tensor list. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The input list. \n | |||||
*@par Attributes: | |||||
*@li shape_type: The type of shape in the list. \n | |||||
*@par Outputs: | |||||
*@li element_shape:A shape compatible with that of elements in the list. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListElementShape operator. | |||||
*/ | |||||
REG_OP(TensorListElementShape) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.OUTPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
.ATTR(shape_type, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListElementShape) | |||||
/** | |||||
*@brief List of the given size with empty elements. \n | |||||
*@par Inputs: | |||||
*@li element_shape: A shape compatible with that of elements in the list. | |||||
*@li num_elements: The number of elements to reserve. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. | |||||
*@li shape_type: The type of shape in the list. \n | |||||
*@par Outputs: | |||||
*@li handle: An output tensor list . \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListReserve operator. | |||||
*/ | |||||
REG_OP(TensorListReserve) | |||||
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
.INPUT(num_elements, TensorType({DT_INT32})) | |||||
.OUTPUT(handle, TensorType({DT_VARIANT})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.ATTR(shape_type, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListReserve) | |||||
/** | |||||
*@brief Gets the element at the given index position of the input tensor list. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The input list. | |||||
*@li index: A tensor of position. | |||||
*@li element_shape: A shape compatible with that of elements in the list. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li item: The output tensor value at the given index position . \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListGetItem operator. | |||||
*/ | |||||
REG_OP(TensorListGetItem) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.INPUT(index, TensorType({DT_INT32})) | |||||
.INPUT(element_shape, TensorType({DT_INT32})) | |||||
.OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListGetItem) | |||||
/** | |||||
*@brief Sets the index-th position of the list to contain the given tensor. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The input list. | |||||
*@li index: The position in the list to which the tensor will be assigned. | |||||
*@li item: The element to be assigned to that position. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li output_handle: An output tensor list . \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListSetItem operator. | |||||
*/ | |||||
REG_OP(TensorListSetItem) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.INPUT(index, TensorType({DT_INT32})) | |||||
.INPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListSetItem) | |||||
/** | |||||
*@brief Pushes a tensor to each of a batch of tensor lists. \n | |||||
*@par Inputs: | |||||
*@li input_handles: The input tensor lists. | |||||
*@li tensor: The tensor to push into the tensor lists. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li output_handles: The output tensor lists. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListPushBackBatch operator. | |||||
*/ | |||||
REG_OP(TensorListPushBackBatch) | |||||
.INPUT(input_handles, TensorType({DT_VARIANT})) | |||||
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.OUTPUT(output_handles, TensorType({DT_VARIANT})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListPushBackBatch) | |||||
/** | |||||
*@brief Stacks all tensors in the list. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The input tensor list. | |||||
*@li element_shape: A shape compatible with that of elements in the tensor. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. | |||||
*@li num_elements: The number of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li tensor: The stacked tensor. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListStack operator. | |||||
*/ | |||||
REG_OP(TensorListStack) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.INPUT(element_shape, TensorType({DT_INT32})) | |||||
.OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.ATTR(num_elements, Int, -1) | |||||
.OP_END_FACTORY_REG(TensorListStack) | |||||
/** | |||||
*@brief Concatenates all tensors in the list along the 0th dimension. | |||||
Requires that all tensors have the same shape except the first dimension. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The input list. | |||||
*@li element_shape: The shape of the uninitialized elements in the list. | |||||
If the first dimension is not -1, it is assumed that all list elements have | |||||
the same leading dim. | |||||
*@li leading_dims: The list of leading dims of uninitialized list elements. Used if | |||||
the leading dim of input_handle.element_shape or the element_shape input arg | |||||
is not already set. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li tensor: The concatenated result. | |||||
*@li lengths: Output tensor containing sizes of the 0th dimension of tensors | |||||
in the list, used for computing the gradient. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListConcatV2 operator. | |||||
*/ | |||||
REG_OP(TensorListConcatV2) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
.INPUT(leading_dims, TensorType({DT_INT64})) | |||||
.OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.OUTPUT(lengths, TensorType({DT_INT64})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListConcatV2) | |||||
/** | |||||
*@brief Splits a tensor into a list. \n | |||||
*@par Inputs: | |||||
*@li tensor: The input tensor. | |||||
*@li element_shape: A shape compatible with that of elements in the tensor. | |||||
*@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li output_handle: The list. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListSplit operator. | |||||
*/ | |||||
REG_OP(TensorListSplit) | |||||
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
.INPUT(lengths, TensorType({DT_INT64})) | |||||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListSplit) | |||||
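The lengths vector partitions the tensor's 0th dimension into consecutive chunks, one list element per entry. A sketch over a flattened row-major buffer (hypothetical helper; `row` is the element count of one dim-0 slice):

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::vector<float>> SplitRows(const std::vector<float> &tensor,
                                          std::size_t row,
                                          const std::vector<std::int64_t> &lengths) {
  std::vector<std::vector<float>> list;
  std::size_t offset = 0;
  for (std::int64_t len : lengths) {
    const std::size_t chunk = static_cast<std::size_t>(len) * row;  // len rows
    list.emplace_back(tensor.begin() + offset, tensor.begin() + offset + chunk);
    offset += chunk;  // the next chunk starts where this one ends
  }
  return list;
}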
/** | |||||
*@brief Creates a TensorList which, when stacked, has the value of `tensor`. \n | |||||
*@par Inputs: | |||||
*@li tensor: The input tensor. | |||||
*@li element_shape: The shape of elements in the list. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li output_handle: An output tensor list . \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListFromTensor operator. | |||||
*/ | |||||
REG_OP(TensorListFromTensor) | |||||
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListFromTensor) | |||||
/** | |||||
*@brief Resizes the list. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The input tensor list. | |||||
*@li size: The size of the output list. \n | |||||
*@par Outputs: | |||||
*@li output_handle: The output tensor list. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListResize operator. | |||||
*/ | |||||
REG_OP(TensorListResize) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.INPUT(size, TensorType({DT_INT32})) | |||||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
.OP_END_FACTORY_REG(TensorListResize) | |||||
/** | |||||
*@brief Creates a Tensor by indexing into the TensorList. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The input tensor list. | |||||
*@li indices: The indices used to index into the list. | |||||
*@li element_shape: The shape of elements in the list. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li values: The tensor. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListGather operator. | |||||
*/ | |||||
REG_OP(TensorListGather) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(element_shape, TensorType({DT_INT32})) | |||||
.OUTPUT(values, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListGather) | |||||
/** | |||||
*@brief Creates a TensorList by indexing into a Tensor. \n | |||||
*@par Inputs: | |||||
*@li tensor: The input tensor. | |||||
*@li indices: The indices used to index into the list. | |||||
*@li element_shape: The shape of the elements in the list (may be less specific than | |||||
the shape of the tensor). | |||||
*@li num_elements: The size of the output list. Must be large enough to accommodate | |||||
the largest index in indices. If -1, the list is just large enough to include | |||||
the largest index in indices. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li output_handle: The TensorList. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListScatterV2 operator. | |||||
*/ | |||||
REG_OP(TensorListScatterV2) | |||||
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
.INPUT(num_elements, TensorType({DT_INT32})) | |||||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListScatterV2) | |||||
/** | |||||
*@brief Scatters tensor at indices in an input list. \n | |||||
*@par Inputs: | |||||
*@li input_handle: The input tensor list. | |||||
*@li tensor: The input tensor. | |||||
*@li indices: The indices used to index into the list. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li output_handle: The TensorList. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListScatterIntoExistingList operator. | |||||
*/ | |||||
REG_OP(TensorListScatterIntoExistingList) | |||||
.INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListScatterIntoExistingList) | |||||
/** | |||||
*@brief Concatenates two tensor lists into a new tensor list. \n | |||||
*@par Inputs: | |||||
*@li input_a: The input tensor list A. | |||||
*@li input_b: The input tensor list B. \n | |||||
*@par Attributes: | |||||
*@li element_dtype: The type of elements in the list. \n | |||||
*@par Outputs: | |||||
*@li output: The output list. \n | |||||
*@par Third-party framework compatibility. | |||||
*Compatible with tensorflow TensorListConcatLists operator. | |||||
*/ | |||||
REG_OP(TensorListConcatLists) | |||||
.INPUT(input_a, TensorType({DT_VARIANT})) | |||||
.INPUT(input_b, TensorType({DT_VARIANT})) | |||||
.OUTPUT(output, TensorType({DT_VARIANT})) | |||||
.ATTR(element_dtype, Type, DT_INT32) | |||||
.OP_END_FACTORY_REG(TensorListConcatLists) | |||||
} // namespace ge | |||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -222,6 +222,24 @@ REG_OP(Bucketize) | |||||
.REQUIRED_ATTR(boundaries, ListFloat) | .REQUIRED_ATTR(boundaries, ListFloat) | ||||
.OP_END_FACTORY_REG(Bucketize) | .OP_END_FACTORY_REG(Bucketize) | ||||
/** | |||||
*@brief Returns a new tensor with the truncated integer values of the elements of input. \n | |||||
*@par Inputs: | |||||
*One inputs, including: | |||||
* @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n | |||||
*@par Outputs: | |||||
*output_y: A tensor with the same type and shape as input_x. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator Trunc. \n | |||||
*/ | |||||
REG_OP(Trunc) | |||||
.INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) | |||||
.OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) | |||||
.OP_END_FACTORY_REG(Trunc) | |||||
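Element-wise, Trunc rounds toward zero, matching std::trunc; a minimal reference:

#include <cmath>
#include <vector>

std::vector<float> TruncRef(std::vector<float> x) {
  for (float &v : x) v = std::trunc(v);  // trunc(2.7) == 2, trunc(-2.7) == -2
  return x;
}

The same rule carries over unchanged to the integer dtypes, where truncation is the identity.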
/** | /** | ||||
*@brief Computes the sum along sparse segments of a tensor . \n | *@brief Computes the sum along sparse segments of a tensor . \n | ||||
@@ -365,6 +383,27 @@ REG_OP(GetNext) | |||||
.ATTR(channel_name, String, "") | .ATTR(channel_name, String, "") | ||||
.OP_END_FACTORY_REG(GetNext) | .OP_END_FACTORY_REG(GetNext) | ||||
/** | |||||
*@brief Get dynamic dims after GetNext. \n | |||||
*@par Inputs: | |||||
*input: A nested structure of Tensor objects, from GetNext's output. \n | |||||
*@par Attributes: | |||||
*@li shape_info: GE shape_info for each input; -1 means an unknown dim. | |||||
*@li N: The number of inputs. \n | |||||
*@par Outputs: | |||||
*dims: GE unknown dims, a vector of int64. \n | |||||
*/ | |||||
REG_OP(GetDynamicDims) | |||||
.DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(dims, TensorType({DT_INT32, DT_INT64})) | |||||
.REQUIRED_ATTR(shape_info, ListInt) | |||||
.REQUIRED_ATTR(N, Int) | |||||
.OP_END_FACTORY_REG(GetDynamicDims) | |||||
/** | /** | ||||
*@brief End of sequence . \n | *@brief End of sequence . \n | ||||
@@ -494,6 +533,29 @@ REG_OP(NextAfter) | |||||
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) | .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) | ||||
.OP_END_FACTORY_REG(NextAfter) | .OP_END_FACTORY_REG(NextAfter) | ||||
/** | |||||
*@brief Calculates the p-norm distance between each pair of row vectors in the input. \n | |||||
*@par Inputs: | |||||
*One input, including: | |||||
* @li x: A tensor of shape [N, M]. Must be one of the following types: | |||||
* float16, float32. \n | |||||
*@par Attributes: | |||||
*@li p: An optional float. Defaults to 2.0. \n | |||||
*@par Outputs: | |||||
*y: A Tensor with the same type as x and shape [N * (N - 1) / 2]. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator Pdist. \n | |||||
*/ | |||||
REG_OP(Pdist) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(p, Float, 2.0) | |||||
.OP_END_FACTORY_REG(Pdist) | |||||
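Assuming the PyTorch pdist convention the compatibility note points to, an input of shape [N, M] yields the N * (N - 1) / 2 upper-triangle distances in row-major pair order; a reference sketch for finite p > 0:

#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> PdistRef(const std::vector<std::vector<float>> &x, float p) {
  std::vector<float> y;
  for (std::size_t i = 0; i < x.size(); ++i) {
    for (std::size_t j = i + 1; j < x.size(); ++j) {  // each unordered pair once
      double s = 0.0;
      for (std::size_t k = 0; k < x[i].size(); ++k) {
        s += std::pow(std::fabs(x[i][k] - x[j][k]), p);
      }
      y.push_back(static_cast<float>(std::pow(s, 1.0 / p)));  // ||x_i - x_j||_p
    }
  }
  return y;
}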
/** | /** | ||||
*@brief Compute element-wise finiteness, return a boolean tensor. | *@brief Compute element-wise finiteness, return a boolean tensor. | ||||
@@ -624,6 +686,7 @@ REG_OP(NLLLoss) | |||||
.OUTPUT(y, TensorType({DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT})) | ||||
.OUTPUT(total_weight, TensorType({DT_FLOAT})) | .OUTPUT(total_weight, TensorType({DT_FLOAT})) | ||||
.ATTR(reduction, String, "mean") | .ATTR(reduction, String, "mean") | ||||
.ATTR(ignore_index, Int, -100) | |||||
.OP_END_FACTORY_REG(NLLLoss) | .OP_END_FACTORY_REG(NLLLoss) | ||||
/** | /** | ||||
@@ -653,6 +716,7 @@ REG_OP(NLLLossGrad) | |||||
.INPUT(total_weight, TensorType({DT_FLOAT})) | .INPUT(total_weight, TensorType({DT_FLOAT})) | ||||
.OUTPUT(x_grad, TensorType({DT_FLOAT})) | .OUTPUT(x_grad, TensorType({DT_FLOAT})) | ||||
.ATTR(reduction, String, "mean") | .ATTR(reduction, String, "mean") | ||||
.ATTR(ignore_index, Int, -100) | |||||
.OP_END_FACTORY_REG(NLLLossGrad) | .OP_END_FACTORY_REG(NLLLossGrad) | ||||
/** | /** | ||||
@@ -710,6 +774,9 @@ REG_OP(IFMR) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with mindspore | *Compatible with mindspore | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(WtsARQ) | REG_OP(WtsARQ) | ||||
@@ -741,6 +808,9 @@ REG_OP(WtsARQ) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with mindspore | *Compatible with mindspore | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(ActsULQ) | REG_OP(ActsULQ) | ||||
@@ -748,8 +818,8 @@ REG_OP(ActsULQ) | |||||
.INPUT(clamp_min, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(clamp_min, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.INPUT(clamp_max, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(clamp_max, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OUTPUT(clamp_min_mask, TensorType({DT_BOOL})) | |||||
.OUTPUT(clamp_max_mask, TensorType({DT_BOOL})) | |||||
.OUTPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.ATTR(fixed_min, Bool, false) | .ATTR(fixed_min, Bool, false) | ||||
.ATTR(num_bits, Int, 8) | .ATTR(num_bits, Int, 8) | ||||
@@ -768,12 +838,15 @@ REG_OP(ActsULQ) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with mindspore | *Compatible with mindspore | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(ActsULQInputGrad) | REG_OP(ActsULQInputGrad) | ||||
.INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.INPUT(clamp_min_mask, TensorType({DT_BOOL})) | |||||
.INPUT(clamp_max_mask, TensorType({DT_BOOL})) | |||||
.INPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(x_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(x_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OP_END_FACTORY_REG(ActsULQInputGrad) | .OP_END_FACTORY_REG(ActsULQInputGrad) | ||||
@@ -790,11 +863,14 @@ REG_OP(ActsULQInputGrad) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with mindspore | *Compatible with mindspore | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(ActULQClampMaxGrad) | REG_OP(ActULQClampMaxGrad) | ||||
.INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.INPUT(clamp_max_mask, TensorType({DT_BOOL})) | |||||
.INPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OUTPUT(clamp_max_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(clamp_max_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OP_END_FACTORY_REG(ActULQClampMaxGrad) | .OP_END_FACTORY_REG(ActULQClampMaxGrad) | ||||
@@ -812,15 +888,208 @@ REG_OP(ActULQClampMaxGrad) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with mindspore | *Compatible with mindspore | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(ActULQClampMinGrad) | REG_OP(ActULQClampMinGrad) | ||||
.INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.INPUT(clamp_min_mask, TensorType({DT_BOOL})) | |||||
.INPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OP_END_FACTORY_REG(ActULQClampMinGrad) | .OP_END_FACTORY_REG(ActULQClampMinGrad) | ||||
/** | |||||
* @brief Computes Lp norm. | |||||
* @par Inputs: | |||||
* @li x: An ND tensor of type float16, float32. \n | |||||
* | |||||
* @par Attributes: | |||||
* @li p: An int, or "inf" / "-inf". Defaults to 2. | |||||
* @li axes: ListInt, {} means all axes will be computed. | |||||
* @li keepdim: Bool, default is false. | |||||
* @li epsilon: Float, default is 1e-12. \n | |||||
* @par Outputs: | |||||
* @li y: An ND tensor of type float16, float32. The shape of y depends | |||||
* on axes and keepdim. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator LpNorm. | |||||
*/ | |||||
REG_OP(LpNorm) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(p, Int, 2) | |||||
.ATTR(axes, ListInt, {}) | |||||
.ATTR(keepdim, Bool, false) | |||||
.ATTR(epsilon, Float, 1e-12) | |||||
.OP_END_FACTORY_REG(LpNorm) | |||||
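For a finite integer p with axes == {} (reduce over everything), the value is (sum |x_i|^p)^(1/p); how epsilon enters is an assumption here — this sketch clamps the result from below:

#include <algorithm>
#include <cmath>
#include <vector>

float LpNormAll(const std::vector<float> &x, int p, float epsilon) {
  double s = 0.0;
  for (float v : x) s += std::pow(std::fabs(v), p);       // sum of |v|^p
  const float norm = static_cast<float>(std::pow(s, 1.0 / p));
  return std::max(norm, epsilon);  // assumed epsilon handling: a lower clamp
}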
/** | |||||
* @brief Composes a complex tensor from its real and imaginary parts. | |||||
* @par Inputs: | |||||
* @li real: An ND tensor of type float32 or double. | |||||
* @li imag: An ND tensor of type float32 or double. \n | |||||
* | |||||
* @par Outputs: | |||||
* @li out: An ND tensor of type complex64, complex128 \n | |||||
*/ | |||||
REG_OP(Complex) | |||||
.INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
.ATTR(Tout, Type, DT_COMPLEX64) | |||||
.OP_END_FACTORY_REG(Complex) | |||||
/** | |||||
* @brief Returns the imaginary part of a complex tensor. | |||||
* @par Inputs: | |||||
* @li input: An ND tensor of type complex64, complex128 \n | |||||
* | |||||
* @par Outputs: | |||||
* @li output: An ND tensor of type float32 or double. \n | |||||
*/ | |||||
REG_OP(Imag) | |||||
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(Tout, Type, DT_FLOAT) | |||||
.OP_END_FACTORY_REG(Imag) | |||||
/** | |||||
* @brief Returns the element-wise angle (argument) of a complex tensor. | |||||
* @par Inputs: | |||||
* @li input: An ND tensor of type complex64, complex128 \n | |||||
* | |||||
* @par Outputs: | |||||
* @li output: An ND tensor of type float32 or double. \n | |||||
*/ | |||||
REG_OP(Angle) | |||||
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(Tout, Type, DT_FLOAT) | |||||
.OP_END_FACTORY_REG(Angle) | |||||
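The three complex helpers map directly onto std::complex; for a single element:

#include <complex>

void ComplexTrioDemo() {
  std::complex<float> c(3.0f, 4.0f);  // Complex: out = real + i * imag
  float im = c.imag();                // Imag  -> 4.0f
  float ang = std::arg(c);            // Angle -> atan2(imag, real), ~0.9273f here
  (void)im;
  (void)ang;
}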
/** | |||||
*@brief Computes the gradient of SoftMarginLoss. \n | |||||
*@par Inputs: | |||||
*Three inputs, including: | |||||
* @li predict: A tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @li label: A tensor with the same shape as predict. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @li dout: A tensor with the same shape as predict. Must be one of the following types: | |||||
* float16, float32. \n | |||||
*@par Attributes: | |||||
* @li reduction: Specifies the reduction to apply to the output: | |||||
* 'none' | 'mean' | 'sum'. Default: 'mean'. \n | |||||
*@par Outputs: | |||||
* gradient: A Tensor with the same type as predict. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator SoftMarginLoss Backward. \n | |||||
*/ | |||||
REG_OP(SoftMarginLossGrad) | |||||
.INPUT(predict, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(label, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(dout, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(gradient, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(SoftMarginLossGrad) | |||||
/** | |||||
*@brief Calculates the cross product of two tensors. \n | |||||
*@par Inputs: | |||||
*Two inputs, including: | |||||
* @li x1: A tensor. Must be one of the following types: | |||||
* float16, float32, int32, int8, uint8, int16. \n | |||||
* @li x2: A tensor. Must be one of the following types: | |||||
* float16, float32, int32, int8, uint8, int16. \n | |||||
*@par Attributes: | |||||
*@li dim: The dimension along which to compute the cross product. Defaults to -65530, a sentinel for an unspecified dimension. \n | |||||
*@par Outputs: | |||||
*y: A Tensor with the same type and shape as x1. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator cross. \n | |||||
*/ | |||||
REG_OP(Cross) | |||||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16})) | |||||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16})) | |||||
.ATTR(dim, Int, -65530) | |||||
.OP_END_FACTORY_REG(Cross) | |||||
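Along the selected dimension (which must have size 3), each output triple is the standard cross product; per triple:

#include <array>

std::array<float, 3> Cross3(const std::array<float, 3> &a,
                            const std::array<float, 3> &b) {
  return {a[1] * b[2] - a[2] * b[1],   // i component
          a[2] * b[0] - a[0] * b[2],   // j component
          a[0] * b[1] - a[1] * b[0]};  // k component
}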
/** | |||||
*@brief Computes the batched p-norm distance between each pair of | |||||
*the two collections of row vectors. \n | |||||
*@par Inputs: | |||||
*Two inputs, including: | |||||
* @li x1: A tensor with shape BxPxM. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @li x2: A tensor with shape BxRxM. Must be one of the following types: | |||||
* float16, float32. \n | |||||
*@par Attributes: | |||||
* @li p: An optional float >= 0 or inf. Defaults to 2.0. \n | |||||
*@par Outputs: | |||||
* y: A Tensor with the same type as x1 and with shape BxPxR. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator Cdist. \n | |||||
*/ | |||||
REG_OP(Cdist) | |||||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(p, Float, 2.0) | |||||
.OP_END_FACTORY_REG(Cdist) | |||||
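For one batch element, y[i][j] is the p-norm distance between row i of x1 and row j of x2; a reference sketch for finite p > 0:

#include <cmath>
#include <cstddef>
#include <vector>

std::vector<std::vector<float>> CdistRef(const std::vector<std::vector<float>> &x1,
                                         const std::vector<std::vector<float>> &x2,
                                         float p) {
  std::vector<std::vector<float>> y(x1.size(), std::vector<float>(x2.size()));
  for (std::size_t i = 0; i < x1.size(); ++i) {
    for (std::size_t j = 0; j < x2.size(); ++j) {
      double s = 0.0;
      for (std::size_t k = 0; k < x1[i].size(); ++k) {
        s += std::pow(std::fabs(x1[i][k] - x2[j][k]), p);
      }
      y[i][j] = static_cast<float>(std::pow(s, 1.0 / p));  // ||x1_i - x2_j||_p
    }
  }
  return y;
}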
/** | |||||
*@brief Computes the grad of x1 in cdist. \n | |||||
*@par Inputs: | |||||
*Four inputs, including: | |||||
* @li grad: Grad with shape BxPxR. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @li x1: A tensor with shape BxPxM. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @li x2: A tensor with shape BxRxM. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @li cdist: Output tensor of cdist forward with shape BxPxR. | |||||
* Must be one of the following types: float16, float32. \n | |||||
*@par Attributes: | |||||
* @li p: An optional float >= 0 or inf. Defaults to 2.0. \n | |||||
*@par Outputs: | |||||
* y: A Tensor with the same type and shape as x1. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator Cdist Backward. \n | |||||
*/ | |||||
REG_OP(CdistGrad) | |||||
.INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(cdist, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.ATTR(p, Float, 2.0) | |||||
.OP_END_FACTORY_REG(CdistGrad) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -38,8 +38,8 @@ namespace ge { | |||||
* float32, int32. Has format [ND, NHWC] . \n | * float32, int32. Has format [ND, NHWC] . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: The result matrix Tensor. 2D. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | ||||
@@ -70,8 +70,8 @@ REG_OP(MatMul) | |||||
* float32, int32. Has format [ND, NHWC] . \n | * float32, int32. Has format [ND, NHWC] . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: The result matrix Tensor. 2D. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | ||||
@@ -91,6 +91,36 @@ REG_OP(MatMulV2) | |||||
.ATTR(offset_x, Int, 0) | .ATTR(offset_x, Int, 0) | ||||
.OP_END_FACTORY_REG(MatMulV2) | .OP_END_FACTORY_REG(MatMulV2) | ||||
/** | |||||
*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n | |||||
*@par Inputs: | |||||
*Four inputs, including: | |||||
* @li x1: A matrix Tensor. 2D. Must be one of the following types: int8. | |||||
* @li x2: A matrix Tensor. 2D. Must be one of the following types: int8. | |||||
* @li compress_index: A compress index matrix of type int8. | |||||
* @li bias: A 1D Tensor. Must be one of the following types: int32, float16. | |||||
*@par Attributes: | |||||
*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
*@par Outputs: | |||||
*y: The result matrix Tensor. 2D. Must be one of the following types: float16, | |||||
* int32. \n | |||||
*/ | |||||
REG_OP(MatMulV2Compress) | |||||
.INPUT(x1, TensorType({DT_INT8})) | |||||
.INPUT(x2, TensorType({DT_INT8})) | |||||
.INPUT(compress_index, TensorType({DT_INT8})) | |||||
.OPTIONAL_INPUT(bias, TensorType({DT_INT32, DT_FLOAT16})) | |||||
.OUTPUT(y, TensorType({DT_INT32, DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | |||||
.ATTR(transpose_x1, Bool, false) | |||||
.ATTR(transpose_x2, Bool, false) | |||||
.ATTR(offset_x, Int, 0) | |||||
.OP_END_FACTORY_REG(MatMulV2Compress) | |||||
/** | /** | ||||
*@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . \n | *@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . \n | ||||
@@ -149,15 +179,15 @@ REG_OP(GEMM) | |||||
*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n | *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*Three inputs, including: | |||||
*Two inputs, including: | |||||
* @li x1: A matrix Tensor. Must be one of the following types: float16, | * @li x1: A matrix Tensor. Must be one of the following types: float16, | ||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. | * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. | ||||
* @li x2: A matrix Tensor. Must be one of the following types: float16, | * @li x2: A matrix Tensor. Must be one of the following types: float16, | ||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||||
*@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||||
*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||||
*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | ||||
@@ -175,6 +205,42 @@ REG_OP(BatchMatMul) | |||||
.ATTR(adj_x2, Bool, false) | .ATTR(adj_x2, Bool, false) | ||||
.OP_END_FACTORY_REG(BatchMatMul) | .OP_END_FACTORY_REG(BatchMatMul) | ||||
/** | |||||
* @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li x1: A matrix Tensor. Must be one of the following types: float16, | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. | |||||
* @li x2: A matrix Tensor. Must be one of the following types: float16, | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||||
* @li bias: A matrix Tensor. Must be one of the following types: float16, | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||||
* @par Attributes: | |||||
* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||||
* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||||
* @par Outputs: | |||||
* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the TensorFlow operator BatchMatmul. | |||||
*/ | |||||
REG_OP(BatchMatMulV2) | |||||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
.ATTR(adj_x1, Bool, false) | |||||
.ATTR(adj_x2, Bool, false) | |||||
.ATTR(offset_x, Int, 0) | |||||
.OP_END_FACTORY_REG(BatchMatMulV2) | |||||
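The adj_x1/adj_x2 flags transpose the last two dimensions of the respective operand before the multiply. A single-batch-element sketch (plain nested vectors; bias and offset handling omitted):

#include <cstddef>
#include <vector>

using Mat = std::vector<std::vector<float>>;

Mat BatchMatMulOne(const Mat &a, const Mat &b, bool adj_a, bool adj_b) {
  // Read m[r][c], or m[c][r] when the operand is adjointed.
  auto at = [](const Mat &m, std::size_t r, std::size_t c, bool adj) {
    return adj ? m[c][r] : m[r][c];
  };
  const std::size_t M = adj_a ? a[0].size() : a.size();
  const std::size_t K = adj_a ? a.size() : a[0].size();
  const std::size_t N = adj_b ? b.size() : b[0].size();
  Mat y(M, std::vector<float>(N, 0.0f));
  for (std::size_t i = 0; i < M; ++i)
    for (std::size_t j = 0; j < N; ++j)
      for (std::size_t k = 0; k < K; ++k)
        y[i][j] += at(a, i, k, adj_a) * at(b, k, j, adj_b);
  return y;
}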
/** | /** | ||||
*@brief Computes half the L2 norm of a tensor without the sqrt . \n | *@brief Computes half the L2 norm of a tensor without the sqrt . \n | ||||
@@ -334,7 +400,7 @@ REG_OP(MatrixSetDiagD) | |||||
* int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, | * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, | ||||
* uint64 | * uint64 | ||||
*@li indices: An ND Tensor. | *@li indices: An ND Tensor. | ||||
*Must be one of the following types: int32, int64 | |||||
*Must be one of the following types: int32 or int64 | |||||
*@li updates: An ND Tensor. | *@li updates: An ND Tensor. | ||||
*Must be one of the following types: float16, float32, int8, uint8, double, | *Must be one of the following types: float16, float32, int8, uint8, double, | ||||
* int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, | * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, | ||||
@@ -378,6 +444,9 @@ REG_OP(ScatterNdUpdate) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator TensorScatterUpdate. | * Compatible with the TensorFlow operator TensorScatterUpdate. | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(TensorScatterUpdate) | REG_OP(TensorScatterUpdate) | ||||
.INPUT(x, TensorType::BasicType()) | .INPUT(x, TensorType::BasicType()) | ||||
@@ -386,6 +455,34 @@ REG_OP(TensorScatterUpdate) | |||||
.OUTPUT(y, TensorType::BasicType()) | .OUTPUT(y, TensorType::BasicType()) | ||||
.OP_END_FACTORY_REG(TensorScatterUpdate) | .OP_END_FACTORY_REG(TensorScatterUpdate) | ||||
/** | |||||
*@brief Uses "updates" to update tensor "data" by "indices". \n | |||||
*@par Inputs: | |||||
* Three inputs, including: | |||||
*@li data: An ND Tensor . \n | |||||
*Must be one of the following types: float16, float32, int32, int8, uint8 | |||||
*@li indices: An ND Tensor of type int32 or int64 | |||||
*@li updates: A Tensor with the same shape as indices. Format: NCHW, NHWC . \n | |||||
*Must be one of the following types: float16, float32, int32, int8, uint8 | |||||
*@par Attributes: | |||||
*@li axis: An optional attribute. Defaults to 0. | |||||
*@par Outputs: | |||||
*y: A Tensor. Has the same type and format as input "data" . \n | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the ONNX operator ScatterElements. | |||||
*/ | |||||
REG_OP(ScatterElements) | |||||
.INPUT(data, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.ATTR(axis, Int, 0) | |||||
.OP_END_FACTORY_REG(ScatterElements) | |||||
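Per the ONNX ScatterElements definition, the output starts as a copy of data and each update lands at the position given by indices along axis. For axis == 0 on 2-D data:

#include <cstddef>
#include <vector>

std::vector<std::vector<float>> ScatterElementsAxis0(
    std::vector<std::vector<float>> data,
    const std::vector<std::vector<int>> &indices,
    const std::vector<std::vector<float>> &updates) {
  for (std::size_t i = 0; i < indices.size(); ++i) {
    for (std::size_t j = 0; j < indices[i].size(); ++j) {
      data[indices[i][j]][j] = updates[i][j];  // y[indices[i][j]][j] = updates[i][j]
    }
  }
  return data;  // positions not named by indices keep their original values
}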
/** | /** | ||||
*@brief Adds sparse "updates" to a variable reference . \n | *@brief Adds sparse "updates" to a variable reference . \n | ||||
@@ -394,7 +491,7 @@ REG_OP(TensorScatterUpdate) | |||||
*@li var: An ND Tensor . \n | *@li var: An ND Tensor . \n | ||||
*Must be one of the following types: float16, float32, int32, int8, uint8 | *Must be one of the following types: float16, float32, int32, int8, uint8 | ||||
*@li indices: An ND Tensor of type int32 or int64. | |||||
*@li indices: An ND Tensor of type int32 or int64 | |||||
*@li updates: An Tensor. format:NCHW, NHWC . \n | *@li updates: An Tensor. format:NCHW, NHWC . \n | ||||
@@ -412,10 +509,10 @@ REG_OP(TensorScatterUpdate) | |||||
* Compatible with the TensorFlow operator ScatterAdd. | * Compatible with the TensorFlow operator ScatterAdd. | ||||
*/ | */ | ||||
REG_OP(ScatterAdd) | REG_OP(ScatterAdd) | ||||
.INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | .INPUT(indices, TensorType::IndexNumberType()) | ||||
.INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.ATTR(use_locking, Bool, false) | .ATTR(use_locking, Bool, false) | ||||
.OP_END_FACTORY_REG(ScatterAdd) | .OP_END_FACTORY_REG(ScatterAdd) | ||||
@@ -428,7 +525,7 @@ REG_OP(ScatterAdd) | |||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
*@li indices: An ND Tensor. | *@li indices: An ND Tensor. | ||||
*Must be one of the following types: int32 | |||||
*Must be one of the following types: int32 or int64 | |||||
*@li updates: An ND Tensor. | *@li updates: An ND Tensor. | ||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
@@ -443,10 +540,10 @@ REG_OP(ScatterAdd) | |||||
* Compatible with the TensorFlow operator ScatterDiv. | * Compatible with the TensorFlow operator ScatterDiv. | ||||
*/ | */ | ||||
REG_OP(ScatterDiv) | REG_OP(ScatterDiv) | ||||
.INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.ATTR(use_locking, Bool, false) | .ATTR(use_locking, Bool, false) | ||||
.OP_END_FACTORY_REG(ScatterDiv) | .OP_END_FACTORY_REG(ScatterDiv) | ||||
@@ -458,7 +555,7 @@ REG_OP(ScatterDiv) | |||||
*@li var: An ND Tensor. | *@li var: An ND Tensor. | ||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
*@li indices: An ND Tensor. | *@li indices: An ND Tensor. | ||||
*Must be one of the following types: int32 | |||||
*Must be one of the following types: int32 or int64 | |||||
*@li updates: An ND Tensor. | *@li updates: An ND Tensor. | ||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -472,10 +569,10 @@ REG_OP(ScatterDiv) | |||||
* Compatible with the TensorFlow operator ScatterNdAdd. | * Compatible with the TensorFlow operator ScatterNdAdd. | ||||
*/ | */ | ||||
REG_OP(ScatterNdAdd) | REG_OP(ScatterNdAdd) | ||||
.INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | .INPUT(indices, TensorType::IndexNumberType()) | ||||
.INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.ATTR(use_locking, Bool, false) | .ATTR(use_locking, Bool, false) | ||||
.OP_END_FACTORY_REG(ScatterNdAdd) | .OP_END_FACTORY_REG(ScatterNdAdd) | ||||
@@ -499,6 +596,9 @@ REG_OP(ScatterNdAdd) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator TensorScatterAdd. | * Compatible with the TensorFlow operator TensorScatterAdd. | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(TensorScatterAdd) | REG_OP(TensorScatterAdd) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | ||||
@@ -515,7 +615,7 @@ REG_OP(TensorScatterAdd) | |||||
*@li var: An ND Tensor. | *@li var: An ND Tensor. | ||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
*@li indices: An ND Tensor. | *@li indices: An ND Tensor. | ||||
*Must be one of the following types: int32, int64 | |||||
*Must be one of the following types: int32 or int64 | |||||
*@li updates: An ND Tensor. | *@li updates: An ND Tensor. | ||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
@@ -530,10 +630,10 @@ REG_OP(TensorScatterAdd) | |||||
* Compatible with the TensorFlow operator ScatterNdSub. | * Compatible with the TensorFlow operator ScatterNdSub. | ||||
*/ | */ | ||||
REG_OP(ScatterNdSub) | REG_OP(ScatterNdSub) | ||||
.INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | .INPUT(indices, TensorType::IndexNumberType()) | ||||
.INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.ATTR(use_locking, Bool, false) | .ATTR(use_locking, Bool, false) | ||||
.OP_END_FACTORY_REG(ScatterNdSub) | .OP_END_FACTORY_REG(ScatterNdSub) | ||||
@@ -557,6 +657,9 @@ REG_OP(ScatterNdSub) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator TensorScatterSub. | * Compatible with the TensorFlow operator TensorScatterSub. | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(TensorScatterSub) | REG_OP(TensorScatterSub) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | ||||
@@ -573,7 +676,7 @@ REG_OP(TensorScatterSub) | |||||
*@li var: An ND Tensor. | *@li var: An ND Tensor. | ||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
*@li indices: An ND Tensor. | *@li indices: An ND Tensor. | ||||
*Must be one of the following types: int32, int64 | |||||
*Must be one of the following types: int32 or int64 | |||||
*@li updates: An ND Tensor. | *@li updates: An ND Tensor. | ||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -587,10 +690,10 @@ REG_OP(TensorScatterSub) | |||||
* Compatible with the TensorFlow operator ScatterSub. | * Compatible with the TensorFlow operator ScatterSub. | ||||
*/ | */ | ||||
REG_OP(ScatterSub) | REG_OP(ScatterSub) | ||||
.INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | .INPUT(indices, TensorType::IndexNumberType()) | ||||
.INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.ATTR(use_locking, Bool, false) | .ATTR(use_locking, Bool, false) | ||||
.OP_END_FACTORY_REG(ScatterSub) | .OP_END_FACTORY_REG(ScatterSub) | ||||
@@ -761,7 +864,7 @@ REG_OP(ConfusionMatrix) | |||||
*@li var: An ND Tensor. | *@li var: An ND Tensor. | ||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
*@li indices: An ND Tensor. | *@li indices: An ND Tensor. | ||||
*Must be one of the following types: int32 | |||||
*Must be one of the following types: int32 or int64 | |||||
*@li updates: An ND Tensor . \n | *@li updates: An ND Tensor . \n | ||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
@@ -778,7 +881,7 @@ REG_OP(ConfusionMatrix) | |||||
*/ | */ | ||||
REG_OP(ScatterMul) | REG_OP(ScatterMul) | ||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | ||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | ||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | ||||
.ATTR(use_locking, Bool, false) | .ATTR(use_locking, Bool, false) | ||||
@@ -791,13 +894,13 @@ REG_OP(ScatterMul) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li var: An ND Tensor. | *@li var: An ND Tensor. | ||||
*Must be one of the following types: float16, float, int32 | |||||
*Must be one of the following types: float16, float, int32, int8, uint8 | |||||
*@li indices: An ND Tensor. | *@li indices: An ND Tensor. | ||||
*Must be one of the following types: int32 | |||||
*Must be one of the following types: int32 or int64 | |||||
*@li updates: An ND Tensor. | *@li updates: An ND Tensor. | ||||
*Must be one of the following types: float16, float, int32 | |||||
*Must be one of the following types: float16, float, int32, int8, uint8 | |||||
*@par Attributes: | *@par Attributes: | ||||
*use_locking: An optional bool. Defaults to "False". If "True", the operation | *use_locking: An optional bool. Defaults to "False". If "True", the operation | ||||
@@ -810,10 +913,10 @@ REG_OP(ScatterMul) | |||||
* Compatible with the TensorFlow operator ScatterMin. | * Compatible with the TensorFlow operator ScatterMin. | ||||
*/ | */ | ||||
REG_OP(ScatterMin) | REG_OP(ScatterMin) | ||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) | |||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.ATTR(use_locking, Bool, false) | .ATTR(use_locking, Bool, false) | ||||
.OP_END_FACTORY_REG(ScatterMin) | .OP_END_FACTORY_REG(ScatterMin) | ||||
@@ -824,13 +927,13 @@ REG_OP(ScatterMin) | |||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li var: An ND Tensor . \n | *@li var: An ND Tensor . \n | ||||
*Must be one of the following types: float16, float, int32 | |||||
*Must be one of the following types: float16, float, int32, int8, uint8 | |||||
*@li indices: An NCHW, NHWC, or ND Tensor . \n | *@li indices: An NCHW, NHWC, or ND Tensor . \n | ||||
*Must be one of the following types: int32 | |||||
*Must be one of the following types: int32 or int64 | |||||
*@li updates: An NCHW, NHWC, or ND Tensor . \n | *@li updates: An NCHW, NHWC, or ND Tensor . \n | ||||
*Must be one of the following types: float16, float, int32 | |||||
*Must be one of the following types: float16, float, int32, int8, uint8 | |||||
*@par Attributes: | *@par Attributes: | ||||
*use_locking: An optional bool. Defaults to "False". | *use_locking: An optional bool. Defaults to "False". | ||||
@@ -843,10 +946,10 @@ REG_OP(ScatterMin) | |||||
* Compatible with the TensorFlow operator ScatterMax. | * Compatible with the TensorFlow operator ScatterMax. | ||||
*/ | */ | ||||
REG_OP(ScatterMax) | REG_OP(ScatterMax) | ||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) | |||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.ATTR(use_locking, Bool, false) | .ATTR(use_locking, Bool, false) | ||||
.OP_END_FACTORY_REG(ScatterMax) | .OP_END_FACTORY_REG(ScatterMax) | ||||
@@ -860,7 +963,7 @@ REG_OP(ScatterMax) | |||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
*@li indices: An ND Tensor . \n | *@li indices: An ND Tensor . \n | ||||
*Must be one of the following types: int32 | |||||
*Must be one of the following types: int32 or int64 | |||||
*@li updates: An ND Tensor . \n | *@li updates: An ND Tensor . \n | ||||
*Must be one of the following types: float16, float, int32, int8, uint8 | *Must be one of the following types: float16, float, int32, int8, uint8 | ||||
@@ -876,10 +979,10 @@ REG_OP(ScatterMax) | |||||
* Compatible with the TensorFlow operator ScatterUpdate. | * Compatible with the TensorFlow operator ScatterUpdate. | ||||
*/ | */ | ||||
REG_OP(ScatterUpdate) | REG_OP(ScatterUpdate) | ||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | |||||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||||
.ATTR(use_locking, Bool, false) | .ATTR(use_locking, Bool, false) | ||||
.OP_END_FACTORY_REG(ScatterUpdate) | .OP_END_FACTORY_REG(ScatterUpdate) | ||||
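The contract mirrors ScatterAdd but assigns instead of accumulating; with duplicate indices, which update wins is implementation-defined unless "use_locking" serializes the writes. A sketch of the 1-D case (illustrative, not the kernel):

#include <cstddef>
#include <cstdint>
#include <vector>

// ScatterUpdate semantics for a 1-D "var": var[indices[i]] = updates[i].
void ScatterUpdateRef(std::vector<float> &var,
                      const std::vector<int32_t> &indices,
                      const std::vector<float> &updates) {
  for (std::size_t i = 0; i < indices.size(); ++i) {
    var[indices[i]] = updates[i];
  }
}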
@@ -979,6 +1082,137 @@ REG_OP(MatrixDiagV2) | |||||
.OUTPUT(output, TensorType::BasicType()) | .OUTPUT(output, TensorType::BasicType()) | ||||
.OP_END_FACTORY_REG(MatrixDiagV2) | .OP_END_FACTORY_REG(MatrixDiagV2) | ||||
/** | |||||
* @brief Add updates to var_out according to axis and indices. | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li var: A Tensor. Must be one of the following types: | |||||
* float16, float32, int32, int8, uint8. | |||||
* @li indices: A Tensor of indices, of type int32. | |||||
* @li updates: A Tensor of the same type as "var". | |||||
* @par Attributes: | |||||
* @li axis: A required int specifying the axis along which the index add is performed. | |||||
* @par Outputs: | |||||
* @li var_out: A Tensor. Same as input "var". | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator index_add. | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(IndexAdd) | |||||
.INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(updates, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
.OUTPUT(var_out, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
.ATTR(axis, Int, 0) | |||||
.OP_END_FACTORY_REG(IndexAdd) | |||||
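For a concrete reading of "axis", here is a sketch of the PyTorch index_add semantics for a 2-D row-major tensor with axis = 0; the function name and layout are illustrative assumptions:

#include <cstddef>
#include <cstdint>
#include <vector>

// var_out = var, then var_out[indices[i], c] += updates[i, c] for every
// column c; other axis values permute the same pattern.
void IndexAddAxis0(std::vector<float> &var, int cols,
                   const std::vector<int32_t> &indices,
                   const std::vector<float> &updates) {
  for (std::size_t i = 0; i < indices.size(); ++i) {
    for (int c = 0; c < cols; ++c) {
      var[indices[i] * cols + c] += updates[i * cols + c];
    }
  }
}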
/** | |||||
*@brief: Returns the upper triangular part of a matrix (2-D tensor) or a batch of matrices. \n | |||||
*@par Inputs: | |||||
* One input, including: | |||||
*@li x: A Tensor. Must be one of the following types: | |||||
* float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n | |||||
*@par Attributes: | |||||
*diagonal: An optional int. The diagonal to consider. Defaults to 0. \n | |||||
*@par Outputs: | |||||
*y: A Tensor. Has the same type as "x" . \n | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator Triu. | |||||
*/ | |||||
REG_OP(Triu) | |||||
.INPUT(x, TensorType::BasicType()) | |||||
.ATTR(diagonal, Int, 0) | |||||
.OUTPUT(y, TensorType::BasicType()) | |||||
.OP_END_FACTORY_REG(Triu) | |||||
/** | |||||
*@brief: Returns the lower triangular part of a matrix (2-D tensor) or a batch of matrices. \n | |||||
*@par Inputs: | |||||
* One input, including: | |||||
*@li x: A Tensor. Must be one of the following types: | |||||
* float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n | |||||
*@par Attributes: | |||||
*diagonal: An optional int. The diagonal to consider. Defaults to 0. \n | |||||
*@par Outputs: | |||||
*y: A Tensor. Has the same type as "x" . \n | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator Tril. | |||||
*/ | |||||
REG_OP(Tril) | |||||
.INPUT(x, TensorType::BasicType()) | |||||
.ATTR(diagonal, Int, 0) | |||||
.OUTPUT(y, TensorType::BasicType()) | |||||
.OP_END_FACTORY_REG(Tril) | |||||
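The two ops differ only in the predicate applied to each element position. A sketch for a square row-major matrix, following the PyTorch definitions (element (r, c) survives Triu iff c - r >= diagonal, and Tril iff c - r <= diagonal):

#include <vector>

// keep_upper = true reproduces Triu, false reproduces Tril.
std::vector<float> TriPart(const std::vector<float> &x, int n, int diagonal,
                           bool keep_upper) {
  std::vector<float> y(x.size(), 0.0f);
  for (int r = 0; r < n; ++r) {
    for (int c = 0; c < n; ++c) {
      const bool keep = keep_upper ? (c - r >= diagonal) : (c - r <= diagonal);
      if (keep) y[r * n + c] = x[r * n + c];
    }
  }
  return y;
}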
/** | |||||
*@brief Sums the product of the elements of the input operands along dimensions | |||||
* specified using a notation based on the Einstein summation convention. \n | |||||
*@par Inputs: | |||||
* @li x: A list of Tensors. Must be one of the following types: int32, float16, float32. | |||||
* The operands of the summation. It's a dynamic input. \n | |||||
*@par Attributes: | |||||
*@li equation: A required string. The subscripts for the Einstein summation. \n | |||||
*@li N: A required int. The number of tensors in "x". \n | |||||
*@par Outputs: | |||||
*y: A Tensor. Has the same type as "x". \n | |||||
*@attention Constraints: | |||||
*The attribute "N" must equal the number of tensors in "x". \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch einsum operator. | |||||
*/ | |||||
REG_OP(Einsum) | |||||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.REQUIRED_ATTR(equation, String) | |||||
.REQUIRED_ATTR(N, Int) | |||||
.OP_END_FACTORY_REG(Einsum) | |||||
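As a concrete reading of the "equation" attribute: a subscript that appears on the left but not after "->" is summed over. A sketch of what "ij,jk->ik" with N = 2 computes (a plain matrix product; the kernel is of course not implemented this way):

#include <vector>

// Einsum "ij,jk->ik": y[i, k] = sum_j a[i, j] * b[j, k].
std::vector<float> EinsumIjJkIk(const std::vector<float> &a,
                                const std::vector<float> &b,
                                int I, int J, int K) {
  std::vector<float> y(static_cast<std::size_t>(I) * K, 0.0f);
  for (int i = 0; i < I; ++i) {
    for (int j = 0; j < J; ++j) {
      for (int k = 0; k < K; ++k) {
        y[i * K + k] += a[i * J + j] * b[j * K + k];  // reduce over repeated index j
      }
    }
  }
  return y;
}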
/** | |||||
*@brief Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. \n | |||||
*@par Inputs: | |||||
*No inputs. \n | |||||
*@par Attributes: | |||||
*@li num_rows: A required int. \n | |||||
*@li num_columns: An optional int. Defaults to 0. \n | |||||
*@li batch_shape: An optional ListInt. Defaults to []. \n | |||||
*@li dtype: An optional int. Defaults to 0. \n | |||||
*@par Outputs: | |||||
*y: A Tensor with targeted type and shape. \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator Eye. \n | |||||
*/ | |||||
REG_OP(Eye) | |||||
.OUTPUT(y, TensorType::BasicType()) /* "Result, has targeted element type" */ | |||||
.REQUIRED_ATTR(num_rows, Int) | |||||
.ATTR(num_columns, Int, 0) | |||||
.ATTR(batch_shape, ListInt, {}) | |||||
.ATTR(dtype, Int, 0) | |||||
.OP_END_FACTORY_REG(Eye) | |||||
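A sketch of the attribute semantics for the unbatched float case. The assumption that num_columns = 0 means "same as num_rows" follows the TensorFlow eye convention and is not confirmed by this header:

#include <vector>

// Eye: a num_rows x num_columns matrix with ones on the main diagonal.
std::vector<float> EyeRef(int num_rows, int num_columns) {
  if (num_columns <= 0) num_columns = num_rows;  // assumed meaning of the default 0
  std::vector<float> y(static_cast<std::size_t>(num_rows) * num_columns, 0.0f);
  for (int i = 0; i < num_rows && i < num_columns; ++i) {
    y[i * num_columns + i] = 1.0f;
  }
  return y;
}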
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -144,6 +144,64 @@ REG_OP(BatchNorm) | |||||
/** | /** | ||||
*@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
*@par Inputs: | |||||
* Five inputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported) | |||||
*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
if input "x" is with format NDC1HWC0. Specifies the scaling factor. | |||||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
if input "x" is with format NDC1HWC0. Specifies the offset. | |||||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
if input "x" is with format NDC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||||
operation is used for training. | |||||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be | |||||
6D if input "x" is with format NDC1HWC0. Specifies the variance used for inference. Must be "None" | |||||
if the operation is used for training . \n | |||||
*@par Attributes: | |||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". | |||||
*@li data_format: An optional string, specifying the format of "x". Defaults to "NCDHW". | |||||
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | |||||
*@par Outputs: | |||||
* Five outputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported) | |||||
*@li y: A 5D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D. | |||||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
if input "x" is with format NDC1HWC0. Specifies the mean of "x". | |||||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||||
Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x". | |||||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||||
Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||||
Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
*@attention Constraints: | |||||
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | |||||
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". | |||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n | |||||
*@par Third-party framework compatibility | |||||
*@li Compatible with the TensorFlow operator fused_batch_norm. | |||||
*@li Compatible with the TensorFlow operator fused_batch_norm_v2. | |||||
*/ | |||||
REG_OP(BatchNorm3D) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(scale, TensorType({DT_FLOAT})) | |||||
.INPUT(offset, TensorType({DT_FLOAT})) | |||||
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) | |||||
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||||
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||||
.ATTR(epsilon, Float, 0.0001) | |||||
.ATTR(data_format, String, "NCDHW") | |||||
.ATTR(is_training, Bool, true) | |||||
.OP_END_FACTORY_REG(BatchNorm3D) | |||||
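For reference, the per-channel arithmetic in inference mode (is_training = false) is the usual normalization. A host-side sketch with an assumed dense NC[DHW] layout, not the NDC1HWC0 kernel:

#include <cmath>
#include <vector>

// y = scale * (x - mean) / sqrt(variance + epsilon) + offset, per channel.
void BatchNormInferRef(const std::vector<float> &x, std::vector<float> &y,
                       const std::vector<float> &scale,
                       const std::vector<float> &offset,
                       const std::vector<float> &mean,
                       const std::vector<float> &variance,
                       int channels, int spatial, float epsilon = 0.0001f) {
  for (int c = 0; c < channels; ++c) {
    const float inv_std = 1.0f / std::sqrt(variance[c] + epsilon);
    for (int i = 0; i < spatial; ++i) {
      const int idx = c * spatial + i;
      y[idx] = scale[c] * (x[idx] - mean[c]) * inv_std + offset[c];
    }
  }
}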
/** | |||||
*@brief Performs batch normalization . \n | |||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: (NHWC or NCHW supported) | * Five inputs, including: (NHWC or NCHW supported) | ||||
*@li x: A 4D Tensor of type float16 or float32. | *@li x: A 4D Tensor of type float16 or float32. | ||||
@@ -242,6 +300,52 @@ REG_OP(BatchNormGrad) | |||||
/** | /** | ||||
*@brief Performs the backpropagation of BatchNorm . \n | *@brief Performs the backpropagation of BatchNorm . \n | ||||
*@par Inputs: | |||||
* Five inputs, including: | |||||
*@li y_backprop: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient. | |||||
*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0. | |||||
*@li scale: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. | |||||
*@li reserve_space_1: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm3D. | |||||
*@li reserve_space_2: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm3D . \n | |||||
*@par Attributes: | |||||
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | |||||
*@li data_format: An optional string. Defaults to "NCDHW". | |||||
*@li is_training: An optional bool. Defaults to "true". Specifies whether the operation is for training (default) or inference . \n | |||||
*@par Outputs: | |||||
*@li x_backprop: A Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "x". | |||||
*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale". | |||||
*@li offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset". | |||||
*@li reserve_space_4: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output. | |||||
*@li reserve_space_5: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n | |||||
*@attention Constraints: | |||||
* The preceding layer of this operator must be operator BatchNorm3D . \n | |||||
*@see BatchNorm | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad. | |||||
*/ | |||||
REG_OP(BatchNorm3DGrad) | |||||
.INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(scale, TensorType({DT_FLOAT})) | |||||
.INPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||||
.INPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||||
.OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(scale_backprop, TensorType({DT_FLOAT})) | |||||
.OUTPUT(offset_backprop, TensorType({DT_FLOAT})) | |||||
.OUTPUT(reserve_space_4, TensorType({DT_FLOAT})) | |||||
.OUTPUT(reserve_space_5, TensorType({DT_FLOAT})) | |||||
.ATTR(epsilon, Float, 0.0001) | |||||
.ATTR(data_format, String, "NCDHW") | |||||
.ATTR(is_training, Bool, true) | |||||
.OP_END_FACTORY_REG(BatchNorm3DGrad) | |||||
/** | |||||
*@brief Performs the backpropagation of BatchNorm . \n | |||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: | * Five inputs, including: | ||||
*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. | *@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. | ||||
@@ -315,35 +419,7 @@ REG_OP(BNInference) | |||||
.ATTR(use_global_stats, Bool,true) | .ATTR(use_global_stats, Bool,true) | ||||
.ATTR(mode, Int,1) | .ATTR(mode, Int,1) | ||||
.OP_END_FACTORY_REG(BNInference) | .OP_END_FACTORY_REG(BNInference) | ||||
/** | |||||
*@brief aicpu batch normalization host . \n | |||||
*@par Inputs: | |||||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | |||||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||||
*@li momentum: An optional float, mean and variance's Scale factor | |||||
*@par Attributes: | |||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | |||||
*@li use_global_stats: mean inference mode , only can be "True". | |||||
*@li mode: An optional attr, not use | |||||
*@par Outputs: | |||||
*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean | |||||
*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance | |||||
*/ | |||||
REG_OP(BnHost) | |||||
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.ATTR(epsilon, Float, 0.00001) | |||||
.ATTR(mode, Int, 1) | |||||
.ATTR(use_global_stats, Bool, true) | |||||
.OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(BnHost) | |||||
/** | /** | ||||
*@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -365,6 +365,25 @@ REG_OP(BiasAddGrad) | |||||
* 4-D with shape [batch, out_height, out_width, out_channels] | * 4-D with shape [batch, out_height, out_width, out_channels] | ||||
* or [batch, out_channels, out_height, out_width]. | * or [batch, out_channels, out_height, out_width]. | ||||
* Gradients with respect to the output of the convolution. | * Gradients with respect to the output of the convolution. | ||||
*\n | |||||
*\n | |||||
* The following are the supported data types and data formats: | |||||
*@verbatim | |||||
| Tensor | out_bckprop | filter | y | |||||
------------|-------------|---------|-------- | |||||
| Data Type | float16 | float16 | float16 | |||||
| |-------------|---------|-------- | |||||
| | float32 | float32 | float32 | |||||
| |-------------|---------|-------- | |||||
| | float64 | float64 | float64 | |||||
------------|-------------|---------|-------- | |||||
| Format | NCHW | NCHW | NCHW | |||||
| | NHWC | HWCN | NHWC | |||||
@endverbatim | |||||
* For float32 and float64 types, the actual calculation on the chip is based on | |||||
* float16. | |||||
*\n | |||||
* | |||||
*@par Attributes: | *@par Attributes: | ||||
* Five attributes: | * Five attributes: | ||||
* @li strides: A tuple/list of 4 integers. The stride of the sliding window | * @li strides: A tuple/list of 4 integers. The stride of the sliding window | ||||
@@ -377,8 +396,53 @@ REG_OP(BiasAddGrad) | |||||
* channels. | * channels. | ||||
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | ||||
* "NHWC". Specify the data format of the input and output data. | * "NHWC". Specify the data format of the input and output data. | ||||
*\n | |||||
*\n | |||||
* The following value range restrictions must be met: | |||||
*@verbatim | |||||
| Name | Field | Scope | |||||
-------------------|----------|-------------- | |||||
| input_size | H | [1, 4096] | |||||
| | W | [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| Filter | H | [1, 255] | |||||
| | W | [1, 255] | |||||
-------------------|----------|-------------- | |||||
| out_backprop | H*strideH| [1, 4096] | |||||
| | W*strideW| [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| y(fmap) | H | [1, 4096] | |||||
| | W | [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| Stride | H | [1, 63] | |||||
| | W | [1, 63] | |||||
-------------------|----------|-------------- | |||||
| Padding | Top | [0, 255] | |||||
| | Bottom | [0, 255] | |||||
| | Left | [0, 255] | |||||
| | Right | [0, 255] | |||||
-------------------|----------|-------------- | |||||
| Dilation | H | [1, 255] | |||||
| | W | [1, 255] | |||||
@endverbatim | |||||
* On Ascend910, the H and W dimensions of fmap or out_backprop do not support 1 when | |||||
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1. | |||||
* If filter_h = 1 and filter_w = 1, then out_backprop_w * stride_h * stride_w must be less than 4096. | |||||
*\n | |||||
* | |||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type as filter,and has same format as input_size. | * y: A Tensor. Has the same type as filter,and has same format as input_size. | ||||
*\n | |||||
* out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||||
* (dilation_h * (filter_height - 1) + 1)) | |||||
* / stride_h + 1 | |||||
*\n | |||||
* out_backprop_width = (fmap_width + pad_left + pad_right - | |||||
* (dilation_w * (filter_width - 1) + 1)) | |||||
* / stride_w + 1 | |||||
*\n | |||||
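* A worked instance with values chosen purely for illustration: fmap
* 14 x 14, filter 3 x 3, stride 2, dilation 1 and one pixel of padding on
* every side give
* out_backprop_height = (14 + 1 + 1 - (1 * (3 - 1) + 1)) / 2 + 1 = 7
* (integer division), and likewise for the width.
*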
* | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with Tensorflow's conv2d_backprop_input | * Compatible with Tensorflow's conv2d_backprop_input | ||||
*/ | */ | ||||
@@ -454,6 +518,21 @@ REG_OP(Conv2DBackpropInputD) | |||||
* @li bias: An optional tensor. Must have the same type as "y". | * @li bias: An optional tensor. Must have the same type as "y". | ||||
* @li offset_w: An optional 1D tensor for quantized deconvolution. | * @li offset_w: An optional 1D tensor for quantized deconvolution. | ||||
* Type is int8. Reserved.\n | * Type is int8. Reserved.\n | ||||
*\n | |||||
*\n | |||||
* The following are the supported data types and data formats: | |||||
*@verbatim | |||||
| Tensor | x | filter | bias | y | |||||
------------|---------|---------|---------|-------- | |||||
| Data Type | float16 | float16 | float16 | float16 | |||||
| |---------|---------|---------|-------- | |||||
| | int8 | int8 | int32 | int32 | |||||
------------|---------|---------|---------|-------- | |||||
| Format | NCHW | NCHW | ND | NCHW | |||||
@endverbatim | |||||
* For int8, a dequant or requant operator must follow. | |||||
*\n | |||||
* | |||||
*@par Attributes: | *@par Attributes: | ||||
* Six attributes: | * Six attributes: | ||||
* @li strides: A tuple or list of 2 integers. The stride of the sliding window | * @li strides: A tuple or list of 2 integers. The stride of the sliding window | ||||
@@ -467,9 +546,54 @@ REG_OP(Conv2DBackpropInputD) | |||||
* @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n | * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n | ||||
Specify the data format of the input and output data. | Specify the data format of the input and output data. | ||||
* @li offset_x: An optional integer for quantized deconvolution. | * @li offset_x: An optional integer for quantized deconvolution. | ||||
* Defaults to "0". | |||||
* The negative offset added to the input image for int8 type. Ensure that | |||||
* offset_x is within the effective range of int8 [-128, 127]. Defaults to "0". | |||||
*\n | |||||
*\n | |||||
* The following value range restrictions must be met: | |||||
*@verbatim | |||||
| Name | Field | Scope | |||||
-------------------|----------|-------------- | |||||
| x (out_backprop) | H*strideH| [1, 4096] | |||||
| | W*strideW| [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| Filter | H | [1, 255] | |||||
| | W | [1, 255] | |||||
-------------------|----------|-------------- | |||||
| y (fmap) | H | [1, 4096] | |||||
| | W | [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| Stride | H | [1, 63] | |||||
| | W | [1, 63] | |||||
-------------------|----------|-------------- | |||||
| Padding | Top | [0, 255] | |||||
| | Bottom | [0, 255] | |||||
| | Left | [0, 255] | |||||
| | Right | [0, 255] | |||||
-------------------|----------|-------------- | |||||
| Dilation | H | [1, 255] | |||||
| | W | [1, 255] | |||||
-------------------|----------|-------------- | |||||
| Offset_x | | [-128, 127] | |||||
@endverbatim | |||||
* On Ascend910, the H and W dimensions of fmap or out_backprop do not support 1 when | |||||
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1. | |||||
* If filter_h = 1 and filter_w = 1, then out_backprop_w * stride_h * stride_w must be less than 4096. | |||||
*\n | |||||
* | |||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. 4D tensor with shape [batch, channels, height, width]. | * y: A Tensor. 4D tensor with shape [batch, channels, height, width]. | ||||
*\n | |||||
* out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||||
* (dilation_h * (filter_height - 1) + 1)) | |||||
* / stride_h + 1 | |||||
*\n | |||||
* out_backprop_width = (fmap_width + pad_left + pad_right - | |||||
* (dilation_w * (filter_width - 1) + 1)) | |||||
* / stride_w + 1 | |||||
*\n | |||||
* | |||||
* When type of x is float16, the type of y must be float16. | * When type of x is float16, the type of y must be float16. | ||||
* When type of x is int8, the type of y must be int32. | * When type of x is int8, the type of y must be int32. | ||||
*/ | */ | ||||
@@ -502,6 +626,25 @@ REG_OP(Deconvolution) | |||||
* [batch, out_height, out_width, out_channels] or [batch, out_channels, | * [batch, out_height, out_width, out_channels] or [batch, out_channels, | ||||
* out_height, out_width]. Gradients with respect to the output of the | * out_height, out_width]. Gradients with respect to the output of the | ||||
* convolution. | * convolution. | ||||
*\n | |||||
*\n | |||||
* The following are the supported data types and data formats: | |||||
*@verbatim | |||||
| Tensor | x | out_backprop | y | |||||
------------|---------|--------------|--------- | |||||
| Data Type | float16 | float16 | float16 | |||||
| |---------|--------------|--------- | |||||
| | float32 | float32 | float32 | |||||
| |---------|--------------|--------- | |||||
| | float64 | float64 | float64 | |||||
------------|---------|--------------|--------- | |||||
| Format | NCHW | NCHW | NCHW | |||||
| | NHWC | NHWC | HWCN | |||||
@endverbatim | |||||
* For float32 and float64 types of x and out_backprop, the actual calculation on the chip | |||||
* is based on float16. | |||||
*\n | |||||
* | |||||
*@par Attributes: | *@par Attributes: | ||||
* Five attributes: | * Five attributes: | ||||
* @li strides: A tuple/list of 4 integers. The stride of the sliding window | * @li strides: A tuple/list of 4 integers. The stride of the sliding window | ||||
@@ -514,8 +657,52 @@ REG_OP(Deconvolution) | |||||
* channels. | * channels. | ||||
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | ||||
* "NHWC". Specify the data format of the input and output data. | * "NHWC". Specify the data format of the input and output data. | ||||
*\n | |||||
*\n | |||||
* The following value range restrictions must be met: | |||||
*@verbatim | |||||
| Name | Field | Scope | |||||
-------------------|----------|-------------- | |||||
| x(fmap) | H | [1, 4096] | |||||
| | W | [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| Filter Size | H | [1, 255] | |||||
| | W | [1, 255] | |||||
-------------------|----------|-------------- | |||||
| out_backprop | H | [1, 4096] | |||||
| | W | [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| y | H | [1, 4096] | |||||
| | W | [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| Stride | H | [1, 63] | |||||
| | W | [1, 63] | |||||
-------------------|----------|-------------- | |||||
| Padding | Top | [0, 255] | |||||
| | Bottom | [0, 255] | |||||
| | Left | [0, 255] | |||||
| | Right | [0, 255] | |||||
-------------------|----------|-------------- | |||||
| Dilation | H | [1, 255] | |||||
| | W | [1, 255] | |||||
@endverbatim | |||||
* On Ascend910, the H and W dimensions of out_backprop do not support 1 when | |||||
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1. | |||||
*\n | |||||
* | |||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type as x, has the same format as filter_size. | * y: A Tensor. Has the same type as x, has the same format as filter_size. | ||||
*\n | |||||
* out_backprop_height = (in_height + pad_top + pad_bottom - | |||||
* (dilation_h * (filter_height - 1) + 1)) | |||||
* / stride_h + 1 | |||||
*\n | |||||
* out_backprop_width = (in_width + pad_left + pad_right - | |||||
* (dilation_w * (filter_width - 1) + 1)) | |||||
* / stride_w + 1 | |||||
*\n | |||||
* | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with Tensorflow's conv2d_backprop_filter | * Compatible with Tensorflow's conv2d_backprop_filter | ||||
*/ | */ | ||||
@@ -597,16 +784,14 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| Tensor | x | filter | bias | y | | Tensor | x | filter | bias | y | ||||
------------|---------|---------|---------|-------- | ------------|---------|---------|---------|-------- | ||||
| Data Type | float16 | float16 | float16 | float16 | | Data Type | float16 | float16 | float16 | float16 | ||||
| |---------|---------|---------|-------- | |||||
| | float32 | float32 | float32 | float32 | | | float32 | float32 | float32 | float32 | ||||
| |---------|---------|---------|-------- | |||||
| | int8 | int8 | int32 | int32 | | | int8 | int8 | int32 | int32 | ||||
------------|---------|---------|---------|-------- | ------------|---------|---------|---------|-------- | ||||
| Format | NCHW | NCHW | ND | NCHW | | Format | NCHW | NCHW | ND | NCHW | ||||
| | NHWC | HWCN | | NHWC | | | NHWC | HWCN | | NHWC | ||||
@endverbatim | @endverbatim | ||||
* For float32 type, the actual calculation on the chip is based on | * For float32 type, the actual calculation on the chip is based on | ||||
* float16. For int8, a dequant or requant operator must be followed. | |||||
* float16. | |||||
*\n | *\n | ||||
* | * | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -617,8 +802,7 @@ REG_OP(Conv2DBackpropFilterD) | |||||
* (top, bottom, left, right) side of the input. | * (top, bottom, left, right) side of the input. | ||||
*@li dilations: Optional. A list of 4 integers. The dilation factor for each | *@li dilations: Optional. A list of 4 integers. The dilation factor for each | ||||
* dimension of input. The dimension order is determined by the data format of | * dimension of input. The dimension order is determined by the data format of | ||||
* "x". The N and C dimensions must be set to 1. The H and W dimensions must be | |||||
* set to 1 for int8 type. Defaults to [1, 1, 1, 1]. | |||||
* "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1]. | |||||
*@li groups: Optional. An integer of type int32. The number of blocked | *@li groups: Optional. An integer of type int32. The number of blocked | ||||
* connections from input channels to output channels. In_channels and | * connections from input channels to output channels. In_channels and | ||||
* out_channels must both be divisible by "groups". Defaults to 1. | * out_channels must both be divisible by "groups". Defaults to 1. | ||||
@@ -652,6 +836,8 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| Offset_x | | [-128, 127] | | Offset_x | | [-128, 127] | ||||
@endverbatim | @endverbatim | ||||
* The W dimension of the input image may exceed 4096, but such cases may | |||||
* cause compilation errors. | |||||
*\n | *\n | ||||
* | * | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -666,21 +852,6 @@ REG_OP(Conv2DBackpropFilterD) | |||||
* out_width = (in_width + pad_left + pad_right - | * out_width = (in_width + pad_left + pad_right - | ||||
* (dilation_w * (filter_width - 1) + 1)) | * (dilation_w * (filter_width - 1) + 1)) | ||||
* / stride_w + 1 | * / stride_w + 1 | ||||
* | |||||
*@attention Constraints: | |||||
*@li The following restrictions on the output must be met: | |||||
*@verbatim | |||||
| Output | Restrictions | |||||
----------|-------------------------------- | |||||
| H == 1 | H * W(input) == H * W(filter) | |||||
| W == 1 | | |||||
----------|-------------------------------- | |||||
| H != 1 | W(input) == W(filter) | |||||
| W == 1 | Only for Ascend310 Hi3796V300CS | |||||
@endverbatim | |||||
* "H * W (input)" indicates the image size after padding and "H * W (filter)" | |||||
* indicates the filter size after dilation."W(input)" and W(filter) indicate | |||||
* the same rule on the W dimension. | |||||
*\n | *\n | ||||
* | * | ||||
*@par Quantization supported or not | *@par Quantization supported or not | ||||
@@ -778,7 +949,7 @@ REG_OP(Conv2DCompress) | |||||
* With the format "HWCN" , the data is stored in the order of: [filter_height, | * With the format "HWCN" , the data is stored in the order of: [filter_height, | ||||
* filter_width, in_channels / groups, out_channels]. | * filter_width, in_channels / groups, out_channels]. | ||||
*@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format | *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format | ||||
* "NHWC", the data is stored in the order of: [batch, in_height, in_width, | |||||
* "NHWC", the data is stored in the order of: [batch, out_height, out_width, | |||||
* deformable_groups * filter_height * filter_width * 3]. | * deformable_groups * filter_height * filter_width * 3]. | ||||
*@li bias: An optional 1D tensor of additive biases to the filter outputs. | *@li bias: An optional 1D tensor of additive biases to the filter outputs. | ||||
* The data is stored in the order of: [out_channels]. | * The data is stored in the order of: [out_channels]. | ||||
@@ -816,31 +987,20 @@ REG_OP(Conv2DCompress) | |||||
*@li deformable_groups: Optional. An integer of type int32. The number of | *@li deformable_groups: Optional. An integer of type int32. The number of | ||||
* deformable group partitions. In_channels must be divisible by | * deformable group partitions. In_channels must be divisible by | ||||
* "deformable_groups". Defaults to 1. | * "deformable_groups". Defaults to 1. | ||||
*@li modulated: Optional. Specifies the version of DeformableConv2D: "true" | |||||
* means v2, "false" means v1. Currently only v2 is supported. | |||||
*\n | *\n | ||||
*\n | *\n | ||||
* The following value range restrictions must be met: | * The following value range restrictions must be met: | ||||
*@verbatim | *@verbatim | ||||
| Name | Field | Scope | | Name | Field | Scope | ||||
--------------------|--------|---------------------------- | --------------------|--------|---------------------------- | ||||
| Input Image Size | H | [1, 100000] | |||||
| | W | [1, 4096] | |||||
--------------------|--------|---------------------------- | |||||
| Filter Size | H | [1, 255] | |||||
| | W | [1, 255] | |||||
| Input Image Size | H | [1, 100000 / filter_height] | |||||
| | W | [1, 4096 / filter_width] | |||||
--------------------|--------|---------------------------- | --------------------|--------|---------------------------- | ||||
| Stride | H | [1, 63] | |||||
| Filter Size | H | [1, 63] | |||||
| | W | [1, 63] | | | W | [1, 63] | ||||
--------------------|--------|---------------------------- | |||||
| Padding | Top | [0, 255] | |||||
| | Bottom | [0, 255] | |||||
| | Left | [0, 255] | |||||
| | Right | [0, 255] | |||||
------------ -------|--------|---------------------------- | |||||
| Dilation | H | [1, 255] | |||||
| | W | [1, 255] | |||||
@endverbatim | @endverbatim | ||||
* "W(input)" indicate the image width after padding and W(filter) indicates the | |||||
* filter width after dilation. | |||||
*\n | *\n | ||||
* | * | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -855,21 +1015,7 @@ REG_OP(Conv2DCompress) | |||||
* out_width = (in_width + pad_left + pad_right - | * out_width = (in_width + pad_left + pad_right - | ||||
* (dilation_w * (filter_width - 1) + 1)) | * (dilation_w * (filter_width - 1) + 1)) | ||||
* / stride_w + 1 | * / stride_w + 1 | ||||
* | |||||
*@attention Constraints: | |||||
*@li The following restrictions on the output must be met: | |||||
*@verbatim | |||||
| Output | Restrictions | |||||
----------|-------------------------------- | |||||
| H == 1 | H * W(input) == H * W(filter) | |||||
| W == 1 | | |||||
----------|-------------------------------- | |||||
| H != 1 | W(input) == W(filter) | |||||
| W == 1 | Only for Ascend310 Hi3796V300CS | |||||
@endverbatim | |||||
* "H * W(input)" indicates the image size after padding and "H * W(filter)" | |||||
* indicates the filter size after dilation. "W(input)" and W(filter) indicate | |||||
* the same rule on the W dimension. | |||||
*\n | |||||
* | * | ||||
*@par Quantization supported or not | *@par Quantization supported or not | ||||
*@li No | *@li No | ||||
@@ -891,6 +1037,7 @@ REG_OP(DeformableConv2D) | |||||
.ATTR(groups, Int, 1) | .ATTR(groups, Int, 1) | ||||
.ATTR(data_format, String, "NHWC") | .ATTR(data_format, String, "NHWC") | ||||
.ATTR(deformable_groups, Int, 1) | .ATTR(deformable_groups, Int, 1) | ||||
.ATTR(modulated, Bool, true) | |||||
.OP_END_FACTORY_REG(DeformableConv2D) | .OP_END_FACTORY_REG(DeformableConv2D) | ||||
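Since the offsets layout above is easy to get wrong, here is a small helper sketch for the expected element count of "offsets" in the documented NHWC layout; the function name is illustrative, and the trailing factor 3 packs the x offset, y offset and modulation mask:

#include <cstdint>

// offsets shape: [batch, out_h, out_w, deformable_groups * filter_h * filter_w * 3]
int64_t DeformableOffsetsVolume(int64_t batch, int64_t out_h, int64_t out_w,
                                int64_t deformable_groups,
                                int64_t filter_h, int64_t filter_w) {
  return batch * out_h * out_w * deformable_groups * filter_h * filter_w * 3;
}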
/** | /** | ||||
@@ -916,12 +1063,12 @@ REG_OP(DeformableConv2D) | |||||
*@par Attributes: | *@par Attributes: | ||||
* @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
* channels. Reserved. | |||||
* channels. | |||||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
* Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
* @li dilations: A list of 5 integers. Specifies the dilation factor for each | * @li dilations: A list of 5 integers. Specifies the dilation factor for each | ||||
* dimension of "x", now only support [1,1,1,1,1] | |||||
* The N and C dimensions must be 1. Has the same format as "x". | |||||
* dimension of "x". | |||||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||||
* @li offset_x: An optional int. Input offset, used for quantized inference. | * @li offset_x: An optional int. Input offset, used for quantized inference. | ||||
* Defaults to 0. Reserved . \n | * Defaults to 0. Reserved . \n | ||||
@@ -967,8 +1114,8 @@ REG_OP(Conv3D) | |||||
*@par Required Attributes: | *@par Required Attributes: | ||||
* @li strides: A list of 5 integers. Specifies the stride of the sliding window | * @li strides: A list of 5 integers. Specifies the stride of the sliding window | ||||
* for each dimension of "x". | |||||
* The N and C dimensions must be 1. Has the same format as "x". | |||||
* for each dimension of "out_backprop". | |||||
* The N and C dimensions must be 1. Has the same format as "out_backprop". | |||||
* @li pads: A list of 6 integers. | * @li pads: A list of 6 integers. | ||||
* Supports only padding along the D, H and W dimensions in sequence of head, | * Supports only padding along the D, H and W dimensions in sequence of head, | ||||
* tail, top, bottom, left and right . \n | * tail, top, bottom, left and right . \n | ||||
@@ -976,14 +1123,15 @@ REG_OP(Conv3D) | |||||
*@par Attributes: | *@par Attributes: | ||||
* Three attributes: | * Three attributes: | ||||
* @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
* channels. Reserved. | |||||
* channels. | |||||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
* Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
* dimension of the input, now only support [1,1,1,1,1] | |||||
* dimension of the input. | |||||
* The N, C and D dimensions must be 1. Has the same format as "out_backprop". | |||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type as filter,and has same format as input_size | |||||
* y: A Tensor. Has the same type as filter,and has same format as "input_size" | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with Tensorflow's conv3d_backprop_input | * Compatible with Tensorflow's conv3d_backprop_input | ||||
@@ -1011,8 +1159,8 @@ REG_OP(Conv3DBackpropInput) | |||||
*@par Required Attributes: | *@par Required Attributes: | ||||
* @li strides: A list of 5 integers. Specifies the stride of the sliding window | * @li strides: A list of 5 integers. Specifies the stride of the sliding window | ||||
* for each dimension of "x". | |||||
* The N and C dimensions must be 1. Has the same format as "x". | |||||
* for each dimension of "out_backprop". | |||||
* The N and C dimensions must be 1. Has the same format as "out_backprop". | |||||
* @li pads: A list of 6 integers. Supports only padding along the D, H and W | * @li pads: A list of 6 integers. Supports only padding along the D, H and W | ||||
* dimensions in sequence of head, tail, top, bottom, left and right. | * dimensions in sequence of head, tail, top, bottom, left and right. | ||||
* @li input_size: A tuple/list of type int32, int64. An integer vector | * @li input_size: A tuple/list of type int32, int64. An integer vector | ||||
@@ -1023,13 +1171,14 @@ REG_OP(Conv3DBackpropInput) | |||||
*@par Attributes: | *@par Attributes: | ||||
* Three attributes: | * Three attributes: | ||||
* @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
* channels. Reserved. | |||||
* channels. | |||||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
* Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
* dimension of input, now only support [1,1,1,1,1] | |||||
* dimension of input. | |||||
* The N, C and D dimensions must be 1. Has the same format as "out_backprop". | |||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type and data format as out_backprop. | |||||
* y: A Tensor. Has the same type and data format as "out_backprop". | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with Tensorflow's conv3d_backprop_input | * Compatible with Tensorflow's conv3d_backprop_input | ||||
@@ -1072,9 +1221,7 @@ REG_OP(Conv3DBackpropInputD) | |||||
* @li c_t: An optional Tensor of dtype float16 or float32. The cell state at time t . \n | * @li c_t: An optional Tensor of dtype float16 or float32. The cell state at time t . \n | ||||
*@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
* Compatible with the Pytorch operator adds. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
* Compatible with the Caffe operator LSTM. | |||||
*/ | */ | ||||
REG_OP(LSTM) | REG_OP(LSTM) | ||||
.INPUT(x, TensorType({DT_FLOAT16})) | .INPUT(x, TensorType({DT_FLOAT16})) | ||||
@@ -1121,14 +1268,15 @@ REG_OP(LSTM) | |||||
*@par Attributes: | *@par Attributes: | ||||
* Three attributes: | * Three attributes: | ||||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
* dimension of input, now only support [1,1,1,1,1]. | |||||
* dimension of input. | |||||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||||
* @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
* channels. Reserved. | |||||
* channels. | |||||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
* Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor that has the same type as x | |||||
* y: A Tensor that has the same type as "x" | |||||
* and the format is NDHWC, NCDHW or DHWCN. | * and the format is NDHWC, NCDHW or DHWCN. | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with Tensorflow's conv3d_backprop_filter | * Compatible with Tensorflow's conv3d_backprop_filter | ||||
@@ -1172,9 +1320,10 @@ REG_OP(Conv3DBackpropFilter) | |||||
*@par Attributes: | *@par Attributes: | ||||
* Three attributes: | * Three attributes: | ||||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
* dimension of input, now only support [1,1,1,1,1]. | |||||
* dimension of input. | |||||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||||
* @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
* channels. Reserved. | |||||
* channels. | |||||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
* Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
@@ -1224,15 +1373,16 @@ REG_OP(Conv3DBackpropFilterD) | |||||
*@par Attributes: | *@par Attributes: | ||||
* Five attributes: | * Five attributes: | ||||
* @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
* channels. Reserved. | |||||
* channels. | |||||
* @li dilations: A tuple/list of 5 integers, | * @li dilations: A tuple/list of 5 integers, | ||||
* The dilation factor for each dimension of input, now only support [1,1,1,1,1] | |||||
* The dilation factor for each dimension of input. | |||||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
* Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
* @li output_padding: The size will be added in the output shape. | * @li output_padding: The size will be added in the output shape. | ||||
* @li offset_x: Input offset_x value. Reserved. | * @li offset_x: Input offset_x value. Reserved. | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type and format as x. | |||||
* y: A Tensor. Has the same type and format as "x". | |||||
*/ | */ | ||||
REG_OP(Conv3DTranspose) | REG_OP(Conv3DTranspose) | ||||
.INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | ||||
@@ -1273,15 +1423,16 @@ REG_OP(Conv3DTranspose) | |||||
*@par Attributes: | *@par Attributes: | ||||
* Five attributes: | * Five attributes: | ||||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
* dimension of input, now only support [1,1,1,1,1] | |||||
* dimension of input. | |||||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||||
* @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
* channels. Reserved. | |||||
* channels. | |||||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
* Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
* @li output_padding: The size will be added in the output shape. | * @li output_padding: The size will be added in the output shape. | ||||
* @li offset_x: Input offset_x value. Reserved. | * @li offset_x: Input offset_x value. Reserved. | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type and format as x. | |||||
* y: A Tensor. Has the same type and format as "x". | |||||
*@par Restrictions: | *@par Restrictions: | ||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. | ||||
*/ | */ | ||||
@@ -1316,6 +1467,22 @@ REG_OP(Conv3DTransposeD) | |||||
* or [out_channels, in_channel, filter_height, filter_width]. | * or [out_channels, in_channel, filter_height, filter_width]. | ||||
* @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". | * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". | ||||
* @li offset_w: An optional 1D tensor for quantized inference. Reserved. | * @li offset_w: An optional 1D tensor for quantized inference. Reserved. | ||||
*\n | |||||
*\n | |||||
* The following are the supported data types and data formats: | |||||
*@verbatim | |||||
| Tensor | x | filter | bias | y | |||||
------------|---------|---------|---------|-------- | |||||
| Data Type | float16 | float16 | float16 | float16 | |||||
| |---------|---------|---------|-------- | |||||
| | int8 | int8 | int32 | int32 | |||||
------------|---------|---------|---------|-------- | |||||
| Format | NCHW | NCHW | ND | NCHW | |||||
| | NHWC | HWCN | | NHWC | |||||
@endverbatim | |||||
* For int8, the output must be followed by a dequant or requant operator. | |||||
*\n | |||||
* | |||||
*@par Required Attributes: | *@par Required Attributes: | ||||
* @li strides: A required tuple/list of 4 integers. The stride of the sliding | * @li strides: A required tuple/list of 4 integers. The stride of the sliding | ||||
* window for H/W dimension. The index of H/W is same as data_format. | * window for H/W dimension. The index of H/W is same as data_format. | ||||
@@ -1333,10 +1500,58 @@ REG_OP(Conv3DTransposeD) | |||||
* @li output_padding: The size will be added in the output shape. Defaults | * @li output_padding: The size will be added in the output shape. Defaults | ||||
* to [0, 0, 0, 0]. | * to [0, 0, 0, 0]. | ||||
* @li offset_x: An optional int. Input offset, used for quantized inference. | * @li offset_x: An optional int. Input offset, used for quantized inference. | ||||
* Defaults to "0". | |||||
* The negative offset added to the input image for int8 type. Ensure offset_x | |||||
* within the effective range of int8 [-128, 127]. Defaults to "0". | |||||
*\n | |||||
*\n | |||||
* The following value range restrictions must be met: | |||||
*@verbatim | |||||
| Name | Field | Scope | |||||
-------------------|----------|-------------- | |||||
| input_size | H | [1, 4096] | |||||
| | W | [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| x (out_backprop) | H*strideH| [1, 4096] | |||||
| | W*strideW| [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| filter | H | [1, 255] | |||||
| | W | [1, 255] | |||||
-------------------|----------|-------------- | |||||
| y (fmap) | H | [1, 4096] | |||||
| | W | [1, 4096] | |||||
-------------------|----------|-------------- | |||||
| Stride | H | [1, 63] | |||||
| | W | [1, 63] | |||||
-------------------|----------|-------------- | |||||
| Padding | Top | [0, 255] | |||||
| | Bottom | [0, 255] | |||||
| | Left | [0, 255] | |||||
| | Right | [0, 255] | |||||
-------------------|----------|-------------- | |||||
| Dilation | H | [1, 255] | |||||
| | W | [1, 255] | |||||
-------------------|----------|-------------- | |||||
| Offset_x | | [-128, 127] | |||||
@endverbatim | |||||
* On Ascend910, H and W of fmap or out_backprop cannot be 1 when | |||||
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||||
* If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w must be less than 4096. | |||||
*\n | |||||
* | |||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. A Tensor of type float16 or int32, and has same format as | * y: A Tensor. A Tensor of type float16 or int32, and has same format as | ||||
* input_size. | * input_size. | ||||
*\n | |||||
* out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||||
* (dilation_h * (filter_height - 1) + 1)) | |||||
* / stride_h + 1 | |||||
*\n | |||||
* out_backprop_width = (fmap_width + pad_left + pad_right - | |||||
* (dilation_w * (filter_width - 1) + 1)) | |||||
* / stride_w + 1 | |||||
*\n | |||||
* | |||||
*/ | */ | ||||
REG_OP(Conv2DTranspose) | REG_OP(Conv2DTranspose) | ||||
.INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | ||||
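As a quick sanity check of the out_backprop size formulas documented above, here is a standalone C++ sketch that evaluates them; all numbers are made up for illustration and are not a constraint of the operator.

// Hypothetical sizes, only to exercise the documented formulas.
#include <cstdio>

int main() {
  const int fmap_h = 56, pad_top = 1, pad_bottom = 1;
  const int filter_h = 3, dilation_h = 1, stride_h = 2;
  const int out_backprop_h =
      (fmap_h + pad_top + pad_bottom - (dilation_h * (filter_h - 1) + 1)) / stride_h + 1;
  std::printf("out_backprop_height = %d\n", out_backprop_h);  // prints 28
  return 0;
}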
@@ -1405,21 +1620,22 @@ REG_OP(Conv2DTransposeD) | |||||
/** | /** | ||||
*@brief Computes the deformed convolution output with the expected input | *@brief Computes the deformed convolution output with the expected input | ||||
*@par Inputs: | *@par Inputs: | ||||
* Four inputs: | |||||
* Two inputs: | |||||
* @li x: A Tensor of type float16,float32 | * @li x: A Tensor of type float16,float32 | ||||
* @li offsets: A Tensor of type float16,float32.Deformation offset parameter. | * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. | ||||
*@par Required Attributes: | *@par Required Attributes: | ||||
* @li strides: A tuple/list of 4 integers.The stride of the sliding window for | * @li strides: A tuple/list of 4 integers.The stride of the sliding window for | ||||
* height and width for H/W dimension. | * height and width for H/W dimension. | ||||
* @li pads: A tuple/list of 4 integers.Padding added to each dimension | |||||
* @li pads: A tuple/list of 4 integers.Padding added to H/W dimension | |||||
* of the input. | * of the input. | ||||
* @li ksize: A tuple/list of 2 integers.kernel size. | * @li ksize: A tuple/list of 2 integers.kernel size. | ||||
*@par Attributes: | *@par Attributes: | ||||
* Three attributes: | |||||
* Four attributes: | |||||
* @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | ||||
* of input. Defaults to [1, 1, 1, 1] | * of input. Defaults to [1, 1, 1, 1] | ||||
* @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | ||||
* @li deformable_groups: Specify the c-axis grouping number of input x. | * @li deformable_groups: Specify the c-axis grouping number of input x. | ||||
* @li modulated: Specify version of DeformableConv2D, true means v2, false means v1 | |||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. A Tensor of type float16, float32. | * y: A Tensor. A Tensor of type float16, float32. | ||||
*/ | */ | ||||
@@ -1433,7 +1649,69 @@ REG_OP(DeformableOffsets) | |||||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | .ATTR(dilations, ListInt, {1, 1, 1, 1}) | ||||
.ATTR(data_format, String, "NCHW") | .ATTR(data_format, String, "NCHW") | ||||
.ATTR(deformable_groups, Int, 1) | .ATTR(deformable_groups, Int, 1) | ||||
.ATTR(modulated, Bool, true) | |||||
.OP_END_FACTORY_REG(DeformableOffsets) | .OP_END_FACTORY_REG(DeformableOffsets) | ||||
/** | |||||
*@brief Computes the gradients of DeformableOffsets with respect to input and offsets | |||||
*@par Inputs: | |||||
* Three inputs: | |||||
* @li grad: A Tensor of type float16,float32. gradients with respect to DeformableOffsets output | |||||
* @li x: A Tensor of type float16,float32. | |||||
* @li offsets: A Tensor of type float16,float32.Deformation offset parameter. | |||||
*@par Required Attributes: | |||||
* @li strides: A tuple/list of 4 integers.The stride of the sliding window for | |||||
* height and width for H/W dimension. | |||||
* @li pads: A tuple/list of 4 integers.Padding added to H/W dimension | |||||
* of the input. | |||||
* @li ksize: A tuple/list of 2 integers.kernel size. | |||||
*@par Attributes: | |||||
* Three attributes: | |||||
* @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | |||||
* of input. Defaults to [1, 1, 1, 1] | |||||
* @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | |||||
* @li deformable_groups: Specify the c-axis grouping number of input x. | |||||
* @li modulated: Specify version of DeformableConv2D, true means v2, false means v1. | |||||
*@par Outputs: | |||||
* grad_x: A Tensor of type float16, float32. Gradients with respect to input "x". | |||||
* grad_offsets: A Tensor of type float16, float32. Gradients with respect to input "offsets". | |||||
*/ | |||||
REG_OP(DeformableOffsetsGrad) | |||||
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(grad_offsets, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(strides, ListInt) | |||||
.REQUIRED_ATTR(pads, ListInt) | |||||
.REQUIRED_ATTR(ksize, ListInt) | |||||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||||
.ATTR(data_format, String, "NCHW") | |||||
.ATTR(deformable_groups, Int, 1) | |||||
.ATTR(modulated, Bool, true) | |||||
.OP_END_FACTORY_REG(DeformableOffsetsGrad) | |||||
/** | |||||
*@brief Computes the deformed dilation output with the expected input | |||||
*@par Inputs: | |||||
* One input: | |||||
* @li x: A Tensor of type int8, float16, float32 | |||||
*@par Required Attributes: | |||||
* @li dilations: A tuple/list of integers. | |||||
*@par Attributes: | |||||
* Two attributes: | |||||
* @li padding_value: An optional float, the value used to fill blank positions. Defaults to "0.0". | |||||
* @li pads: A tuple/list of integers. | |||||
*@par Outputs: | |||||
* y: A Tensor. A Tensor of type int8, float16, float32. | |||||
*/ | |||||
REG_OP(Dilation) | |||||
.INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(dilations, ListInt) | |||||
.ATTR(pads, ListInt, {}) | |||||
.ATTR(padding_value, Float, 0.0) | |||||
.OP_END_FACTORY_REG(Dilation) | |||||
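Assuming the usual definition of dilation (inserting dilation - 1 copies of padding_value between adjacent elements along a dimension, so each output extent becomes (size - 1) * dilation + 1), a minimal 1-D sketch of the behavior follows; Dilate1D is a hypothetical helper, not part of the operator.

// 1-D dilation sketch under the assumption stated above.
#include <cstdio>
#include <vector>

std::vector<float> Dilate1D(const std::vector<float> &x, int dilation, float padding_value) {
  if (x.empty()) return {};
  std::vector<float> y((x.size() - 1) * dilation + 1, padding_value);
  for (size_t i = 0; i < x.size(); ++i) {
    y[i * dilation] = x[i];  // original values land every `dilation` slots
  }
  return y;
}

int main() {
  const auto y = Dilate1D({1.f, 2.f, 3.f}, 2, 0.f);  // -> {1, 0, 2, 0, 3}
  for (float v : y) std::printf("%g ", v);
  std::printf("\n");
  return 0;
}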
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -254,22 +254,22 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(PriorBox) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(min_size, ListFloat) | |||||
.REQUIRED_ATTR(max_size, ListFloat) | |||||
.REQUIRED_ATTR(aspect_ratio, ListFloat) | |||||
.ATTR(img_h, Int, 0) | |||||
.ATTR(img_w, Int, 0) | |||||
.ATTR(step_h, Float, 0.0) | |||||
.ATTR(step_w, Float, 0.0) | |||||
.ATTR(flip, Bool, true) | |||||
.ATTR(clip, Bool, false) | |||||
.ATTR(offset, Float, 0.5) | |||||
.ATTR(variance, ListFloat, {0.1}) | |||||
.OP_END_FACTORY_REG(PriorBox); | |||||
REG_OP(PriorBox) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(min_size, ListFloat) | |||||
.REQUIRED_ATTR(max_size, ListFloat) | |||||
.REQUIRED_ATTR(aspect_ratio, ListFloat) | |||||
.ATTR(img_h, Int, 0) | |||||
.ATTR(img_w, Int, 0) | |||||
.ATTR(step_h, Float, 0.0) | |||||
.ATTR(step_w, Float, 0.0) | |||||
.ATTR(flip, Bool, true) | |||||
.ATTR(clip, Bool, false) | |||||
.ATTR(offset, Float, 0.5) | |||||
.ATTR(variance, ListFloat, {0.1}) | |||||
.OP_END_FACTORY_REG(PriorBox); | |||||
/** | /** | ||||
*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n | *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n | ||||
@@ -306,25 +306,25 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. | *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. | ||||
*/ | */ | ||||
REG_OP(PriorBoxD) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(min_size, ListFloat) | |||||
.REQUIRED_ATTR(max_size, ListFloat) | |||||
.ATTR(img_h, Int, 0) | |||||
.ATTR(img_w, Int, 0) | |||||
.ATTR(step_h, Float, 0.0) | |||||
.ATTR(step_w, Float, 0.0) | |||||
.ATTR(flip, Bool, true) | |||||
.ATTR(clip, Bool, false) | |||||
.ATTR(offset, Float, 0.5) | |||||
.ATTR(variance, ListFloat, {0.1}) | |||||
.OP_END_FACTORY_REG(PriorBoxD); | |||||
REG_OP(PriorBoxD) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(min_size, ListFloat) | |||||
.REQUIRED_ATTR(max_size, ListFloat) | |||||
.ATTR(img_h, Int, 0) | |||||
.ATTR(img_w, Int, 0) | |||||
.ATTR(step_h, Float, 0.0) | |||||
.ATTR(step_w, Float, 0.0) | |||||
.ATTR(flip, Bool, true) | |||||
.ATTR(clip, Bool, false) | |||||
.ATTR(offset, Float, 0.5) | |||||
.ATTR(variance, ListFloat, {0.1}) | |||||
.OP_END_FACTORY_REG(PriorBoxD); | |||||
/** | /** | ||||
*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n | *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n | ||||
@@ -358,22 +358,22 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. | *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. | ||||
*/ | */ | ||||
REG_OP(PriorBoxDV2) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(min_size, ListFloat) | |||||
.REQUIRED_ATTR(max_size, ListFloat) | |||||
.ATTR(img_h, Int, 0) | |||||
.ATTR(img_w, Int, 0) | |||||
.ATTR(step_h, Float, 0.0) | |||||
.ATTR(step_w, Float, 0.0) | |||||
.ATTR(flip, Bool, true) | |||||
.ATTR(clip, Bool, false) | |||||
.ATTR(offset, Float, 0.5) | |||||
.ATTR(variance, ListFloat, {0.1}) | |||||
.OP_END_FACTORY_REG(PriorBoxDV2); | |||||
REG_OP(PriorBoxDV2) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(min_size, ListFloat) | |||||
.REQUIRED_ATTR(max_size, ListFloat) | |||||
.ATTR(img_h, Int, 0) | |||||
.ATTR(img_w, Int, 0) | |||||
.ATTR(step_h, Float, 0.0) | |||||
.ATTR(step_w, Float, 0.0) | |||||
.ATTR(flip, Bool, true) | |||||
.ATTR(clip, Bool, false) | |||||
.ATTR(offset, Float, 0.5) | |||||
.ATTR(variance, ListFloat, {0.1}) | |||||
.OP_END_FACTORY_REG(PriorBoxDV2); | |||||
/** | /** | ||||
*@brief Performs Position Sensitive ROI Pooling . \n | *@brief Performs Position Sensitive ROI Pooling . \n | ||||
@@ -531,10 +531,10 @@ as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n | |||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(Yolo) | REG_OP(Yolo) | ||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(boxes, Int, 3) | .ATTR(boxes, Int, 3) | ||||
.ATTR(coords, Int, 4) | .ATTR(coords, Int, 4) | ||||
.ATTR(classes, Int, 80) | .ATTR(classes, Int, 80) | ||||
@@ -584,10 +584,10 @@ REG_OP(Yolo) | |||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(YoloV2DetectionOutput) | REG_OP(YoloV2DetectionOutput) | ||||
.INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(biases, ListFloat) | .REQUIRED_ATTR(biases, ListFloat) | ||||
.ATTR(boxes, Int, 5) | .ATTR(boxes, Int, 5) | ||||
.ATTR(coords, Int, 4) | .ATTR(coords, Int, 4) | ||||
@@ -598,7 +598,7 @@ REG_OP(YoloV2DetectionOutput) | |||||
.ATTR(score_threshold, Float, 0.5) | .ATTR(score_threshold, Float, 0.5) | ||||
.ATTR(iou_threshold, Float, 0.45) | .ATTR(iou_threshold, Float, 0.45) | ||||
.ATTR(pre_nms_topn, Int, 512) | .ATTR(pre_nms_topn, Int, 512) | ||||
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(box_out_num, TensorType({DT_INT32})) | .OUTPUT(box_out_num, TensorType({DT_INT32})) | ||||
.OP_END_FACTORY_REG(YoloV2DetectionOutput) | .OP_END_FACTORY_REG(YoloV2DetectionOutput) | ||||
@@ -647,12 +647,12 @@ REG_OP(YoloV2DetectionOutput) | |||||
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead. | *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead. | ||||
*/ | */ | ||||
REG_OP(YoloV2DetectionOutputD) | REG_OP(YoloV2DetectionOutputD) | ||||
.INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(biases, ListFloat) | .REQUIRED_ATTR(biases, ListFloat) | ||||
.ATTR(boxes, Int, 5) | .ATTR(boxes, Int, 5) | ||||
.ATTR(coords, Int, 4) | .ATTR(coords, Int, 4) | ||||
@@ -663,7 +663,7 @@ REG_OP(YoloV2DetectionOutputD) | |||||
.ATTR(score_threshold, Float, 0.5) | .ATTR(score_threshold, Float, 0.5) | ||||
.ATTR(iou_threshold, Float, 0.45) | .ATTR(iou_threshold, Float, 0.45) | ||||
.ATTR(pre_nms_topn, Int, 512) | .ATTR(pre_nms_topn, Int, 512) | ||||
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(box_out_num, TensorType({DT_INT32})) | .OUTPUT(box_out_num, TensorType({DT_INT32})) | ||||
.OP_END_FACTORY_REG(YoloV2DetectionOutputD) | .OP_END_FACTORY_REG(YoloV2DetectionOutputD) | ||||
@@ -707,16 +707,16 @@ REG_OP(YoloV2DetectionOutputD) | |||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(YoloV3DetectionOutput) | REG_OP(YoloV3DetectionOutput) | ||||
.INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(biases_low, ListFloat) | .REQUIRED_ATTR(biases_low, ListFloat) | ||||
.REQUIRED_ATTR(biases_mid, ListFloat) | .REQUIRED_ATTR(biases_mid, ListFloat) | ||||
.REQUIRED_ATTR(biases_high, ListFloat) | .REQUIRED_ATTR(biases_high, ListFloat) | ||||
@@ -729,7 +729,7 @@ REG_OP(YoloV3DetectionOutput) | |||||
.ATTR(score_threshold, Float, 0.5) | .ATTR(score_threshold, Float, 0.5) | ||||
.ATTR(iou_threshold, Float, 0.45) | .ATTR(iou_threshold, Float, 0.45) | ||||
.ATTR(pre_nms_topn, Int, 512) | .ATTR(pre_nms_topn, Int, 512) | ||||
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(box_out_num, TensorType({DT_INT32})) | .OUTPUT(box_out_num, TensorType({DT_INT32})) | ||||
.OP_END_FACTORY_REG(YoloV3DetectionOutput) | .OP_END_FACTORY_REG(YoloV3DetectionOutput) | ||||
@@ -776,22 +776,22 @@ s | |||||
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead. | *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead. | ||||
*/ | */ | ||||
REG_OP(YoloV3DetectionOutputD) | REG_OP(YoloV3DetectionOutputD) | ||||
.INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(windex1, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(windex2, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(windex3, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(hindex1, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(hindex2, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(hindex3, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(windex1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(windex2, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(windex3, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(hindex1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(hindex2, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(hindex3, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(biases_low, ListFloat) | .REQUIRED_ATTR(biases_low, ListFloat) | ||||
.REQUIRED_ATTR(biases_mid, ListFloat) | .REQUIRED_ATTR(biases_mid, ListFloat) | ||||
.REQUIRED_ATTR(biases_high, ListFloat) | .REQUIRED_ATTR(biases_high, ListFloat) | ||||
@@ -804,7 +804,7 @@ REG_OP(YoloV3DetectionOutputD) | |||||
.ATTR(score_threshold, Float, 0.5) | .ATTR(score_threshold, Float, 0.5) | ||||
.ATTR(iou_threshold, Float, 0.45) | .ATTR(iou_threshold, Float, 0.45) | ||||
.ATTR(pre_nms_topn, Int, 512) | .ATTR(pre_nms_topn, Int, 512) | ||||
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(box_out_num, TensorType({DT_INT32})) | .OUTPUT(box_out_num, TensorType({DT_INT32})) | ||||
.OP_END_FACTORY_REG(YoloV3DetectionOutputD) | .OP_END_FACTORY_REG(YoloV3DetectionOutputD) | ||||
@@ -848,7 +848,7 @@ There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yol | |||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(YoloV3DetectionOutputV2) | REG_OP(YoloV3DetectionOutputV2) | ||||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(biases, ListFloat) | .REQUIRED_ATTR(biases, ListFloat) | ||||
.ATTR(boxes, Int, 3) | .ATTR(boxes, Int, 3) | ||||
.ATTR(coords, Int, 4) | .ATTR(coords, Int, 4) | ||||
@@ -862,7 +862,7 @@ REG_OP(YoloV3DetectionOutputV2) | |||||
.ATTR(N, Int, 10) | .ATTR(N, Int, 10) | ||||
.ATTR(resize_origin_img_to_net, Bool, false) | .ATTR(resize_origin_img_to_net, Bool, false) | ||||
.ATTR(out_box_dim, Int, 3) | .ATTR(out_box_dim, Int, 3) | ||||
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(box_out_num, TensorType({DT_INT32})) | .OUTPUT(box_out_num, TensorType({DT_INT32})) | ||||
.OP_END_FACTORY_REG(YoloV3DetectionOutputV2) | .OP_END_FACTORY_REG(YoloV3DetectionOutputV2) | ||||
@@ -910,9 +910,9 @@ REG_OP(YoloV3DetectionOutputV2) | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead. | ||||
*/ | */ | ||||
REG_OP(YoloV3DetectionOutputV2D) | REG_OP(YoloV3DetectionOutputV2D) | ||||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(biases, ListFloat) | .REQUIRED_ATTR(biases, ListFloat) | ||||
.ATTR(boxes, Int, 3) | .ATTR(boxes, Int, 3) | ||||
.ATTR(coords, Int, 4) | .ATTR(coords, Int, 4) | ||||
@@ -926,7 +926,7 @@ REG_OP(YoloV3DetectionOutputV2D) | |||||
.ATTR(N, Int, 10) | .ATTR(N, Int, 10) | ||||
.ATTR(resize_origin_img_to_net, Bool, false) | .ATTR(resize_origin_img_to_net, Bool, false) | ||||
.ATTR(out_box_dim, Int, 3) | .ATTR(out_box_dim, Int, 3) | ||||
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(box_out_num, TensorType({DT_INT32})) | .OUTPUT(box_out_num, TensorType({DT_INT32})) | ||||
.OP_END_FACTORY_REG(YoloV3DetectionOutputV2D) | .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D) | ||||
@@ -968,8 +968,9 @@ REG_OP(SPP) | |||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | ||||
* map. | * map. | ||||
*@li rois: A tensor of type float16 or float32, with shape | |||||
*@li rois: A tensor of type float16 or float32, with 3D shape | |||||
* [batch, 5, roi_max_num], describing the ROIs. | * [batch, 5, roi_max_num], describing the ROIs. | ||||
* roi_max_num must be less than or equal to 6000 and must be divisible by 16. | |||||
*@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying | *@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying | ||||
* the number of ROIs per batch . \n | * the number of ROIs per batch . \n | ||||
@@ -1201,35 +1202,6 @@ REG_OP(RpnProposalsD) | |||||
.OUTPUT(sorted_box, TensorType({DT_FLOAT16})) | .OUTPUT(sorted_box, TensorType({DT_FLOAT16})) | ||||
.OP_END_FACTORY_REG(RpnProposalsD) | .OP_END_FACTORY_REG(RpnProposalsD) | ||||
/** | |||||
*@brief Computes Score Filte Pre-Sort function. | |||||
*@par Inputs: | |||||
*Inputs include: | |||||
* @li rois: A Tensor. Must be float16. N-D with shape [N, 4]. | |||||
* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1]. | |||||
*@par Attributes: | |||||
* @li score_threshold: required, float, threahold of topk process. | |||||
* @li k: required, Int, threahold of topk process. | |||||
* @li score_filter: bool, mark of score_filter. Defaults to "true" | |||||
* @li core_max_num: int, max number of core. Defaults to "8" | |||||
*@par Outputs: | |||||
* @li sorted_proposal: A Tensor. Must be float16. | |||||
* N-D with shape [8*6002, 8]. | |||||
* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8]. | |||||
*/ | |||||
REG_OP(ScoreFiltePreSort) | |||||
.INPUT(rois, TensorType({DT_FLOAT16})) | |||||
.INPUT(cls_bg_prob, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16})) | |||||
.OUTPUT(proposal_num, TensorType({ DT_UINT32})) | |||||
.REQUIRED_ATTR(score_threshold, Float) | |||||
.REQUIRED_ATTR(k, Int) | |||||
.ATTR(score_filter, Bool, true) | |||||
.ATTR(core_max_num, Int, 8) | |||||
.OP_END_FACTORY_REG(ScoreFiltePreSort) | |||||
/** | /** | ||||
*@brief Computes Score Filte Pre-Sort function. | *@brief Computes Score Filte Pre-Sort function. | ||||
@@ -1383,6 +1355,7 @@ REG_OP(DecodeWheelsTarget) | |||||
*@attention Constraints: | *@attention Constraints: | ||||
* Only computation of float16 data is supported. | * Only computation of float16 data is supported. | ||||
* Note: when the class num per image * max_size_per_class is too large, compilation will fail with an insufficient-memory error | |||||
*/ | */ | ||||
REG_OP(BatchMultiClassNonMaxSuppression) | REG_OP(BatchMultiClassNonMaxSuppression) | ||||
.INPUT(boxes, TensorType({DT_FLOAT16})) | .INPUT(boxes, TensorType({DT_FLOAT16})) | ||||
@@ -1464,9 +1437,9 @@ REG_OP(NormalizeBBox) | |||||
* y: A Tensor. Must have the same type as box_predictions. | * y: A Tensor. Must have the same type as box_predictions. | ||||
*/ | */ | ||||
REG_OP(DecodeBboxV2) | REG_OP(DecodeBboxV2) | ||||
.INPUT(boxes, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(anchors, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(anchors, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0}) | .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0}) | ||||
.ATTR(decode_clip, Float, 0.0) | .ATTR(decode_clip, Float, 0.0) | ||||
.ATTR(reversed_box, Bool, false) | .ATTR(reversed_box, Bool, false) | ||||
@@ -1477,7 +1450,8 @@ REG_OP(DecodeBboxV2) | |||||
* | * | ||||
*@par Inputs: | *@par Inputs: | ||||
*Inputs include: | *Inputs include: | ||||
* x: A Tensor. Must be float16 or float32. | |||||
* x: A Tensor. Dtype support: float16, float, int16, int8, | |||||
uint8, int32, int64. | |||||
* | * | ||||
*@par Attributes: | *@par Attributes: | ||||
* @li axis: optional, int. | * @li axis: optional, int. | ||||
@@ -1485,16 +1459,364 @@ REG_OP(DecodeBboxV2) | |||||
* | * | ||||
*@par Outputs: | *@par Outputs: | ||||
* @li y1: A Tensor. Must have the same type as x. | * @li y1: A Tensor. Must have the same type as x. | ||||
* @li y2: A Tensor. Indices of y1 in x.Dtype must be int32. | |||||
* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32. | |||||
* | |||||
*/ | */ | ||||
REG_OP(Sort) | REG_OP(Sort) | ||||
.INPUT(x, TensorType({ DT_FLOAT16 })) | |||||
.OUTPUT(y1, TensorType({ DT_FLOAT16 })) | |||||
.OUTPUT(y2, TensorType({ DT_INT32 })) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8, | |||||
DT_UINT8, DT_INT32, DT_INT64})) | |||||
.OUTPUT(y1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8, | |||||
DT_UINT8, DT_INT32, DT_INT64})) | |||||
.OUTPUT(y2, TensorType({DT_INT32})) | |||||
.ATTR(axis, Int, -1) | .ATTR(axis, Int, -1) | ||||
.ATTR(descending, Bool, false) | .ATTR(descending, Bool, false) | ||||
.OP_END_FACTORY_REG(Sort) | .OP_END_FACTORY_REG(Sort) | ||||
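The (y1, y2) output pair behaves like a sort plus argsort along the chosen axis; the following 1-D C++ sketch illustrates that contract (it is not the device implementation).

// y1 corresponds to the sorted values, y2 to their original indices in x.
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  const std::vector<float> x = {3.f, 1.f, 2.f};
  std::vector<int> y2(x.size());
  std::iota(y2.begin(), y2.end(), 0);
  const bool descending = false;  // mirrors the `descending` attribute
  std::sort(y2.begin(), y2.end(),
            [&](int a, int b) { return descending ? x[a] > x[b] : x[a] < x[b]; });
  for (int i : y2) std::printf("(value %g, index %d) ", x[i], i);  // (1,1) (2,2) (3,0)
  std::printf("\n");
  return 0;
}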
/** | |||||
*@brief Computes iou for input bboxes and gtboxes. | |||||
*@par Inputs: | |||||
* Two inputs, including: | |||||
*@li bboxes: Predicted boxes, a Tensor of type float16, each box given as (x0, x1, y0, y1), | |||||
*@li gtboxes: Ground-truth boxes, a Tensor of type float16, each box given as (x0, x1, y0, y1).\n | |||||
*@par Attributes: | |||||
*@li mode: An optional attribute of type string, specifying the IoU computation mode. Defaults to "iou". \n | |||||
*@par Outputs: | |||||
*@li overlap: A 2D Tensor of type float16 with shape [n, m]. \n | |||||
*@attention Constraints: | |||||
* Only computation of float16 data is supported. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS DEPRECATED. Please use Iou instead. | |||||
*/ | |||||
REG_OP(PtIou) | |||||
.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(mode, String, "iou") | |||||
.OP_END_FACTORY_REG(PtIou) | |||||
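For reference, with mode "iou" each overlap entry reduces to the intersection area divided by the union area of one bbox/gtbox pair; a scalar C++ sketch using the (x0, x1, y0, y1) box layout described above:

// IoU of two axis-aligned boxes, each given as (x0, x1, y0, y1).
#include <algorithm>
#include <cstdio>

float Iou(const float a[4], const float b[4]) {
  const float iw = std::min(a[1], b[1]) - std::max(a[0], b[0]);  // intersection width
  const float ih = std::min(a[3], b[3]) - std::max(a[2], b[2]);  // intersection height
  if (iw <= 0.f || ih <= 0.f) return 0.f;
  const float inter = iw * ih;
  const float area_a = (a[1] - a[0]) * (a[3] - a[2]);
  const float area_b = (b[1] - b[0]) * (b[3] - b[2]);
  return inter / (area_a + area_b - inter);
}

int main() {
  const float a[4] = {0.f, 2.f, 0.f, 2.f};
  const float b[4] = {1.f, 3.f, 1.f, 3.f};
  std::printf("iou = %.4f\n", Iou(a, b));  // 1 / 7, about 0.1429
  return 0;
}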
/** | |||||
*@brief Greedily selects a subset of bounding boxes in descending order of | |||||
score . \n | |||||
*@par Inputs: | |||||
*Input boxes and scores must be float16 type. Inputs include: | |||||
*@li boxes: An input tensor with shape [num_batches, spatial_dimension, 4]. | |||||
The single box data format is indicated by center_point_box. | |||||
*@li scores: An input tensor with shape [num_batches, num_classes, spatial_dimension]. | |||||
*@li max_output_size: A scalar integer tensor representing the maximum number | |||||
of boxes to be selected by non max suppression. | |||||
*@li iou_threshold: A 0-D float tensor representing the threshold for deciding | |||||
whether boxes overlap too much with respect to IOU. | |||||
*@li score_threshold: A 0-D float tensor representing the threshold for | |||||
deciding when to remove boxes based on score . \n | |||||
*@par Attributes: | |||||
*center_point_box: Integer indicating the format of the box data. | |||||
Defaults to 0. 0 - the box data is supplied as [y1, x1, y2, x2], | |||||
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair | |||||
of box corners, and the coordinates can be provided as normalized | |||||
(i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models. | |||||
1 - the box data is supplied as [x_center, y_center, width, height]. | |||||
Mostly used for PyTorch models. \n | |||||
*@par Outputs: | |||||
*@li selected_indices: A 2-D integer tensor of shape [M] representing the | |||||
selected indices from the boxes tensor, where M <= max_output_size. \n | |||||
*@attention Constraints: | |||||
*Input boxes and scores must be float16 type . \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with onnx NonMaxSuppression operator. | |||||
*@par Restrictions: | |||||
*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(NonMaxSuppressionV6) | |||||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) | |||||
.OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) | |||||
.OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) | |||||
.OUTPUT(selected_indices, TensorType({DT_INT32})) | |||||
.ATTR(center_point_box, Int, 0) | |||||
.ATTR(max_boxes_size, Int, 0) | |||||
.OP_END_FACTORY_REG(NonMaxSuppressionV6) | |||||
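The two center_point_box layouts differ only by a center/corner conversion; the sketch below maps format 1 ([x_center, y_center, width, height]) to format 0 ([y1, x1, y2, x2]). CenterToCorners is a hypothetical helper for illustration.

// Convert center_point_box = 1 layout to center_point_box = 0 layout.
#include <cstdio>

void CenterToCorners(const float in[4], float out[4]) {
  const float xc = in[0], yc = in[1], w = in[2], h = in[3];
  out[0] = yc - h / 2.f;  // y1
  out[1] = xc - w / 2.f;  // x1
  out[2] = yc + h / 2.f;  // y2
  out[3] = xc + w / 2.f;  // x2
}

int main() {
  const float in[4] = {4.f, 3.f, 2.f, 2.f};
  float out[4];
  CenterToCorners(in, out);
  std::printf("[%g, %g, %g, %g]\n", out[0], out[1], out[2], out[3]);  // [2, 3, 4, 5]
  return 0;
}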
/** | |||||
*@brief Greedily selects a subset of bounding boxes in descending order of | |||||
score . \n | |||||
*@par Inputs: | |||||
*Input boxes and scores must be float16 type. Inputs include: | |||||
*@li boxes: An input tensor with shape [num_batches, spatial_dimension, 4]. | |||||
The single box data format is indicated by center_point_box. | |||||
*@li scores: An input tensor with shape [num_batches, num_classes, spatial_dimension]. | |||||
*@li max_output_size: A scalar integer tensor representing the maximum number | |||||
of boxes to be selected by non max suppression. | |||||
*@li iou_threshold: A 0-D float tensor representing the threshold for deciding | |||||
whether boxes overlap too much with respect to IOU. | |||||
*@li score_threshold: A 0-D float tensor representing the threshold for | |||||
deciding when to remove boxes based on score . \n | |||||
*@li index_id: An input tensor with shape [num_batches, num_classes, spatial_dimension, 3], | |||||
the last dim representing (batch_id, class_id, index_id) . \n | |||||
*@par Attributes: | |||||
*center_point_box: Integer indicating the format of the box data. | |||||
Defaults to 0. 0 - the box data is supplied as [y1, x1, y2, x2], | |||||
where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair | |||||
of box corners, and the coordinates can be provided as normalized | |||||
(i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models. | |||||
1 - the box data is supplied as [x_center, y_center, width, height]. | |||||
Mostly used for PyTorch models. \n | |||||
*@par Outputs: | |||||
*@li selected_indices: A 2-D integer tensor of shape [M] representing the | |||||
selected indices from the boxes tensor, where M <= max_output_size. \n | |||||
*@attention Constraints: | |||||
*Input boxes and scores must be float16 type . \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with onnx NonMaxSuppression operator. | |||||
*/ | |||||
REG_OP(NonMaxSuppressionV7) | |||||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) | |||||
.OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) | |||||
.OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) | |||||
.OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(selected_indices, TensorType({DT_INT32})) | |||||
.ATTR(center_point_box, Int, 0) | |||||
.ATTR(max_boxes_size, Int, 0) | |||||
.OP_END_FACTORY_REG(NonMaxSuppressionV7) | |||||
/** | |||||
*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n | |||||
*@par Inputs: | |||||
* Two inputs, including: | |||||
*@li features: A 5HD Tensor list of type float32 or float16. | |||||
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, | |||||
* and the value "5" covers the index of the image where the ROI is located plus "x0", "y0", "x1", and "y1". | |||||
*@par Attributes: | |||||
*@li finest_scale: An optional attribute of type int, specifying the scale threshold used to calculate the levels of "rois". | |||||
*@li roi_scale_factor: An optional attribute of type float32, specifying the rescaling of "rois" coordinates. | |||||
*@li spatial_scale: An optional attribute of type list of float32, specifying the scaling ratio of "features" | |||||
* to the original image. | |||||
*@li pooled_height: A optional attribute of type int32, specifying the H dimension. | |||||
*@li pooled_width: A optional attribute of type int32, specifying the W dimension. | |||||
*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency | |||||
* of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", | |||||
* which is a floating point number. Defaults to "0". | |||||
*@li pool_mode: An optional attribute of type string to indicate pooling mode. Defaults to "avg" . \n | |||||
*@li aligned: An optional attribute of type bool, specifying whether to align corners. Defaults to true . \n | |||||
*@par Outputs: | |||||
* output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16. | |||||
* The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height", | |||||
* "pooled_width", and "features", respectively. | |||||
*@par Third-party framework compatibility | |||||
*Compatible with mmdetection SingleRoIExtractor operator. | |||||
*/ | |||||
REG_OP(RoiExtractor) | |||||
.DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(finest_scale, Int, 56) | |||||
.ATTR(roi_scale_factor, Float, 0) | |||||
.ATTR(spatial_scale, ListFloat, {1.f / 4, 1.f / 8, 1.f / 16, 1.f / 32}) | |||||
.ATTR(pooled_height, Int, 7) | |||||
.ATTR(pooled_width, Int, 7) | |||||
.ATTR(sample_num, Int, 0) | |||||
.ATTR(pool_mode, String, "avg") | |||||
.ATTR(aligned, Bool, true) | |||||
.OP_END_FACTORY_REG(RoiExtractor) | |||||
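For context on finest_scale: mmdetection's SingleRoIExtractor assigns each ROI to a pyramid level from its scale, level = floor(log2(scale / finest_scale + 1e-6)) clamped to the available levels, with scale = sqrt(roi_width * roi_height). The sketch below reproduces that mapping and is assumed, not guaranteed, to match this attribute's semantics.

// ROI-to-level mapping as in mmdetection's SingleRoIExtractor.
#include <algorithm>
#include <cmath>
#include <cstdio>

int RoiLevel(float x0, float y0, float x1, float y1, int num_levels,
             float finest_scale = 56.f) {
  const float scale = std::sqrt((x1 - x0) * (y1 - y0));
  const int lvl = static_cast<int>(std::floor(std::log2(scale / finest_scale + 1e-6f)));
  return std::min(std::max(lvl, 0), num_levels - 1);
}

int main() {
  // A 112 x 112 ROI has scale 112 = 2 * finest_scale, so it maps to level 1.
  std::printf("level = %d\n", RoiLevel(0.f, 0.f, 112.f, 112.f, 4));
  return 0;
}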
/** | |||||
*@brief Performs Position Sensitive PS ROI Pooling . \n | |||||
*@par Inputs: | |||||
* Two inputs, including: | |||||
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | |||||
* map, dimension C1 must be equal to | |||||
* (int((output_dim + 15) / C0)) * group_size * group_size. | |||||
*@li rois: A tensor of type float16 or float32, with shape | |||||
* [batch, 5, rois_num], describing the ROIs, each ROI consists of five | |||||
* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates | |||||
* the index of the input feature map, "x1", "y1", "x2", or "y2" must be | |||||
* greater than or equal to "0.0" . \n | |||||
*@par Attributes: | |||||
*@li output_dim: A required int32, specifying the number of output channels, | |||||
* must be greater than 0. | |||||
*@li group_size: A required int32, specifying the number of groups to encode | |||||
* position-sensitive score maps, must be within the range (0, 128). | |||||
*@li spatial_scale: A required float32, scaling factor for mapping the input | |||||
* coordinates to the ROI coordinates . \n | |||||
*@par Outputs: | |||||
*y: An NC1HWC0 tensor of type float16 or float32, describing the result | |||||
* feature map . \n | |||||
*@attention Constraints: | |||||
* NC1HWC0: the channel dimension must be group_size squared; rois_num must be a multiple of 16 | |||||
*/ | |||||
REG_OP(PSROIPoolingV2) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(spatial_scale, Float) | |||||
.REQUIRED_ATTR(output_dim, Int) | |||||
.REQUIRED_ATTR(group_size, Int) | |||||
.OP_END_FACTORY_REG(PSROIPoolingV2) | |||||
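A worked instance of the C1 constraint above, assuming C0 = 16 (the typical float16 block size for this format); the numbers are illustrative:

// C1 = (int((output_dim + 15) / C0)) * group_size * group_size, with C0 = 16 assumed.
#include <cstdio>

int main() {
  const int output_dim = 21, group_size = 7, c0 = 16;
  const int c1 = ((output_dim + 15) / c0) * group_size * group_size;  // ceil(21/16) * 49
  std::printf("required C1 = %d\n", c1);  // 2 * 49 = 98
  return 0;
}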
/** | |||||
*@brief Performs Position Sensitive PS ROI Pooling Grad . \n | |||||
*@par Inputs: | |||||
* Two inputs, including: | |||||
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the result | |||||
* feature map . \n | |||||
*@li rois: A tensor of type float16 or float32, with shape | |||||
* [batch, 5, rois_num], describing the ROIs, each ROI consists of five | |||||
* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates | |||||
* the index of the input feature map, "x1", "y1", "x2", or "y2" must be | |||||
* greater than or equal to "0.0" . \n | |||||
*@par Attributes: | |||||
*@li output_dim: A required int32, specifying the number of output channels, | |||||
* must be greater than 0. | |||||
*@li group_size: A required int32, specifying the number of groups to encode | |||||
* position-sensitive score maps, must be within the range (0, 128). | |||||
*@li spatial_scale: A required float32, scaling factor for mapping the input | |||||
* coordinates to the ROI coordinates . \n | |||||
*@li input_size: A required ListInt, specifying the grad input size (H, W). | |||||
*@par Outputs: | |||||
*y: An NC1HWC0 tensor of type float16 or float32, describing the feature | |||||
* map, dimension C1 must be equal to | |||||
* (int((output_dim + 15) / C0)) * group_size * group_size. | |||||
*@attention Constraints: | |||||
* NC1HWC0: the channel dimension must be group_size squared; rois_num must be a multiple of 16 | |||||
*/ | |||||
REG_OP(PSROIPoolingGradV2D) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(spatial_scale, Float) | |||||
.REQUIRED_ATTR(output_dim, Int) | |||||
.REQUIRED_ATTR(group_size, Int) | |||||
.REQUIRED_ATTR(input_size, ListInt) | |||||
.OP_END_FACTORY_REG(PSROIPoolingGradV2D) | |||||
/** | |||||
*@brief Generate the responsible flags of anchor in a single feature map. | |||||
*@par Inputs: | |||||
*@li gt_bboxes: Ground truth box, 2-D Tensor with shape `[batch, 4]`. | |||||
*@par Attributes: | |||||
*@li featmap_size: The size of feature maps, listint. | |||||
*@li strides: Stride of current level, listint. | |||||
*@li num_base_anchors: The number of base anchors. | |||||
*@par Outputs: | |||||
*flags: The valid flags of each anchor in a single level. | |||||
*/ | |||||
REG_OP(AnchorResponseFlags) | |||||
.INPUT(gt_bboxes, TensorType({DT_FLOAT})) | |||||
.OUTPUT(flags, TensorType({DT_UINT8})) | |||||
.REQUIRED_ATTR(featmap_size, ListInt) | |||||
.REQUIRED_ATTR(strides, ListInt) | |||||
.REQUIRED_ATTR(num_base_anchors, Int) | |||||
.OP_END_FACTORY_REG(AnchorResponseFlags) | |||||
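Assuming the YOLO-style responsibility rule (the grid cell containing a ground-truth center is responsible for that box, for all of its base anchors), a host-side sketch of the flag computation follows; the helper name, argument layout, and rule are illustrative assumptions, not the operator's exact kernel.

// Mark anchors of the cell containing each gt center; flags has h * w * num_base_anchors entries.
#include <array>
#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<uint8_t> ResponseFlags(const std::vector<std::array<float, 4>> &gt_bboxes,
                                   int h, int w, int stride_w, int stride_h,
                                   int num_base_anchors) {
  std::vector<uint8_t> flags(static_cast<size_t>(h) * w * num_base_anchors, 0);
  for (const auto &box : gt_bboxes) {  // box = {x1, y1, x2, y2}
    const int cx = static_cast<int>((box[0] + box[2]) * 0.5f) / stride_w;
    const int cy = static_cast<int>((box[1] + box[3]) * 0.5f) / stride_h;
    if (cx < 0 || cx >= w || cy < 0 || cy >= h) continue;
    for (int a = 0; a < num_base_anchors; ++a) {
      flags[(static_cast<size_t>(cy) * w + cx) * num_base_anchors + a] = 1;
    }
  }
  return flags;
}

int main() {
  const std::vector<std::array<float, 4>> gts = {{8.f, 8.f, 24.f, 24.f}};  // center (16, 16)
  const auto flags = ResponseFlags(gts, 4, 4, 8, 8, 3);  // cell (2, 2) is responsible
  std::printf("flag at cell (2,2), anchor 0: %d\n", flags[(2 * 4 + 2) * 3 + 0]);  // 1
  return 0;
}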
/** | |||||
*@brief Generates bounding boxes based on yolo's "anchor" and "ground-truth" boxes. | |||||
* It is a customized mmdetection operator . \n | |||||
*@par Inputs: | |||||
* Three inputs, including: | |||||
*@li anchor_boxes: anchor boxes generated by the yolo training set. | |||||
* A 2D Tensor of type float32 or float16 with shape (N, 4). "N" indicates the number | |||||
* of ROIs, "N" indicates the number of ROIs, and the value "4" refers to (tx, ty, tw, th). | |||||
*@li gt_bboxes: target of the transformation, e.g., ground-truth boxes. | |||||
* A 2D Tensor of type float32 or float16 with shape (N, 4). | |||||
* "N" indicates the number of ROIs, and 4 indicates "dx", "dy", "dw", and "dh" . | |||||
*@li stride: Scale for each box. | |||||
* A 1D Tensor of type int32 shape (N,). | |||||
* "N" indicates the number of ROIs. \n | |||||
*@par Attributes: | |||||
*@li performance_mode: Selects the performance mode, "high_precision" or "high_performance". | |||||
* With "high_precision" and float32 input, the output tensor precision error | |||||
* is smaller than 0.0001; with "high_performance" and float32 input, the op | |||||
* gives the best performance, but the precision error is only smaller than 0.005. | |||||
*@par Outputs: | |||||
*encoded_bboxes: Bboxes generated based on "anchor_boxes" and "gt_bboxes". Have the | |||||
* same format and type as "anchor_boxes". | |||||
* | |||||
*@attention Constraints: | |||||
* the input anchor boxes support a maximum N of 20480. \n | |||||
*/ | |||||
REG_OP(YoloBoxesEncode) | |||||
.INPUT(anchor_boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(gt_bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(stride, TensorType({DT_INT32})) | |||||
.ATTR(performance_mode, String, "high_precision") | |||||
.OUTPUT(encoded_bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(YoloBoxesEncode) | |||||
/** | |||||
*@brief Assigns positive bounding boxes to ground-truth boxes, based on IoU overlaps and grid responsibility flags. | |||||
*@par Inputs: | |||||
* Eight inputs, including: | |||||
*@li assigned_gt_inds: Tensor of type float16 or float32, shape (n, ) | |||||
*@li overlaps: A Tensor. Datatype is same as assigned_gt_inds. IOU between gt_bboxes and bboxes. shape(k, n) | |||||
*@li box_responsible_flags: A Tensor. Support uint8. Flag to indicate whether box is responsible. | |||||
*@li max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=0). | |||||
*@li argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=0). | |||||
*@li gt_max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=1). | |||||
*@li gt_argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=1). | |||||
*@li num_gts: A Tensor. Support int32. real k. shape (1, ) | |||||
*@par Attributes: | |||||
*@li pos_iou_thr: float. IoU threshold for positive bboxes. | |||||
*@li min_pos_iou: float. Minimum IoU for a bbox to be considered as a positive bbox. | |||||
*@li gt_max_assign_all: bool. Whether to assign all bboxes with the same highest overlap with some gt to that gt. | |||||
*@par Outputs: | |||||
*@li assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ). | |||||
*/ | |||||
REG_OP(GridAssignPositive) | |||||
.INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.INPUT(overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.INPUT(box_responsible_flags, TensorType({ DT_UINT8 })) | |||||
.INPUT(max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.INPUT(argmax_overlaps, TensorType({ DT_INT32 })) | |||||
.INPUT(gt_max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.INPUT(gt_argmax_overlaps, TensorType({ DT_INT32 })) | |||||
.INPUT(num_gts, TensorType({ DT_INT32 })) | |||||
.OUTPUT(assigned_gt_inds_pos, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.REQUIRED_ATTR(pos_iou_thr, Float) | |||||
.REQUIRED_ATTR(min_pos_iou, Float) | |||||
.REQUIRED_ATTR(gt_max_assign_all, Bool) | |||||
.OP_END_FACTORY_REG(GridAssignPositive) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -55,7 +55,9 @@ REG_OP(LogSoftmaxGrad) | |||||
*Two inputs, including: | *Two inputs, including: | ||||
* @li features: A Tensor. Must be one of the following types: half, float32, double. | * @li features: A Tensor. Must be one of the following types: half, float32, double. | ||||
* A "batch_size * num_classes" matrix. | * A "batch_size * num_classes" matrix. | ||||
* @li labels: A Tensor of the same type as "features". batch_size vector with values in [0, num_classes). | |||||
* @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'. | |||||
* batch_size vector with values in [0, num_classes). | |||||
* This is the label for the given minibatch entry. | |||||
*@par Outputs: | *@par Outputs: | ||||
@@ -105,6 +107,9 @@ REG_OP(SoftmaxCrossEntropyWithLogits) | |||||
* @li grad_softmax: A Tensor. Has the same shape and type as "softmax". | * @li grad_softmax: A Tensor. Has the same shape and type as "softmax". | ||||
* The format is NC1HWC0 or DN . \n | * The format is NC1HWC0 or DN . \n | ||||
*@par Attributes: | |||||
* axes: An optional list of ints. Defaults to "{-1}" . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*grad_x: A Tensor. Has the same shape and type as "softmax" . \n | *grad_x: A Tensor. Has the same shape and type as "softmax" . \n | ||||
@@ -115,6 +120,7 @@ REG_OP(SoftmaxGrad) | |||||
.INPUT(softmax, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | .INPUT(softmax, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | ||||
.INPUT(grad_softmax, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | .INPUT(grad_softmax, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | ||||
.OUTPUT(grad_x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | .OUTPUT(grad_x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | ||||
.ATTR(axes, ListInt, {-1}) | |||||
.OP_END_FACTORY_REG(SoftmaxGrad) | .OP_END_FACTORY_REG(SoftmaxGrad) | ||||
/** | /** | ||||
@@ -160,20 +166,20 @@ REG_OP(SigmoidCrossEntropyWithLogits) | |||||
.OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) | .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) | ||||
/** | /** | ||||
*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n | |||||
*@brief Computes the sigmoid cross entropy loss of "predict" and "target". | |||||
*@par Inputs: | *@par Inputs: | ||||
* four inputs, including: | * four inputs, including: | ||||
*@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. | *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. | ||||
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n | |||||
*@li weight: An multi-dimensional Tensor, specifying the weight value. \n | |||||
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. | |||||
*@li weight: A multi-dimensional Tensor, specifying the weight value.
*@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n | *@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean" . \n | |||||
*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean". \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n | |||||
*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict". \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with PyTorch operator BCEWithLogitsLoss. | * Compatible with PyTorch operator BCEWithLogitsLoss. | ||||
@@ -330,6 +336,41 @@ REG_OP(SoftmaxV2) | |||||
.ATTR(axes, ListInt, {-1}) | .ATTR(axes, ListInt, {-1}) | ||||
.OP_END_FACTORY_REG(SoftmaxV2) | .OP_END_FACTORY_REG(SoftmaxV2) | ||||
/** | |||||
*@brief Computes softmax fused with DropOutDoMaskV3D
*@par Inputs: | |||||
*Two inputs, including: | |||||
* @li x: A mutable Tensor. The type only support float16. | |||||
 * @li mask: A mutable Tensor. Must meet all of the following rules:
 * shape of mask should be 1D.
 * dtype of mask should be uint8.
 * value of shape should meet the following algorithm:
 * value = (size(x) + 128 - 1) // 128 * 128
*@par Attributes: | |||||
 * @li keep_prob: A required attribute of type float, specifying the
 * probability that each element of "x" is kept . \n
* @li axes: A list of int. The dimension softmax would be performed on. Defaults | |||||
* to "[-1]" . \n | |||||
*@par Outputs: | |||||
 * @li y1: A mutable Tensor. Has the same type as "x".
 * @li y2: A mutable Tensor. Has the same type as "x". \n
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(SoftmaxV2WithDropOutDoMaskV3D) | |||||
.INPUT(x, TensorType({DT_FLOAT16})) | |||||
.INPUT(mask, TensorType({DT_UINT8})) | |||||
.OUTPUT(y1, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(y2, TensorType({DT_FLOAT16})) | |||||
.REQUIRED_ATTR(keep_prob, Float) | |||||
.ATTR(axes, ListInt, {-1}) | |||||
.OP_END_FACTORY_REG(SoftmaxV2WithDropOutDoMaskV3D) | |||||
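// Editorial sketch, not part of the original header: the mask-length rule
// documented above, i.e. the element count of "x" rounded up to a multiple of
// 128. "AlignedMaskLen" is a hypothetical helper name.
#include <cstdint>
inline int64_t AlignedMaskLen(int64_t x_size) {
  return (x_size + 128 - 1) / 128 * 128;  // e.g. x_size = 300 -> 384
}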
/** | /** | ||||
*@brief Computes log softmax activations . \n | *@brief Computes log softmax activations . \n | ||||
@@ -427,6 +468,33 @@ REG_OP(MVN) | |||||
.ATTR(eps, Float, 1e-9) | .ATTR(eps, Float, 1e-9) | ||||
.OP_END_FACTORY_REG(MVN) | .OP_END_FACTORY_REG(MVN) | ||||
/** | |||||
*@brief Normalizes the input . \n | |||||
*@par Inputs: | |||||
* One input: | |||||
*x: An NCHW tensor of type float16 or float32 . \n | |||||
*@par Attributes: | |||||
*@li eps: An optional float32 epsilon added to the variance to avoid division by zero. Defaults to "1e-9" . \n
*@li axes: A list of integers specifying the axes to reduce. Defaults to "[0, 2, 3]" . \n
*@par Outputs: | |||||
*y: An NCHW tensor of type float16 or float32 . \n | |||||
*@attention Constraints: | |||||
* The input tensor must have the NCHW format, whose shape length must be 4. | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the ONNX operator MeanVarianceNormalization. | |||||
*/ | |||||
REG_OP(MVNV2) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */ | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as inputs" */ | |||||
.ATTR(eps, Float, 1e-9) | |||||
.ATTR(axes, ListInt, {0, 2, 3}) | |||||
.OP_END_FACTORY_REG(MVNV2) | |||||
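// Editorial sketch, not part of the op registry: the arithmetic MVNV2 applies
// over the reduced axes, shown for one group of "n" values. "MvnNormalize" is
// a hypothetical helper name.
#include <cmath>
#include <cstddef>
inline void MvnNormalize(const float* x, float* y, size_t n, float eps) {
  float mean = 0.0f;
  for (size_t i = 0; i < n; ++i) mean += x[i];
  mean /= static_cast<float>(n);
  float var = 0.0f;
  for (size_t i = 0; i < n; ++i) var += (x[i] - mean) * (x[i] - mean);
  var /= static_cast<float>(n);
  const float inv_std = 1.0f / std::sqrt(var + eps);  // eps avoids division by zero
  for (size_t i = 0; i < n; ++i) y[i] = (x[i] - mean) * inv_std;
}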
/** | /** | ||||
*@brief Normalizes the input "x1" . \n | *@brief Normalizes the input "x1" . \n | ||||
@@ -498,6 +566,31 @@ REG_OP(LayerNorm) | |||||
.ATTR(epsilon, Float, 0.0000001) | .ATTR(epsilon, Float, 0.0000001) | ||||
.OP_END_FACTORY_REG(LayerNorm) | .OP_END_FACTORY_REG(LayerNorm) | ||||
/** | |||||
*@brief Returns a tensor where each sub-tensor of input along dimension | |||||
* dim is normalized such that the p-norm of the sub-tensor is lower than the value maxnorm. \n | |||||
*@par Inputs: | |||||
*One input, including: | |||||
* @li x: A Tensor. Must be one of the following types: float16, float32 . \n | |||||
*@par Attributes: | |||||
 * @li p: A required float, specifying the exponent of the L_p norm.
 * @li dim: A required int, specifying the dimension along which sub-tensors are taken.
 * @li maxnorm: A required float, specifying the maximum allowed norm. \n
*@par Outputs: | |||||
*One output, including:
 * @li y: A Tensor. Has the same shape and type as "x".
*/ | |||||
REG_OP(Renorm) | |||||
.INPUT(x, TensorType::BasicType()) | |||||
.OUTPUT(y, TensorType::BasicType()) | |||||
.REQUIRED_ATTR(p, Float) | |||||
.REQUIRED_ATTR(dim, Int) | |||||
.REQUIRED_ATTR(maxnorm, Float) | |||||
.OP_END_FACTORY_REG(Renorm) | |||||
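// Editorial sketch, assuming the PyTorch renorm semantics stated above: each
// sub-tensor along "dim" whose p-norm exceeds "maxnorm" is rescaled to norm
// "maxnorm", and sub-tensors already below the threshold are left unchanged.
// "RenormScale" is a hypothetical helper computing the per-sub-tensor factor.
#include <cmath>
#include <cstddef>
inline float RenormScale(const float* t, size_t n, float p, float maxnorm) {
  float norm = 0.0f;
  for (size_t i = 0; i < n; ++i) norm += std::pow(std::fabs(t[i]), p);
  norm = std::pow(norm, 1.0f / p);
  return (norm > maxnorm) ? (maxnorm / norm) : 1.0f;
}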
/** | /** | ||||
*@brief LayerNormGrad operator interface implementation | *@brief LayerNormGrad operator interface implementation | ||||
* calculating: dy, x, variance, mean, gamma | * calculating: dy, x, variance, mean, gamma | ||||
@@ -586,6 +679,48 @@ REG_OP(LayerNormXBackprop) | |||||
.OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) | .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) | ||||
.OP_END_FACTORY_REG(LayerNormXBackprop) | .OP_END_FACTORY_REG(LayerNormXBackprop) | ||||
/** | |||||
*@brief LayerNormXBackpropV2 operator interface implementation | |||||
* calculating: dy, x, variance, mean, gamma | |||||
 * pd_xl = data_dy * data_gamma
 * pd_var = np.sum((-0.5) * pd_xl * (data_x - data_mean)
 *          * np.power((data_variance + EPSLON), (-1.5)),
 *          reduce_axis, keepdims=True)
 * pd_mean = np.sum((-1.0) * pd_xl
 *           * np.power((data_variance + EPSLON), (-0.5)),
 *           reduce_axis, keepdims=True)
 *           + pd_var * (1.0/m)
 *           * np.sum((-2.0) * (data_x - data_mean), reduce_axis, keepdims=True)
 * pd_x = pd_xl * np.power((data_variance + EPSLON), (-0.5)) +
 *        pd_var * (2.0/m) * (data_x - data_mean) + pd_mean * (1.0/m)
 * res_for_gamma = (data_x - data_mean) * np.power((data_variance + EPSLON), (-0.5))
*@par Inputs: | |||||
*Five inputs, including: | |||||
* @li dy: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li x: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li variance: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li mean: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n | |||||
*@par Outputs: | |||||
*Two outputs, including:
* @li pd_x: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li res_for_gamma: A Tensor. Must be one of the following types: float32. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(LayerNormXBackpropV2) | |||||
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(res_for_gamma, TensorType({DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(LayerNormXBackpropV2) | |||||
/** | /** | ||||
*@brief LayerNormBetaGammaBackprop operator interface implementation | *@brief LayerNormBetaGammaBackprop operator interface implementation | ||||
* calculating: dy, x, variance, mean | * calculating: dy, x, variance, mean | ||||
@@ -629,6 +764,35 @@ REG_OP(LayerNormBetaGammaBackprop) | |||||
.REQUIRED_ATTR(shape_gamma, ListInt) | .REQUIRED_ATTR(shape_gamma, ListInt) | ||||
.OP_END_FACTORY_REG(LayerNormBetaGammaBackprop) | .OP_END_FACTORY_REG(LayerNormBetaGammaBackprop) | ||||
/** | |||||
*@brief LayerNormBetaGammaBackpropV2 operator interface implementation | |||||
 * calculating: dy, res_for_gamma
* pd_gamma = np.sum((data_dy*res_for_gamma), param_axis, keepdims=True) | |||||
* pd_beta = np.sum(data_dy, param_axis, keepdims=True) | |||||
*@par Inputs:
*Two inputs, including:
 * @li dy: A Tensor. Must be one of the following types: float16, float32.
 * @li res_for_gamma: A Tensor. Must be of type float32 . \n
*@par Outputs: | |||||
*Two outputs, including:
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(LayerNormBetaGammaBackpropV2) | |||||
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(res_for_gamma, TensorType({DT_FLOAT})) | |||||
.OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.REQUIRED_ATTR(shape_gamma, ListInt) | |||||
.OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2) | |||||
/** | /** | ||||
*@brief Return "output" according to the algorithm of dropout_do_mask: | *@brief Return "output" according to the algorithm of dropout_do_mask: | ||||
* scale_x = x *(1 / keep_prob) | * scale_x = x *(1 / keep_prob) | ||||
@@ -656,7 +820,68 @@ REG_OP(DropOutDoMask) | |||||
.INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) | .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | ||||
.OP_END_FACTORY_REG(DropOutDoMask) | .OP_END_FACTORY_REG(DropOutDoMask) | ||||
/** | |||||
*@brief Return "output" according to the algorithm of dropout_do_mask: | |||||
* scale_x = x *(1 / keep_prob) | |||||
* output = select(mask == 1, scale_x, 0) | |||||
*@par Inputs: | |||||
*Three inputs, including: | |||||
* @li x: A mutable Tensor. Must be one of the following types: | |||||
* float16, float32 | |||||
 * @li mask: A mutable Tensor. Must meet all of the following rules:
 * shape of mask should be 1D.
 * dtype of mask should be uint8.
 * value of shape should meet the following algorithm:
 * value = (size(x) + 128 - 1) // 128 * 128
 * @li keep_prob: A mutable Tensor. Must meet all of the following rules:
 * shape of "keep_prob" should be (1,) or [1,].
 * Has the same type as "x" . \n
*@par Outputs:
*y: A mutable Tensor. Has the same type as "x". | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(DropOutDoMaskV3) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(mask, TensorType({DT_UINT8})) | |||||
.INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OP_END_FACTORY_REG(DropOutDoMaskV3) | |||||
/** | |||||
*@brief Return "output" according to the algorithm of dropout_do_mask: | |||||
* scale_x = x *(1 / keep_prob) | |||||
* output = select(mask == 1, scale_x, 0) | |||||
*@par Inputs: | |||||
*Two inputs, including: | |||||
* @li x: A mutable Tensor. Must be one of the following types: | |||||
* float16, float32 | |||||
 * @li mask: A mutable Tensor. Must meet all of the following rules:
 * shape of mask should be 1D.
 * dtype of mask should be uint8.
 * value of shape should meet the following algorithm:
 * value = (size(x) + 128 - 1) // 128 * 128
*@par Attributes:
 * @li keep_prob: A required attribute of type float, specifying the
 * probability that each element of "x" is kept . \n
*@par Outputs:
*y: A mutable Tensor. Has the same type as "x". | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(DropOutDoMaskV3D) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(mask, TensorType({DT_UINT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.REQUIRED_ATTR(keep_prob, Float) | |||||
.OP_END_FACTORY_REG(DropOutDoMaskV3D) | |||||
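// Editorial sketch of the dropout_do_mask rule shared by the two ops above,
// applied element-wise (the 128-alignment only pads the mask length). The
// helper name is hypothetical and assumes one uint8 mask value per element.
#include <cstddef>
#include <cstdint>
inline void DropoutDoMask(const float* x, const uint8_t* mask, float keep_prob,
                          float* y, size_t n) {
  const float scale = 1.0f / keep_prob;           // scale_x = x * (1 / keep_prob)
  for (size_t i = 0; i < n; ++i) {
    y[i] = (mask[i] == 1) ? x[i] * scale : 0.0f;  // output = select(mask == 1, scale_x, 0)
  }
}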
/** | /** | ||||
*@brief Scales the input . \n | *@brief Scales the input . \n | ||||
@@ -703,7 +928,7 @@ REG_OP(Scale) | |||||
*@par Inputs: | *@par Inputs: | ||||
*One input, including: | *One input, including: | ||||
*@li x: A Tensor. Must be 4-D shape, and only support the following types: float16, float32 . \n | |||||
*x: A 4-D Tensor. Only supports the following types: float16, float32 . \n
*@par Attributes: | *@par Attributes: | ||||
*@li depth_radius: An optional int32, specifying the half-width of the normalization window. Defaults to "5". | *@li depth_radius: An optional int32, specifying the half-width of the normalization window. Defaults to "5". | ||||
@@ -960,24 +1185,532 @@ REG_OP(INInferV2D) | |||||
.OP_END_FACTORY_REG(INInferV2D) | .OP_END_FACTORY_REG(INInferV2D) | ||||
/** | /** | ||||
*@brief Performs instance normalization for inference of InHost part. | |||||
* @brief InstanceNorm operator interface implementation. | |||||
*@par Inputs:\n | |||||
* One input, including: (NC1HWC0 supported) | |||||
* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li x: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li gamma: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li beta: A Tensor. Must be one of the following types: float16, float32. | |||||
 * @par Attributes:
 * @li data_format: An optional attribute of type String. Defaults to "NDHWC". \n
 * @li epsilon: An optional attribute of type Float. Defaults to "1e-6". \n
* @par Outputs: | |||||
*Three outputs, including: | |||||
* @li y: A Tensor. Has the same type as "x". \n | |||||
* @li mean: A Tensor. Has the same type as "x". \n | |||||
* @li variance: A Tensor. Has the same type as "x". \n | |||||
* @par Third-party framework compatibility | |||||
 * Compatible with the ONNX operator InstanceNormalization.
*/ | |||||
REG_OP(InstanceNorm) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(data_format, String, "NDHWC") | |||||
.ATTR(epsilon, Float, 1e-6) | |||||
.OP_END_FACTORY_REG(InstanceNorm) | |||||
/** | |||||
*@brief InstanceNormGrad operator interface implementation. | |||||
*@par Inputs: | |||||
*Five inputs, including: | |||||
* @li dy: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li x: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li variance: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li mean: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n | |||||
*@par Outputs: | |||||
*Three outputs, including: | |||||
* @li pd_x: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(InstanceNormGrad) | |||||
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OP_END_FACTORY_REG(InstanceNormGrad) | |||||
/** | |||||
*@brief InstanceNormXBackprop operator interface implementation. | |||||
*@par Inputs: | |||||
*Five inputs, including: | |||||
* @li dy: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li x: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li variance: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li mean: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n | |||||
*@par Outputs: | |||||
*Two outputs, including: | |||||
* @li pd_x: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li res_for_gamma: A Tensor. Must be one of the following types: float32. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(InstanceNormXBackprop) | |||||
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(res_for_gamma, TensorType({DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(InstanceNormXBackprop) | |||||
/** | |||||
*@brief InstanceNormBetaGammaBackprop operator interface implementation. | |||||
*@par Inputs: | |||||
*Two inputs, including: | |||||
* @li dy: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li res_for_gamma: A Tensor. Must be one of the following types: float32.\n | |||||
*@par Outputs: | |||||
*Two outputs, including: | |||||
* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(InstanceNormBetaGammaBackprop) | |||||
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(res_for_gamma, TensorType({DT_FLOAT})) | |||||
.OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OP_END_FACTORY_REG(InstanceNormBetaGammaBackprop) | |||||
/** | |||||
* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li grad: A Tensor. Must be one of the following types: float16, float32. | |||||
* Required. | |||||
* @li input: A Tensor. Has the same type as "grad". Required. | |||||
* @li target: A Tensor. Has the same type as "grad". Required. \n | |||||
* @par Attributes: | |||||
* @li reduction: An optional attribute of type String. Defaults to "mean". \n | |||||
* @li log_target: An optional attribute of type Bool. Defaults to false. \n | |||||
* @par Outputs: | |||||
* @li y: A Tensor. Has the same type as "grad". \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator KlDivLossGrad. | |||||
*/ | |||||
REG_OP(KlDivLossGrad) | |||||
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(reduction, String, "mean") | |||||
.ATTR(log_target, Bool, false) | |||||
.OP_END_FACTORY_REG(KlDivLossGrad) | |||||
/** | |||||
* @brief Computes l1_loss_grad or l1_loss_backward. \n | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li grads: A Tensor. Must be one of the following types: float16, float32. | |||||
* Required. | |||||
* @li predict: A Tensor. Has the same type as "grads". Required. | |||||
* @li label: A Tensor. Has the same type as "grads". Required. \n | |||||
* @par Attributes: | |||||
* @li reduction: An optional attribute of type String. Defaults to "mean". \n | |||||
* @par Outputs: | |||||
 * @li y: A Tensor. Has the same type as "grads". \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator L1LossGrad. | |||||
*/ | |||||
REG_OP(L1LossGrad) | |||||
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(L1LossGrad) | |||||
/** | |||||
 * @brief Computes the Lp loss, where p = 1, 2, 3, ....
* @par Inputs: | |||||
* @li predict: An ND tensor of type float16, float32. | |||||
* @li label: An ND tensor of type float16, float32. \n | |||||
* @par Attributes: | |||||
 * @li p: A required int attribute that decides which loss to compute. Currently "p" can only be 1, computing the L1 loss.
 * @li reduction: An optional string. Defaults to "mean". \n
 * @par Outputs:
 * @li y: An ND tensor with the same shape and type as "predict". \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator LpLoss. | |||||
*/ | |||||
REG_OP(LpLoss) | |||||
.INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(p, Int) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(LpLoss) | |||||
/** | |||||
* @brief Computes gradients of mse loss. | |||||
* @par Inputs: | |||||
* @li predict: An ND tensor of type float16, float32. | |||||
* @li label: An ND tensor of type float16, float32. | |||||
* @li dout: An ND tensor of type float16, float32. \n | |||||
* @par Attributes: | |||||
 * @li reduction: An optional string. Defaults to "mean". \n
 * @par Outputs:
 * @li y: An ND tensor with the same shape and type as "predict". \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator MseLossGrad. | |||||
*/ | |||||
REG_OP(MseLossGrad) | |||||
.INPUT(predict, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
.INPUT(label, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
.INPUT(dout, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(MseLossGrad) | |||||
/** | |||||
* @brief Computes mse loss. | |||||
* @par Inputs: | |||||
* two inputs, including: | |||||
* @li predict: An ND Tensor of dtype float16 or float32. | |||||
* @li label: An ND Tensor of dtype float16 or float32.\n | |||||
* | |||||
* @par Attributes: | |||||
 * @li reduction: An optional string from "sum", "none", and "mean". Defaults to "mean".\n
* | |||||
* @par Outputs: | |||||
 * @li y: When reduction is "sum" or "mean", y is a scalar. When reduction is
 * "none", y has the same type and shape as "predict".\n
*/ | |||||
REG_OP(MseLoss) | |||||
.INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(MseLoss) | |||||
/** | |||||
 * @brief Calculates the backward gradient of the function "smooth_l1_loss_v2". \n
* @par Inputs: | |||||
* Three Inputs, including: | |||||
* @li predict: A Tensor. Must be one of the following types: | |||||
* float16, float32. | |||||
* @li label: A Tensor. Has the same type as "predict". | |||||
* @li dout: A Tensor. Has the same type as "predict". \n | |||||
* @par Attributes: | |||||
* Two Attributes, including: | |||||
* @li sigma: An optional float. Defaults to 1.0. \n | |||||
* @li reduction: An optional string. Defaults to "mean", | |||||
* Must be one of the following: "none", "mean", "sum". \n | |||||
* @par Outputs: | |||||
* @li gradient: A Tensor. Has the same type as "predict". \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator SmoothL1LossBackward. | |||||
*/ | |||||
REG_OP(SmoothL1LossGradV2) | |||||
.INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(gradient, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.ATTR(sigma, Float, 1.0) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(SmoothL1LossGradV2) | |||||
/** | |||||
* @brief Creates a criterion that uses a squared term if the absolute | |||||
* element-wise error falls below beta and an L1 term otherwise. It is | |||||
* less sensitive to outliers than the MSELoss and in some cases prevents | |||||
* exploding gradients. | |||||
* @par Inputs: | |||||
* @li predict: A multi-dimensional Tensor of type float16 or float32, | |||||
* specifying the predictive value. \n | |||||
* @li label: A multi-dimensional Tensor of type float16 or float32, | |||||
* specifying the target value. \n | |||||
* @par Attributes: | |||||
 * @li sigma: An optional float. Specifies the threshold of loss. Defaults
 * to "1.0". \n
* @li reduction: An optional str. Specifies the reduction to apply to | |||||
* the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, | |||||
* 'mean': the sum of the output will be divided by the number of elements in | |||||
 * the output, 'sum': the output will be summed. Default: 'mean'. \n
* @par Outputs: | |||||
* @li loss: Indicates the loss between the predictive value and target value. | |||||
* Has the same dimensions as "predict". \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator smooth_l1_loss. \n | |||||
*/ | |||||
REG_OP(SmoothL1LossV2) | |||||
.INPUT(predict, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.INPUT(label, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.OUTPUT(loss, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.ATTR(sigma, Float, 1.0) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(SmoothL1LossV2) | |||||
/** | |||||
* @brief Computes Centralization. result = x - mean(x, axes) | |||||
* @par Inputs: | |||||
* @li x: An ND tensor of type float16, float32. | |||||
* @par Attributes: | |||||
 * @li axes: A list of ints specifying the dimensions to reduce.
* Must be in the range [-rank(x), rank(x)). | |||||
* @par Outputs: | |||||
* @li y: A Tensor. Has the same type as "x". \n | |||||
* @par Third-party framework compatibility | |||||
* custom operator \n | |||||
*/ | |||||
REG_OP(Centralization) | |||||
.INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
.ATTR(axes, ListInt, {-1}) | |||||
.OP_END_FACTORY_REG(Centralization) | |||||
/** | |||||
*@brief Roll the tensor along the given dimension(s). | |||||
* Elements that are shifted beyond the last position are re-introduced at the first position. | |||||
* If a dimension is not specified, the tensor will be flattened before rolling and then restored to the original shape. \n | |||||
*@par Inputs: | |||||
*One input, including:
 * @li x: A tensor. Must be one of the following types:
* float16, float32, int32, uint32, int8, uint8. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
* epsilon: An optional float32, specifying the small value added to | |||||
variance to avoid dividing by zero. Defaults to "0.00001" . \n | |||||
* @li shifts: The number of places by which the elements of the tensor are shifted. \n | |||||
* @li dims: Axis along which to roll. \n | |||||
*@par Outputs:\n | |||||
* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. | |||||
*@par Outputs: | |||||
* y: A Tensor with the same type and shape as "x". \n
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator Roll. \n | |||||
*/ | */ | ||||
REG_OP(InHost) | |||||
.INPUT(variance, TensorType({DT_FLOAT})) | |||||
.OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) | |||||
.ATTR(epsilon, Float, 0.00001) | |||||
.OP_END_FACTORY_REG(InHost) | |||||
REG_OP(Roll) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8})) | |||||
.REQUIRED_ATTR(shifts, ListInt) | |||||
.ATTR(dims, ListInt, {}) | |||||
.OP_END_FACTORY_REG(Roll) | |||||
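// Editorial sketch of the Roll semantics described above for one dimension of
// length "n": elements shifted beyond the last position re-enter at the first,
// i.e. y[i] = x[(i - shift) mod n]. "RollSrcIndex" is a hypothetical helper.
#include <cstdint>
inline int64_t RollSrcIndex(int64_t i, int64_t shift, int64_t n) {
  int64_t src = (i - shift) % n;
  return (src < 0) ? src + n : src;  // wrap negative remainders back into [0, n)
}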
/** | |||||
*@brief Calculates the soft margin loss: creates a criterion that optimizes a two-class classification
logistic loss between input_x and input_y (containing 1 or -1). \n | |||||
*@par Inputs: | |||||
*Two inputs, including:
* @li input_x: A tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @li input_y: A tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
*@par Attributes: | |||||
*@li reduction: An optional string. Defaults to "mean". \n
*@par Outputs: | |||||
*output_z: While reduction == "none", a Tensor with the same type and shape as "input_x". \n
* While reduction == "sum" or "mean", a Tensor of the same type as "input_x", with shape (1,).
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator SoftMarginLoss. \n | |||||
*/ | |||||
REG_OP(SoftMarginLoss) | |||||
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.ATTR(reduction, String, "mean") | |||||
.OUTPUT(output_z, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OP_END_FACTORY_REG(SoftMarginLoss) | |||||
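// Editorial sketch, not part of the op registry: the two-class logistic loss
// described above with the default "mean" reduction; "input_y" holds +1/-1
// labels. The helper name is hypothetical.
#include <cmath>
#include <cstddef>
inline float SoftMarginLossMean(const float* x, const float* y, size_t n) {
  float loss = 0.0f;
  for (size_t i = 0; i < n; ++i) {
    loss += std::log1p(std::exp(-y[i] * x[i]));  // log(1 + exp(-y * x))
  }
  return loss / static_cast<float>(n);
}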
/** | |||||
* @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2. | |||||
* @par Inputs: | |||||
* @li predict: An ND tensor of type float16, float32. | |||||
* @li target: An ND tensor of type float16, float32. | |||||
* @li dout: An ND tensor of type float16, float32. | |||||
* @li weight: An optional ND tensor of type float16, float32. | |||||
* @li pos_weight: An optional ND tensor of type float16, float32. \n | |||||
* @par Attributes: | |||||
 * @li reduction: An optional string. Defaults to "mean". \n
 * @par Outputs:
 * @li gradient: An ND tensor with the same shape and type as "predict". \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad. | |||||
*/ | |||||
REG_OP(SigmoidCrossEntropyWithLogitsGradV2) | |||||
.INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) | |||||
/** | |||||
* @brief Calculate the PoissonNllLoss function. | |||||
 * target ~ Poisson(input); loss(input, target) = input - target * log(input) + log(target!) \n
 * @par Inputs:
 * Two inputs, including:
 * @li input_x: A tensor. Must be one of the following types:
 * float16, float32.
 * @li target: A tensor. Must be one of the following types:
 * float16, float32. \n
 * @par Attributes:
 * Four attributes, including:
 * @li log_input: An optional bool. Defaults to "True".
 * @li full: An optional bool. Defaults to "False".
 * @li eps: An optional float. Defaults to "1e-8".
 * @li reduction: An optional string. Defaults to "mean". \n
 * @par Outputs:
 * loss: A Tensor with the same element type as the two inputs. \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator PoissonNllLoss. \n | |||||
*/ | |||||
REG_OP(PoissonNllLoss) | |||||
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(log_input, Bool, true) | |||||
.ATTR(full, Bool, false) | |||||
.ATTR(eps, Float, 1e-8) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(PoissonNllLoss) | |||||
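// Editorial sketch of the per-element Poisson NLL term quoted above; the
// helper name is hypothetical. With log_input=true the input is read as a
// log-rate; otherwise "eps" guards log(0). The optional Stirling term
// log(target!) enabled by "full" is omitted here for brevity.
#include <cmath>
inline float PoissonNllTerm(float input, float target, bool log_input, float eps) {
  if (log_input) {
    return std::exp(input) - target * input;      // exp(log_rate) - target * log_rate
  }
  return input - target * std::log(input + eps);  // rate - target * log(rate + eps)
}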
/** | |||||
*@brief Generates a sequence mask (rnn_gen_mask) from the valid length of each batch.
* @par Inputs: | |||||
 * @li seq_length: An ND Tensor of type int32. Records the valid length of each batch.\n
* | |||||
* @par Attributes: | |||||
* @li num_step: A required int.\n | |||||
* @li hidden_size: A required int. \n | |||||
* | |||||
* | |||||
 * @par Outputs:
 * seq_mask: A mutable Tensor of type float16, with the shape [num_step, batch_size, hidden_size]. \n
* | |||||
*/ | |||||
REG_OP(RnnGenMask) | |||||
.INPUT(seq_length, TensorType({DT_INT32})) | |||||
.OUTPUT(seq_mask, TensorType({DT_FLOAT16})) | |||||
.REQUIRED_ATTR(num_step, Int) | |||||
.REQUIRED_ATTR(hidden_size, Int) | |||||
.OP_END_FACTORY_REG(RnnGenMask) | |||||
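// Editorial sketch of the mask layout described above: position [t, b, :] of
// "seq_mask" is 1.0 while step "t" is within batch b's valid length, else 0.0
// (a plain float stands in for the op's float16). Hypothetical helper.
#include <cstdint>
inline float RnnMaskValue(int64_t t, int64_t batch_valid_len) {
  return (t < batch_valid_len) ? 1.0f : 0.0f;  // broadcast across hidden_size
}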
/** | |||||
* @brief Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss) | |||||
* between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices) \n | |||||
* @par Inputs: | |||||
 * Two inputs, including:
 * @li x: A tensor. Must be one of the following types:
 * float16, float32.
 * @li target: A tensor. Must be of the following type:
 * int32. \n
* @par Attributes: | |||||
* @li reduction: An optional string. Defaults to "mean" \n | |||||
* @par Outputs: | |||||
 * @li y: A Tensor. Has the same element type as "x". \n
 * @li is_target: A Tensor. Has the same element type as "target". \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator MultiLabelMarginLoss. \n | |||||
*/ | |||||
REG_OP(MultilabelMarginLoss) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(target, TensorType({DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(is_target, TensorType({DT_INT32})) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(MultilabelMarginLoss) | |||||
/** | |||||
*@brief Performs batch normalization . \n | |||||
*@par Inputs: | |||||
* Two inputs | |||||
*@li input_x: A Tensor. Support float32. shape (n, c, d). | |||||
*@li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n | |||||
*@par Attributes: | |||||
*@li normalize_type: A required string, either "per_feature" or "all_features".
*@li epsilon: An optional float32, specifying the small value added to | |||||
variance to avoid dividing by zero. Defaults to "0.00001" . \n | |||||
*@par Outputs: | |||||
* One output
*@li output_y: A Tensor for the normalized "input_x". Support float32. Shape (n, c, d).\n
*/ | |||||
REG_OP(NormalizeBatch) | |||||
.INPUT(input_x, TensorType({ DT_FLOAT })) | |||||
.INPUT(seq_len, TensorType({ DT_INT32 })) | |||||
.OUTPUT(output_y, TensorType({ DT_FLOAT })) | |||||
.REQUIRED_ATTR(normalize_type, String) | |||||
.ATTR(epsilon, Float, 0.00001) | |||||
.OP_END_FACTORY_REG(NormalizeBatch) | |||||
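// Editorial sketch, not part of the op registry: one reading of the
// "all_features" branch above for a single batch item of shape (c, d) with
// "seq_len" valid steps; statistics come from the valid region only, and the
// whole slab is then normalized. The helper name is hypothetical.
#include <cmath>
#include <cstddef>
inline void NormalizeBatchItem(const float* x, float* y, size_t c, size_t d,
                               size_t seq_len, float eps) {
  const float valid = static_cast<float>(c * seq_len);
  float mean = 0.0f;
  for (size_t ch = 0; ch < c; ++ch)
    for (size_t t = 0; t < seq_len; ++t) mean += x[ch * d + t];
  mean /= valid;
  float var = 0.0f;
  for (size_t ch = 0; ch < c; ++ch)
    for (size_t t = 0; t < seq_len; ++t) {
      const float diff = x[ch * d + t] - mean;
      var += diff * diff;
    }
  var /= valid;
  const float inv_std = 1.0f / std::sqrt(var + eps);
  for (size_t i = 0; i < c * d; ++i) y[i] = (x[i] - mean) * inv_std;
}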
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -20,7 +20,144 @@ | |||||
*/ | */ | ||||
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | ||||
#define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | ||||
#include "graph/operator_reg.h" | |||||
#include "nn_pooling_ops.h" | #include "nn_pooling_ops.h" | ||||
namespace ge { | |||||
/** | |||||
* @brief Says whether the targets are in the top "k" predictions . \n | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor. | |||||
* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. | |||||
* @li k: A 1D Tensor of the same type as "targets". | |||||
* Specifies the number of top elements to look at for computing precision . \n | |||||
* @par Outputs: | |||||
* precision: A Tensor of type bool . \n | |||||
* @attention Constraints: | |||||
 * @li "targets" must be a non-negative tensor.
* @par Third-party framework compatibility | |||||
* @li Compatible with the TensorFlow operator InTopKV2. | |||||
*/ | |||||
REG_OP(InTopKV2) | |||||
.INPUT(predictions, TensorType({DT_FLOAT})) | |||||
.INPUT(targets, TensorType(IndexNumberType)) | |||||
.INPUT(k, TensorType({IndexNumberType})) | |||||
.OUTPUT(precision, TensorType({DT_BOOL})) | |||||
.OP_END_FACTORY_REG(InTopKV2) | |||||
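// Editorial sketch of the top-k membership test above for one row of
// "predictions": the target class is in the top k when fewer than k classes
// score strictly higher. Hypothetical helper; treating ties as not higher is
// stated here as an assumption.
#include <cstddef>
#include <cstdint>
inline bool InTopK(const float* row, size_t classes, int64_t target, int64_t k) {
  const float target_score = row[target];
  int64_t higher = 0;
  for (size_t c = 0; c < classes; ++c) {
    if (row[c] > target_score) ++higher;
  }
  return higher < k;
}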
/** | |||||
*@brief Performs batch normalization . \n | |||||
*@par Inputs: | |||||
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the scaling factor. | |||||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the offset. | |||||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||||
operation is used for training. | |||||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be | |||||
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" | |||||
if the operation is used for training . \n | |||||
*@par Attributes: | |||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". | |||||
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". | |||||
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | |||||
*@par Outputs: | |||||
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the mean of "x". | |||||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | |||||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
*@attention Constraints: | |||||
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | |||||
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". | |||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n | |||||
*/ | |||||
REG_OP(FusedBatchNormV2) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(scale, TensorType({DT_FLOAT})) | |||||
.INPUT(offset, TensorType({DT_FLOAT})) | |||||
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) | |||||
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||||
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||||
.ATTR(epsilon, Float, 0.0001) | |||||
.ATTR(data_format, String, "NHWC") | |||||
.ATTR(is_training, Bool, true) | |||||
.OP_END_FACTORY_REG(FusedBatchNormV2) | |||||
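// Editorial sketch, not part of the op registry: the per-element
// inference-mode arithmetic implied above, using the supplied "mean" and
// "variance" (training mode computes batch statistics instead). The helper
// name is hypothetical.
#include <cmath>
inline float BatchNormInfer(float x, float scale, float offset, float mean,
                            float variance, float epsilon) {
  return scale * (x - mean) / std::sqrt(variance + epsilon) + offset;
}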
/** | |||||
 * @brief Sorts a large amount of data. First operator of TopK.
 * @par Inputs:
 * Two inputs, including:
 * @li input_data: A Tensor. Data to be sorted. Support float16.
 * @li input_index: A Tensor. Range [0, 2048). Data type and format are the same as "input_data".
 * @par Attributes:
 * @li k_num: A required int. Number of elements to be sorted.
 * @par Outputs:
 * One output, including:
 * @li output_proposal: A Tensor. Data type and format are the same as "input_data". Proposals sorted for each channel.
*/ | |||||
REG_OP(SegmentSort) | |||||
.INPUT(input_data, TensorType({DT_FLOAT16})) | |||||
.INPUT(input_index, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(output_proposal, TensorType({DT_FLOAT16})) | |||||
.REQUIRED_ATTR(k_num, Int) | |||||
.OP_END_FACTORY_REG(SegmentSort) | |||||
/** | |||||
 * @brief Sorts a large amount of data. Second operator of TopK.
 * @par Inputs:
 * One input, including:
 * @li input_proposal: A Tensor. Proposals sorted for each channel. Support float16.
 * @par Attributes:
 * @li k_num: A required int. Number of elements to be sorted.
 * @par Outputs:
 * One output, including:
 * @li output_proposal: A Tensor. Data type and format are the same as "input_proposal". Proposals sorted for each channel.
*/ | |||||
REG_OP(MultiMerge) | |||||
.INPUT(input_proposal, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(output_proposal, TensorType({DT_FLOAT16})) | |||||
.REQUIRED_ATTR(k_num, Int) | |||||
.OP_END_FACTORY_REG(MultiMerge) | |||||
/** | |||||
 * @brief Sorts a large amount of data. Third operator of TopK.
 * @par Inputs:
 * One input, including:
 * @li input_proposal: A Tensor. Proposals sorted for each channel. Support float16.
 * @par Attributes:
 * @li k_num: A required int. Number of elements to be sorted.
 * @par Outputs:
 * Two outputs, including:
 * @li output_data: A Tensor. Data type and format are the same as "input_proposal". The sorted data.
 * @li output_index: A Tensor of type int32. Indices of the sorted data.
*/ | |||||
REG_OP(SingleMerge) | |||||
.INPUT(input_proposal, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(output_data, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(output_index, TensorType({DT_INT32})) | |||||
.REQUIRED_ATTR(k_num, Int) | |||||
.OP_END_FACTORY_REG(SingleMerge) | |||||
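// Editorial note, not part of the op registry: the three ops above form a
// staged TopK pipeline -- SegmentSort sorts per-channel segments, MultiMerge
// merges sorted proposals (and may be applied repeatedly), and SingleMerge
// produces the final sorted values plus int32 indices. A hedged construction
// sketch, assuming the set_input_*/set_attr_* setters that REG_OP-generated
// classes expose; "data", "index" and "k" are placeholders:
//   auto seg = op::SegmentSort("seg").set_input_input_data(data)
//                                    .set_input_input_index(index)
//                                    .set_attr_k_num(k);
//   auto mrg = op::MultiMerge("mrg").set_input_input_proposal(seg, "output_proposal")
//                                   .set_attr_k_num(k);
//   auto top = op::SingleMerge("top").set_input_input_proposal(mrg, "output_proposal")
//                                    .set_attr_k_num(k);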
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -182,6 +182,128 @@ REG_OP(AvgPool3D) | |||||
.ATTR(data_format, String, "NDHWC") | .ATTR(data_format, String, "NDHWC") | ||||
.OP_END_FACTORY_REG(AvgPool3D) | .OP_END_FACTORY_REG(AvgPool3D) | ||||
/** | |||||
*@brief Performs average pooling on the input. | |||||
*@par Inputs: | |||||
*@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. | |||||
*@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. | |||||
*@li multiplier: An optional tensor of type float16, float32, double.
*@par Attributes:
*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor.
*@li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor.
*@li pads: List of ints, implicit zero paddings on both sides of the input.
*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
*@li count_include_pad: When true, will include the zero-padding in the averaging calculation.
*@li divisor_override: If specified, it will be used as the divisor; otherwise the size of the pooling region will be used.
*@li data_format: A string, format of input data . \n | |||||
*@par Outputs: | |||||
*y: The average pooled output tensor . \n | |||||
*@attention Constraints: | |||||
*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the TensorFlow operator AvgPool3D. | |||||
*/ | |||||
REG_OP(AvgPool3DD) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.REQUIRED_ATTR(ksize, ListInt) | |||||
.REQUIRED_ATTR(strides, ListInt) | |||||
.REQUIRED_ATTR(pads, ListInt) | |||||
.ATTR(ceil_mode, Bool, false) | |||||
.ATTR(count_include_pad, Bool, true) | |||||
.ATTR(divisor_override, Int, 0) | |||||
.ATTR(data_format, String, "NDHWC") | |||||
.OP_END_FACTORY_REG(AvgPool3DD) | |||||
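// Editorial sketch of how the three averaging attributes above interact for
// one output position, per their descriptions: "divisor_override" wins when
// non-zero, otherwise the divisor is the full window volume or only the
// non-padded element count. The helper name is hypothetical.
#include <cstdint>
inline int64_t AvgPoolDivisor(int64_t divisor_override, bool count_include_pad,
                              int64_t window_volume, int64_t valid_count) {
  if (divisor_override != 0) return divisor_override;
  return count_include_pad ? window_volume : valid_count;
}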
/** | |||||
* @brief Computes AvgPool3DGrad function. | |||||
* @par Inputs: | |||||
* @li orig_input_shape: An NDHWC tensor of type int32. | |||||
* @li grads: An NDHWC tensor of type float16, float32, or double. | |||||
* @par Attributes: | |||||
* @li ksize: List of ints that has length 5. The size of the window for each dimension of the input tensor. | |||||
 * @li strides: List of ints that has length 5. The stride of the sliding window for each dimension of the input tensor.
 * @li pads: List of ints, implicit zero paddings on both sides of the input.
 * @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
 * @li count_include_pad: When true, will include the zero-padding in the averaging calculation.
 * @li divisor_override: If specified, it will be used as the divisor; otherwise the size of the pooling region will be used.
 * @li data_format: A string, format of input data.
 * @par Outputs:
 * output: A mutable tensor with the shape given by "orig_input_shape" and the same type as "grads".
* @attention Constraints: | |||||
* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||||
* @par Third-party framework compatibility | |||||
* @li Compatible with the TensorFlow operator AvgPoolGrad. | |||||
*/ | |||||
REG_OP(AvgPool3DGrad) | |||||
.INPUT(orig_input_shape, TensorType({DT_INT32})) | |||||
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.REQUIRED_ATTR(ksize, ListInt) | |||||
.REQUIRED_ATTR(strides, ListInt) | |||||
.REQUIRED_ATTR(pads, ListInt) | |||||
.ATTR(ceil_mode, Bool, false) | |||||
.ATTR(count_include_pad, Bool, true) | |||||
.ATTR(divisor_override, Int, 0) | |||||
.ATTR(data_format, String, "NDHWC") | |||||
.OP_END_FACTORY_REG(AvgPool3DGrad) | |||||
/** | |||||
 * @brief Computes gradients of the 3D average pooling function, with the original input shape supplied as an attribute.
* @par Inputs: | |||||
* @li grads: An NDHWC tensor of type float16. | |||||
* @li filter: An optional tensor of type float16, fractal_z_3d layout. | |||||
* @li multiplier: An optional tensor of float16. | |||||
* @par Attributes: | |||||
 * @li orig_input_shape: List of ints that has length 5. The shape of the original input tensor.
 * @li ksize: List of ints that has length 5. The size of the window for each dimension of the input tensor.
 * @li strides: List of ints that has length 5. The stride of the sliding window for each dimension of the input tensor.
 * @li pads: List of ints, implicit zero paddings on both sides of the input.
 * @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
 * @li count_include_pad: When true, will include the zero-padding in the averaging calculation.
 * @li divisor_override: If specified, it will be used as the divisor; otherwise the size of the pooling region will be used.
 * @li data_format: A string, format of input data . \n
 * @par Outputs:
 * output: The gradient tensor, with the shape given by "orig_input_shape" . \n
* @attention Constraints: | |||||
* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the TensorFlow operator AvgPool3DGradD. | |||||
*/ | |||||
REG_OP(AvgPool3DGradD) | |||||
.INPUT(grads, TensorType({DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(output, TensorType({DT_FLOAT16})) | |||||
.REQUIRED_ATTR(orig_input_shape, ListInt) | |||||
.REQUIRED_ATTR(ksize, ListInt) | |||||
.REQUIRED_ATTR(strides, ListInt) | |||||
.REQUIRED_ATTR(pads, ListInt) | |||||
.ATTR(ceil_mode, Bool, false) | |||||
.ATTR(count_include_pad, Bool, true) | |||||
.ATTR(divisor_override, Int, 0) | |||||
.ATTR(data_format, String, "NDHWC") | |||||
.OP_END_FACTORY_REG(AvgPool3DGradD) | |||||
/** | /** | ||||
*@brief Performs max_pool_ext2 on the input . \n | *@brief Performs max_pool_ext2 on the input . \n | ||||
@@ -278,8 +400,8 @@ No default value. | |||||
specifying the stride of the sliding window for each dimension of | specifying the stride of the sliding window for each dimension of | ||||
the input tensor. No default value. | the input tensor. No default value. | ||||
*@li padding: A required string type of float16. | *@li padding: A required string type of float16. | ||||
*@li pads: A list type of int32. Default value {0, 0, 0}. | |||||
*@li dilation: A list type of int32. Default value {1, 1, 1}. | |||||
*@li pads: A list type of int32. Default value {0,0,0,0,0,0}. | |||||
*@li dilation: A list type of int32. Default value {1,1,1,1,1,1}. | |||||
*@li ceil_mode: A ceil mode number of int32 . Default value 0. | *@li ceil_mode: A ceil mode number of int32 . Default value 0. | ||||
*@li data_format: An optional string. Defaults to "NDHWC" . \n | *@li data_format: An optional string. Defaults to "NDHWC" . \n | ||||
@@ -302,12 +424,37 @@ REG_OP(MaxPool3D) | |||||
.REQUIRED_ATTR(ksize, ListInt) | .REQUIRED_ATTR(ksize, ListInt) | ||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
.REQUIRED_ATTR(padding, String) | .REQUIRED_ATTR(padding, String) | ||||
.ATTR(pads, ListInt, {0,0,0}) | |||||
.ATTR(dilation, ListInt, {1,1,1}) | |||||
.ATTR(pads, ListInt, {0,0,0,0,0,0}) | |||||
.ATTR(dilation, ListInt, {1,1,1,1,1,1}) | |||||
.ATTR(ceil_mode, Int, 0) | .ATTR(ceil_mode, Int, 0) | ||||
.ATTR(data_format, String, "NDHWC") | .ATTR(data_format, String, "NDHWC") | ||||
.OP_END_FACTORY_REG(MaxPool3D) | .OP_END_FACTORY_REG(MaxPool3D) | ||||
/** | |||||
*@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n
* The output is of size H x W, for any input size. | |||||
* @par Inputs: | |||||
* One input, including: | |||||
* @li x: A Tensor. Must be one of the following data types: | |||||
* float16, float32, float64. \n | |||||
* @par Attributes: | |||||
* @li output_size: A required list of 2 ints | |||||
* specifying the size (H,W) of the output tensor. \n | |||||
 * @par Outputs:
 * @li y: A Tensor. Has the same data type as "x".
 * @li argmax: A Tensor of type IndexNumberType, recording the index of each maximum value. \n
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator AdaptiveMaxPool2d. | |||||
*/ | |||||
REG_OP(AdaptiveMaxPool2d) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OUTPUT(argmax, TensorType::IndexNumberType()) | |||||
.REQUIRED_ATTR(output_size, ListInt) | |||||
.OP_END_FACTORY_REG(AdaptiveMaxPool2d) | |||||
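// Editorial sketch of the usual adaptive-pooling window bounds for output
// index "i" along a dimension of input size "in" and output size "out"
// (stated as an assumption; the header above does not spell the formula out).
#include <cstdint>
inline void AdaptiveWindow(int64_t i, int64_t in, int64_t out,
                           int64_t* start, int64_t* end) {
  *start = (i * in) / out;                // floor(i * in / out)
  *end = ((i + 1) * in + out - 1) / out;  // ceil((i + 1) * in / out)
}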
/** | /** | ||||
* @brief Computes second-order gradients of the maxpooling3d function . \n | * @brief Computes second-order gradients of the maxpooling3d function . \n | ||||
@@ -477,8 +624,9 @@ REG_OP(MaxPoolV2) | |||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*x: An NC1HWC0 Tensor. Supported type: float, double, int32, | |||||
* uint8, int16, int8, int64, uint16, half, uint32, uint64 . \n | |||||
 * x: A 4D Tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||||
* Must set the format, supported format list ["NCHW, NHWC"]. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li ksize: A required list of int8, int16, int32, or int64 values, | *@li ksize: A required list of int8, int16, int32, or int64 values, | ||||
@@ -490,8 +638,8 @@ REG_OP(MaxPoolV2) | |||||
*@li padding: A required string. No default value . \n | *@li padding: A required string. No default value . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type and format as input "x". | |||||
*argmax: A Tensor. Has the same type and format as input "x". | |||||
*@li y: A Tensor. Has the same type and format as input "x". | |||||
*@li argmax: A Tensor. Has the same type and format as input "x". | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, | *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, | ||||
* ksize[1] * ksize[2] <= 255. | * ksize[1] * ksize[2] <= 255. | ||||
@@ -517,10 +665,12 @@ REG_OP(MaxPoolWithArgmax) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li x: An NC1HWC0 tensor. Supported type: float, double, int32, | |||||
*@li x: A 4D tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64. | * uint8, int16, int8, int64, uint16, half, uint32, uint64. | ||||
*@li grad: An NC1HWC0 tensor. Supported type: float, double, int32, | |||||
 * Must set the format, supported format list ["NCHW", "NHWC"]
*@li grad: A 4D tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64. | * uint8, int16, int8, int64, uint16, half, uint32, uint64. | ||||
 * Must set the format, supported format list ["NCHW", "NHWC"]
*@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n | *@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n | ||||
*@par Attributes:
@@ -741,7 +891,7 @@ REG_OP(AvgPoolV2Grad)
* @brief Computes gradients of the averagev2 pooling function.
* @par Inputs:
* @li input_grad: An NHWC tensor of type float16, float32, or double.
*input_grad: An NHWC tensor of type float16, float32, or double.
* @par Attributes:
* @li orig_input_shape: A required tuple or list of type int32.
@@ -759,10 +909,10 @@ REG_OP(AvgPoolV2Grad)
* @li data_format: An optional string. Defaults to "NHWC".
* @par Outputs:
* @out_grad: A mutable tensor with the same shape and type as "orig_input".
*out_grad: A mutable tensor with the same shape and type as "orig_input".
* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator AvgPoolGrad.
*Compatible with the TensorFlow operator AvgPoolGrad.
*/
REG_OP(AvgPoolV2GradD)
    .INPUT(input_grad, TensorType({DT_FLOAT16}))
@@ -1037,6 +1187,7 @@ REG_OP(MaxPool3DGrad)
    .OUTPUT(y, TensorType::RealNumberType())
    .REQUIRED_ATTR(ksize, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .ATTR(padding, String, "SAME")
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(data_format, String, "NDHWC")
    .OP_END_FACTORY_REG(MaxPool3DGrad)
@@ -1107,7 +1258,7 @@ REG_OP(AvgPool1DD)
*@par Inputs:
* One input:
*x: An NC1HWC0 Tensor of type float16.
*x: A 4D Tensor of type float16. The format must be set; the supported formats are "NCHW" and "NHWC".
*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
* each dimension of the input tensor. No default value.
@@ -1148,9 +1299,9 @@ REG_OP(MaxPoolWithArgmaxV2)
*@par Inputs:
* Three inputs, including:
*@li x: An NC1HWC0 tensor of type float16.
*@li grad: An NC1HWC0 tensor of type float16.
*@li argmx: An NC1HWC0 tensor of type uint16 or int64 . \n
*@li x: A 4D tensor of type float16. The format must be set; the supported formats are "NCHW" and "NHWC".
*@li grad: A 4D tensor of type float16. The format must be set; the supported formats are "NCHW" and "NHWC".
*@li argmax: A 4D tensor of type uint16 or int64. The format must be set; the supported formats are "NCHW" and "NHWC". \n
*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
@@ -1291,5 +1442,306 @@ REG_OP(MaxPoolV3Grad)
    .ATTR(global_pooling, Bool, false)
    .ATTR(ceil_mode, Bool, false)
    .OP_END_FACTORY_REG(MaxPoolV3Grad)
/**
*@brief Performs Dilation2D on the input . \n
*@par Inputs:
*x: A 4D tensor. The supported format is NHWC.
*filter: A 3D tensor of the same type as "x"; its C dimension must be the same as that of "x". \n
*@par Attributes:
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1.
*@li padding_mode: An optional string. Defaults to "SAME"; "SAME" and "VALID" are supported.
*@li pads: An optional list of 4 ints.
*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n
*@par Outputs:
*y: The output tensor. Has the same type and format as input "x" . \n
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Dilation2D.
*/
REG_OP(Dilation2D)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(rates, ListInt)
    .ATTR(padding_mode, String, "SAME")
    .ATTR(pads, ListInt, {0, 0, 0, 0})
    .ATTR(ceil_mode, Bool, false)
    .ATTR(data_format, String, "NHWC")
    .OP_END_FACTORY_REG(Dilation2D)
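// Illustrative sketch, not part of the op registration: TensorFlow-style
// morphological dilation takes, at each output point, the maximum of
// input + filter over the dilated window. Padding handling is omitted and
// all names are hypothetical.
static inline float Dilation2DPoint(const float *x, const float *filter,
                                    int base_h, int base_w, int c,
                                    int H, int W, int C,
                                    int filter_h, int filter_w,
                                    int rate_h, int rate_w) {
  float best = -3.402823466e+38f;  // effectively -FLT_MAX
  for (int i = 0; i < filter_h; ++i) {
    for (int j = 0; j < filter_w; ++j) {
      int ih = base_h + i * rate_h;  // dilated tap position
      int iw = base_w + j * rate_w;
      if (ih >= 0 && ih < H && iw >= 0 && iw < W) {
        float v = x[(ih * W + iw) * C + c] + filter[(i * filter_w + j) * C + c];
        if (v > best) best = v;
      }
    }
  }
  return best;  // y[n, oh, ow, c] for NHWC layout
}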
/**
*@brief Performs Dilation2DBackpropFilter on the input. \n
*@par Inputs:
*x: A 4D tensor. The supported format is NHWC.
*filter: A 3D tensor of the same type as "x"; its C dimension must be the same as that of "x".
*out_backprop: A tensor with the same type and format as input "x"; its C dimension must be the same as that of "x". \n
*@par Attributes:
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1.
*@li padding_mode: An optional string. Defaults to "SAME"; "SAME" and "VALID" are supported.
*@li pads: An optional list of 4 ints.
*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n
*@par Outputs:
*y: The output tensor. Has the same type and format as input "filter" . \n
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Dilation2DBackpropFilter.
*/
REG_OP(Dilation2DBackpropFilter)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .INPUT(filter,
           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .INPUT(out_backprop,
           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .OUTPUT(y,
            TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(rates, ListInt)
    .ATTR(padding_mode, String, "SAME")
    .ATTR(pads, ListInt, {0, 0, 0, 0})
    .ATTR(ceil_mode, Bool, false)
    .ATTR(data_format, String, "NHWC")
    .OP_END_FACTORY_REG(Dilation2DBackpropFilter)
/**
*@brief Performs Dilation2DBackpropInput on the input. \n
*@par Inputs:
*x: A 4D tensor. The supported format is NHWC.
*filter: A 3D tensor of the same type as "x"; its C dimension must be the same as that of "x".
*out_backprop: A tensor with the same type and format as input "x"; its C dimension must be the same as that of "x". \n
*@par Attributes:
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1.
*@li padding_mode: An optional string. Defaults to "SAME"; "SAME" and "VALID" are supported.
*@li pads: An optional list of 4 ints.
*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n
*@par Outputs:
*y: The output tensor. Has the same type and format as input "x" . \n
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Dilation2DBackpropInput.
*/
REG_OP(Dilation2DBackpropInput)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .INPUT(filter,
           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .INPUT(out_backprop,
           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .OUTPUT(y,
            TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(rates, ListInt)
    .ATTR(padding_mode, String, "SAME")
    .ATTR(pads, ListInt, {0, 0, 0, 0})
    .ATTR(ceil_mode, Bool, false)
    .ATTR(data_format, String, "NHWC")
    .OP_END_FACTORY_REG(Dilation2DBackpropInput)
/**
* @brief Applies a 2D adaptive average pooling over
* an input signal composed of several input planes. \n
* @par Inputs:
* One input, including:
* @li x: A Tensor. Must be one of the following data types:
* float16, float32. \n
* @par Attributes:
* @li output_size: A required list of 2 ints
* specifying the size (H,W) of the output tensor. \n
* @par Outputs:
* @li y: A Tensor. Has the same data type as "x". \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator AdaptiveAvgPool2d.
*/
REG_OP(AdaptiveAvgPool2d)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(output_size, ListInt)
    .OP_END_FACTORY_REG(AdaptiveAvgPool2d)
/**
* @brief Computes gradients of the adaptive average pooling function.
* @par Inputs:
* @li input_grad: A Tensor. Must be one of the following data types:
* float16, float32.
* @par Attributes:
* @li orig_input_shape: A required tuple or list of type int32.
* @par Outputs:
* @li output_grad: A tensor with the same type as "input_grad".
* @par Third-party framework compatibility
* Compatible with the Pytorch operator AdaptiveAvgPool2dGrad.
*/
REG_OP(AdaptiveAvgPool2dGrad)
    .INPUT(input_grad, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(output_grad, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(orig_input_shape, ListInt)
    .OP_END_FACTORY_REG(AdaptiveAvgPool2dGrad)
/**
* @brief Performs the backpropagation of MaxPoolWithArgmaxV1.
* @par Inputs:
* Three inputs, including:
* @li x: An NC1HWC0 tensor of type float16.
* @li grad: An NC1HWC0 tensor of type float16.
* @li argmax: An NC1HWC0 tensor of type uint16 or int64. \n
* @par Attributes:
* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
* each dimension of the input tensor. No default value.
* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
* each dimension of the input tensor. No default value.
* @li pads: A required list of ints. \n
* @par Outputs:
* y: A Tensor. Has the same type and format as input "x". \n
* @attention Constraints:
* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1
* @li "pads" is a list of ints.
* @li "ceil_mode" defaults to False.
* @li "data_format" defaults to "NC1HWC0". \n
* @par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1.
*/
REG_OP(MaxPoolGradWithArgmaxV1)
    .INPUT(x, TensorType({DT_FLOAT16}))
    .INPUT(grad, TensorType({DT_FLOAT16}))
    .INPUT(argmax, TensorType({DT_UINT16}))
    .OUTPUT(y, TensorType({DT_FLOAT16}))
    .REQUIRED_ATTR(ksize, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(dtype, Int, 3)
    .ATTR(dilation, ListInt, {1, 1, 1, 1})
    .ATTR(ceil_mode, Bool, false)
    .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV1)
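// Illustrative sketch, not part of the op registration: the backward pass
// routes each incoming gradient element to the input position recorded in
// "argmax" during the forward pass. A flattened index per gradient element
// and a zero-initialized output buffer are assumptions here.
static inline void MaxPoolGradScatter(const float *grad, const int *argmax,
                                      float *dx, int num_elems) {
  for (int i = 0; i < num_elems; ++i) {
    dx[argmax[i]] += grad[i];  // accumulate the gradient at the max position
  }
}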
/**
* @brief Performs max pooling on the input and outputs both max values and indices.
* @par Inputs:
* One input:
* x: An NC1HWC0 Tensor of type float16. \n
* @par Attributes:
* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
* each dimension of the input tensor. No default value.
* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
* each dimension of the input tensor. No default value.
* @li pads: A required list of int values. No default value. \n
* @par Outputs:
* y: A Tensor. Has the same type and format as input "x".
* argmax: A Tensor. type:uint16, format:NC1HWC0. \n
* @attention Constraints:
* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[1] >= 1,
* strides[2] <= 63, strides[2] >= 1.
* @li "pads" is a list of ints.
* @li "ceil_mode" defaults to False.
* @li "data_format" defaults to "NC1HWC0". \n
* @par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1.
*/
REG_OP(MaxPoolWithArgmaxV1)
    .INPUT(x, TensorType({DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT16}))
    .OUTPUT(argmax, TensorType({DT_UINT16}))
    .REQUIRED_ATTR(ksize, ListInt)
    .REQUIRED_ATTR(strides, ListInt)
    .REQUIRED_ATTR(pads, ListInt)
    .ATTR(dtype, Int, 3)
    .ATTR(dilation, ListInt, {1, 1, 1, 1})
    .ATTR(ceil_mode, Bool, false)
    .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1)
/**
*@brief Randomly samples a subset of positive and negative examples, and overwrites
the label vector with the ignore value (-1) for all elements that are not
included in the sample. \n
* @par Inputs:
* One input:
* labels: A label vector with shape (N, ). \n
* @par Attributes:
* @li batch_size_per_images: A required attribute of type int.
* @li positive_fraction: A required attribute of type float.
*@par Outputs:
*y: The result of SubSample. \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator SubSample.
*@par Restrictions:
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
*/
REG_OP(SubSample)
    .INPUT(labels, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_INT32}))
    .REQUIRED_ATTR(batch_size_per_images, Int)
    .REQUIRED_ATTR(positive_fraction, Float)
    .OP_END_FACTORY_REG(SubSample)
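// Illustrative sketch, not part of the op registration (random selection is
// omitted; SubSampleLabels below feeds in a shuffle_matrix for that). Assuming
// labels > 0 mark positives and 0 marks negatives, keep at most
// positive_fraction * batch_size_per_images positives, fill up with negatives,
// and overwrite everything else with the ignore value -1.
static inline void SubSampleSketch(int *labels, int n,
                                   int batch_size_per_images,
                                   float positive_fraction) {
  int max_pos = static_cast<int>(batch_size_per_images * positive_fraction);
  int kept_pos = 0;
  int kept_neg = 0;
  for (int i = 0; i < n; ++i) {
    if (labels[i] > 0 && kept_pos < max_pos) { ++kept_pos; continue; }
    if (labels[i] == 0 && kept_neg < batch_size_per_images - max_pos) { ++kept_neg; continue; }
    labels[i] = -1;  // not sampled
  }
}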
/**
*@brief Randomly samples a subset of positive and negative examples, and overwrites
the label vector with the ignore value (-1) for all elements that are not
included in the sample. \n
* @par Inputs:
* Two inputs, including:
* @li labels: A label vector with shape (N, ).
* @li shuffle_matrix: A random matrix with shape (N, ). \n
* @par Attributes:
* @li batch_size_per_images: A required attribute of type int.
* @li positive_fraction: A required attribute of type float.
*@par Outputs:
*y: The result of SubSample. \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator SubSampleLabels.
*@par Restrictions:
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
*/
REG_OP(SubSampleLabels)
    .INPUT(labels, TensorType({DT_INT32}))
    .INPUT(shuffle_matrix, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_INT32}))
    .REQUIRED_ATTR(batch_size_per_images, Int)
    .REQUIRED_ATTR(positive_fraction, Float)
    .OP_END_FACTORY_REG(SubSampleLabels)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -2101,6 +2101,55 @@ REG_OP(FusedMulApplyMomentumExtern)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern)
/**
*@brief Updates '*var' according to the momentum scheme.
* accum = accum * momentum - x1 * x2 * lr
* if use_nesterov is True:
* var += accum * momentum - x1 * x2 * lr
* else:
* var += accum
*
*@par Inputs:
*@li var: A mutable tensor. Must be one of the data types defined in
* TensorType::NumberType(). Should be from a Variable().
*@li accum: A mutable tensor. Has the same type as "var". Should be from a
* Variable().
*@li lr: A tensor for the learning rate. Has the same type as "var". Should be
* from a Variable().
*@li x1: A tensor of type TensorType::NumberType().
*@li momentum: A scalar. Has the same type as "var".
*@li x2: A scalar with the same type as "var".
*
*@par Attributes:
*@li use_nesterov: An optional bool. Defaults to "False".
* If "True", var will be updated by using Nesterov momentum.
*@li use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" tensor is protected by a lock;
* otherwise the behavior is undefined, but may exhibit less contention.
*
*@par Outputs:
*@li var: A mutable tensor. Has the same type as input "var".
*@li accum: A mutable tensor. Has the same type as input "accum".
*
*@attention Constraints:
* The input tensors must have the same shape.
*
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator ResourceApplyKerasMomentum.
*
*/
REG_OP(FusedMulApplyKerasMomentum)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(x1, TensorType::NumberType())
    .INPUT(momentum, TensorType::NumberType())
    .INPUT(x2, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(accum, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .ATTR(use_nesterov, Bool, false)
    .OP_END_FACTORY_REG(FusedMulApplyKerasMomentum)
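// Illustrative scalar sketch of the update rule documented above; the real
// kernel applies it element-wise, and these names are hypothetical.
static inline void FusedKerasMomentumStep(float &var, float &accum, float lr,
                                          float x1, float x2, float momentum,
                                          bool use_nesterov) {
  accum = accum * momentum - x1 * x2 * lr;
  if (use_nesterov) {
    var += accum * momentum - x1 * x2 * lr;  // Nesterov look-ahead variant
  } else {
    var += accum;
  }
}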
/**
*@brief Update "g" according to the LARS algorithm . \n
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -223,7 +223,29 @@ REG_OP(Relu6Grad)
    .INPUT(features, TensorType::RealNumberType())
    .OUTPUT(backprops, TensorType::RealNumberType())
    .OP_END_FACTORY_REG(Relu6Grad)
/**
*@brief Calculates the elu_grad_v2 function.
*Applies the element-wise function:
* Computes the backward of the elu: 1 if x > 0, otherwise elu(x) + alpha.
*@par Inputs:
*Two inputs, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32.
* @li activations: A tensor. Must be one of the following types:
* float16, float32.
*
*@par Outputs:
*y: A Tensor with the same type and shape as "grads".
*
*@par Attributes:
*@li alpha: An optional float scalar parameter. Defaults to 1.0.
*/
REG_OP(EluGradV2)
    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(alpha, Float, 1.0)
    .OP_END_FACTORY_REG(EluGradV2)
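// Illustrative sketch of the documented rule; since elu(x) > 0 exactly when
// x > 0, the sign of "activations" stands in for the sign of x. Names are
// hypothetical.
static inline float EluGradV2Point(float grad, float activation, float alpha) {
  // For x <= 0, elu(x) = alpha * (exp(x) - 1), so d/dx elu(x) = activation + alpha.
  return activation > 0.0f ? grad : grad * (activation + alpha);
}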
/**
* @brief Compute sigmoid of "x" element-wise . \n
@@ -508,6 +530,42 @@ REG_OP(Elu)
    .ATTR(alpha, Float, 1.0)
    .OP_END_FACTORY_REG(Elu)
/**
*@brief Continuously Differentiable Exponential Linear Units:
* Perform the linear unit element-wise on the input tensor X using formula:
* max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n
*@par Inputs:
*x: A tensor of type float16 or float32, for the input data . \n
*@par Attributes:
*@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0".
*@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0".
*@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
*@par Outputs:
*y: A tensor of type float16 or float32, for the normalized result . \n
*@attention Constraints:
*@li The input is of type float16 or float32 . \n
*@par Multiple batches supported or not
*Supported
*@par Third-party framework compatibility
*@li Compatible with ONNX's Celu operator
*/
REG_OP(Celu)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(alpha1, Float, 1.0)
    .ATTR(alpha2, Float, 1.0)
    .ATTR(alpha3, Float, 1.0)
    .OP_END_FACTORY_REG(Celu)
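// Illustrative sketch of the documented formula with a single alpha, as in
// ONNX Celu: y = max(0, x) + min(0, alpha * (exp(x / alpha) - 1)).
// Assumes <cmath> is included.
static inline float CeluPoint(float x, float alpha) {
  float neg = alpha * (std::exp(x / alpha) - 1.0f);  // saturating negative branch
  return (x > 0.0f ? x : 0.0f) + (neg < 0.0f ? neg : 0.0f);
}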
/**
*@brief Computes gradients for the exponential linear (Elu) operation.
*
@@ -640,6 +698,352 @@ REG_OP(Mish)
    .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 }))
    .OP_END_FACTORY_REG(Mish)
/**
* @brief Pytorch mish_grad operator.
* @par Inputs:
* Three inputs, including:
* @li grad: A Tensor. Shape, datatype and format are the same as "x".
* @li x: A Tensor. Must be one of the following types: float16, float32.
* @li tanhx: An optional Tensor. Shape, datatype and format are the same as "x".
* @par Outputs:
* One output, including:
* @li x_grad: A Tensor. Shape, datatype and format are the same as "x".
*/
REG_OP(MishGrad)
    .INPUT(grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
    .INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 }))
    .OPTIONAL_INPUT(tanhx, TensorType({ DT_FLOAT,DT_FLOAT16 }))
    .OUTPUT(x_grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
    .OP_END_FACTORY_REG(MishGrad)
/**
* @brief Pytorch hardtanh_backward operator.
*
* @par Inputs:
* Two inputs, including:
* @li result: minimum tensor of the linear region range,
* datatype: float16/float32, format: ND/5HD.
* @li grad: maximum tensor of the linear region range,
* datatype: float16/float32, format: ND/5HD. \n
* @par Attributes:
* Two attributes, including:
* @li min_val: minimum value of the linear region range, datatype: float.
* @li max_val: maximum value of the linear region range, datatype: float. \n
* @par Outputs:
* One output, including:
* @li y: hardtanh_backward output tensor, datatype and format are the same as
* input "result". \n
* @attention Constraints:
* This operator only supports dataType: float16/float32, format: ND/5HD. \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator HardtanhGrad.
*/
REG_OP(HardtanhGrad)
    .INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */
    .INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Second operand." */
    .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Result, has same element type as two inputs" */
    .ATTR(min_val, Float, -1.0)
    .ATTR(max_val, Float, 1.0)
    .OP_END_FACTORY_REG(HardtanhGrad)
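// Illustrative sketch assuming "result" carries the forward input of hardtanh:
// the gradient passes through only strictly inside the linear region.
static inline float HardtanhGradPoint(float result, float grad,
                                      float min_val, float max_val) {
  return (result > min_val && result < max_val) ? grad : 0.0f;
}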
/**
* @brief Calculates the softplus loss function with attributes of beta and threshold. \n
* @par Inputs:
* One input, including:
* @li x: A mutable Tensor. Must be one of the following types:
* float16, float32. \n
* @par Attributes:
* @li beta: An optional float. Defaults to "1.0". \n
* @li threshold: An optional float. Defaults to "20.0". \n
* @par Outputs:
* @li y: A mutable Tensor. Has the same type as "x". \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator Softplus.
*/
REG_OP(SoftplusV2)
    .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .ATTR(beta, Float, 1.0)
    .ATTR(threshold, Float, 20.0)
    .OP_END_FACTORY_REG(SoftplusV2)
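// Illustrative sketch of Pytorch Softplus semantics (assumes <cmath> is
// included): the function reverts to identity once beta * x exceeds threshold,
// which keeps exp() from overflowing.
static inline float SoftplusV2Point(float x, float beta, float threshold) {
  float scaled = beta * x;
  if (scaled > threshold) return x;             // linear regime
  return std::log1p(std::exp(scaled)) / beta;   // (1/beta) * ln(1 + e^(beta*x))
}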
/**
* @brief Calculates the reversed outputs of the function "softplus_v2". \n
* @par Inputs:
* Two inputs, including:
* @li input_gradients: A mutable Tensor. Must be one of the following types:
* float16, float32.
* @li input_features: A mutable Tensor of the same type as "input_gradients". \n
* @par Attributes:
* @li beta: An optional float. Defaults to "1.0". \n
* @li threshold: An optional float. Defaults to "20.0". \n
* @par Outputs:
* @li output_backprops: A mutable Tensor. Has the same type as "input_gradients". \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator SoftplusGrad.
*/
REG_OP(SoftplusV2Grad)
    .INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 }))
    .ATTR(beta, Float, 1.0)
    .ATTR(threshold, Float, 20.0)
    .OP_END_FACTORY_REG(SoftplusV2Grad)
/**
* @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor)
* where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise.
*
* @par Inputs:
* One input, including:
* @li x: A Tensor. Must be one of the following types: float32, float16.
*
* @par Attributes:
* @li alpha: An optional float. Defaults to 1.0.
*
* @par Outputs:
* One output, including:
* @li y: A Tensor of the same type as "x".
*
*/
REG_OP(ThresholdedRelu)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(alpha, Float, 1.0)
    .OP_END_FACTORY_REG(ThresholdedRelu)
/**
* @brief Calculate the hard shrinkage function. \n
* @par Inputs:
* One input, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n
* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n
* @par Outputs:
* y: A Tensor with the same dtype and shape as "input_x". \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator Hardshrink. \n
*/
REG_OP(HardShrink)
    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(lambd, Float, 0.5)
    .OP_END_FACTORY_REG(HardShrink)
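// Illustrative sketch of Pytorch Hardshrink semantics, assumed here:
// y = x if |x| > lambd, otherwise 0.
static inline float HardShrinkPoint(float x, float lambd) {
  return (x > lambd || x < -lambd) ? x : 0.0f;
}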
/**
*@brief Calculate the hard shrink grad function. \n
*
* Computes the gradient for the HardShrink: if x > lambd or x < -lambd, gradients; otherwise 0. \n
*
*@par Inputs:
*Two inputs, including:
* @li gradients: A tensor. Must be one of the following types:
* float16, float32. \n
* @li features: A tensor. Must be one of the following types:
* float16, float32. \n
*
*@par Outputs:
*backprops: A Tensor with the same type and shape as "features". \n
*
*@par Attributes:
*@li lambd: An optional float. Defaults to 0.5. \n
*
*@par Third-party framework compatibility
*Compatible with the Pytorch operator Hardshrink_backward. \n
*/
REG_OP(HardShrinkGrad)
    .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(lambd, Float, 0.5)
    .OP_END_FACTORY_REG(HardShrinkGrad)
/**
* @brief Calculate the hard sigmoid function. \n
* @par Inputs:
* One input, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32, int32. \n
* @par Attributes:
* @li alpha: An optional float. Defaults to 0.16666666. \n
* @li beta: An optional float. Defaults to 0.5. \n
* @par Outputs:
* y: A Tensor with the same dtype and shape as "input_x". \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator Hardsigmoid. \n
*/
REG_OP(HardSigmoid)
    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
    .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(alpha, Float, 0.16666666)
    .ATTR(beta, Float, 0.5)
    .OP_END_FACTORY_REG(HardSigmoid)
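// Illustrative sketch of Pytorch Hardsigmoid semantics, assumed here:
// y = clamp(alpha * x + beta, 0, 1), with alpha = 1/6 and beta = 0.5 by default.
static inline float HardSigmoidPoint(float x, float alpha, float beta) {
  float y = alpha * x + beta;
  return y < 0.0f ? 0.0f : (y > 1.0f ? 1.0f : y);
}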
/**
* @brief Calculate the soft shrinkage function. \n
* @par Inputs:
* One input, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n
* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n
* @par Outputs:
* y: A Tensor with the same dtype and shape as "input_x". \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator Softshrink. \n
*/
REG_OP(SoftShrink)
    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(lambd, Float, 0.5)
    .OP_END_FACTORY_REG(SoftShrink)
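// Illustrative sketch of Pytorch Softshrink semantics, assumed here:
// y = x - lambd for x > lambd, x + lambd for x < -lambd, otherwise 0.
static inline float SoftShrinkPoint(float x, float lambd) {
  if (x > lambd) return x - lambd;
  if (x < -lambd) return x + lambd;
  return 0.0f;
}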
/**
* @brief Calculate the reversed outputs of the function "soft_shrink". \n
* @par Inputs:
* Two inputs, including:
* @li input_grad: A tensor. Must be one of the following types:
* float16, float32. \n
* @li input_x: A tensor of the same dtype as "input_grad". \n
* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n
* @par Outputs:
* y: A Tensor of the same dtype and shape as "input_grad". \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator SoftShrinkGrad. \n
*/
REG_OP(SoftShrinkGrad)
    .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(lambd, Float, 0.5)
    .OP_END_FACTORY_REG(SoftShrinkGrad)
/**
*@brief Calculate the gradient of log sigmoid. \n
*@par Inputs:
*Two inputs, including:
* @li grads: A tensor, gradient of previous layer. Must be one of the following types:
* float16, float32. \n
* @li features: A tensor, input of log sigmoid. Must be one of the following types:
* float16, float32. \n
*@par Outputs:
*One output, including:
* @li backprops: A tensor with the same type and shape as "grads". \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator LogSigmoidBackward. \n
*/
REG_OP(LogSigmoidGrad)
    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OP_END_FACTORY_REG(LogSigmoidGrad)
/**
*@brief Calculate -ln(1+e^(-x)). \n
*@par Inputs:
*One input, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32. \n
*@par Outputs:
*One output, including:
* @li y: A tensor with the same type and shape as "x". \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator LogSigmoid. \n
*/
REG_OP(LogSigmoid)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */
    .OP_END_FACTORY_REG(LogSigmoid)
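// Illustrative numerically stable rewrite of y = -ln(1 + e^(-x)) (assumes
// <cmath> is included): min(0, x) - log1p(exp(-|x|)) avoids overflow for
// large negative x.
static inline float LogSigmoidPoint(float x) {
  return (x < 0.0f ? x : 0.0f) - std::log1p(std::exp(-std::fabs(x)));
}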
/**
*@brief Calculate the backward outputs of the function "hard_sigmoid". \n
*@par Inputs:
*Two inputs, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32. \n
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n
*@par Attributes:
* @li alpha: An optional float. Defaults to 0.16666666. \n
* @li beta: An optional float. Defaults to 0.5. \n
*@par Outputs:
*One output, including:
* @li y: A tensor with the same type and shape as "input_x". \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator HardSigmoidGrad. \n
*/
REG_OP(HardSigmoidGrad)
    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(alpha, Float, 0.16666666)
    .ATTR(beta, Float, 0.5)
    .OP_END_FACTORY_REG(HardSigmoidGrad)
/**
* @brief Calculate the shrink function. \n
* @par Inputs:
* One input, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n
* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n
* @li bias: An optional float. Defaults to 0.0. \n
* @par Outputs:
* y: A Tensor with the same dtype and shape as "input_x". \n
* @par Third-party framework compatibility
* Compatible with the ONNX operator Shrink. \n
*/
REG_OP(Shrink)
    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(lambd, Float, 0.5)
    .ATTR(bias, Float, 0.0)
    .OP_END_FACTORY_REG(Shrink)
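// Illustrative sketch of the ONNX Shrink definition:
// y = x - bias for x > lambd, x + bias for x < -lambd, otherwise 0.
static inline float ShrinkPoint(float x, float lambd, float bias) {
  if (x > lambd) return x - bias;
  if (x < -lambd) return x + bias;
  return 0.0f;
}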
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -101,7 +101,7 @@ REG_OP(FillD)
*/
REG_OP(BroadcastTo)
    .INPUT(x, TensorType::BasicType())
    .INPUT(shape, TensorType({DT_INT32}))
    .INPUT(shape, TensorType({DT_INT32, DT_INT64}))
    .OUTPUT(y, TensorType::BasicType())
    .OP_END_FACTORY_REG(BroadcastTo)
@@ -161,7 +161,7 @@ REG_OP(Pad)
*@brief Pads a tensor . \n
*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n
*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
*@par Attributes:
*paddings: An optional "vector<vector<int>>". Defaults to "{}".
@@ -180,8 +180,8 @@ REG_OP(Pad)
* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
*/
REG_OP(PadD)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT}))
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .REQUIRED_ATTR(paddings, ListListInt)
    .OP_END_FACTORY_REG(PadD)
@@ -213,7 +213,7 @@ REG_OP(PadV2)
*@brief Pads a tensor . \n
*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n
*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
*constant_values: A Tensor. Must have the same type as input.
*@par Attributes:
@@ -227,10 +227,7 @@ REG_OP(PadV2)
*y: A Tensor of the same type as "x" . \n
*@par Third-party framework compatibility:
* Compatible with TensorFlow operator Pad.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
* Compatible with TensorFlow operator PadV2.
*/
REG_OP(PadV2D)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
@@ -272,42 +269,42 @@ REG_OP(PadV3)
    .ATTR(paddings_contiguous, Bool, true)
    .OP_END_FACTORY_REG(PadV3)
/**
*@brief Pads a tensor.
*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32.
*@par Attributes:
* @li paddings: A required "vector<vector<int>>".
* For each dimension D of input, paddings[D, 0] indicates how many
* values to add before the contents of tensor in that dimension,
* and paddings[D, 1] indicates how many values to add after the
* contents of tensor in that dimension.
* @li constant_values: An optional int value for pad.
* @li mode: An optional string. Defaults to "constant"; indicates the paddings mode,
* "constant", "reflect" and "edge" are supported.
* @li paddings_contiguous: An optional bool value. Defaults to true.
* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...].
* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...].
*@par Outputs:
*y: A Tensor of the same type as "x".
*@par Third-party framework compatibility:
* Compatible with ONNX operator Pad.
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead.
*/
REG_OP(PadV3D)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
    .REQUIRED_ATTR(paddings, ListListInt)
    .ATTR(constant_values, Int, 0)
    .ATTR(mode, String, "constant")
    .ATTR(paddings_contiguous, Bool, true)
    .OP_END_FACTORY_REG(PadV3D)
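// Illustrative example of the "paddings" layout documented above: padding a
// 2-D tensor by 1 row on top, 2 on bottom, 3 columns left, 4 right. The array
// names are hypothetical.
static const int kPaddingsContiguous[2][2] = {{1, 2}, {3, 4}};  // [[begin0, end0], [begin1, end1]]
static const int kPaddingsSplit[2][2] = {{1, 3}, {2, 4}};       // [[begin0, begin1], [end0, end1]] (paddings_contiguous = false)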
/**
*@brief Create a diagonal tensor
@@ -403,5 +400,76 @@ REG_OP(EmbeddingRankId)
    .ATTR(mode, String, "mod")
    .OP_END_FACTORY_REG(EmbeddingRankId)
/**
*@brief EmbeddingLocalIndex. Sorts the statistics index according to rank_id. \n
*@par Inputs:
* @li addr_table: A 2D tensor whose last dimension must be 3.
* @li index: A tensor with data type int32, int64, uint32, uint64.
*@par Attributes:
* @li row_memory: The size of an embedding vector in a row. Defaults to 320.
* @li mode: String type; currently there are two options: 'mod' and 'order'.
*@par Outputs:
* @li local_idx: Index on each server.
* @li nums: The number of local_idx found on each server.
* @li recover_idx: The sorted local_idx element is at the position corresponding
* to the original input index.
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Diag.
*/
REG_OP(EmbeddingLocalIndex)
    .INPUT(addr_table, TensorType({DT_UINT64}))
    .INPUT(index, TensorType({DT_INT64, DT_INT32, DT_UINT32, DT_UINT64}))
    .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT32, DT_UINT64}))
    .OUTPUT(nums, TensorType({DT_INT64, DT_INT32, DT_UINT32, DT_UINT64}))
    .OUTPUT(recover_idx, TensorType({DT_INT64, DT_INT32, DT_UINT32, DT_UINT64}))
    .ATTR(row_memory, Int, 320)
    .ATTR(mode, String, "mod")
    .OP_END_FACTORY_REG(EmbeddingLocalIndex)
/**
* @brief Fill the value to a tensor that has the specified shape.
* @par Inputs:
* One input, including:
* @li dims: A Tensor, specifying the shape of the output.
* @par Attributes:
* @li value: An optional float value. Defaults to 0.0.
* @par Outputs:
* @li y: A Tensor. Has the shape specified by input "dims", and is filled with the value specified by attr "value".
* @par Third-party framework compatibility
* Compatible with the ONNX operator ConstantOfShape.
*/
REG_OP(FillV2)
    .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
    .ATTR(value, Float, 0)
    .OP_END_FACTORY_REG(FillV2)
/**
* @brief Fill the value to a tensor that has the specified shape.
* @par Attributes:
* @li value: An optional float value. Defaults to 0.0.
* @li dims: A required list of ints, specifying the shape of the output.
* @par Outputs:
* @li y: A Tensor. Has the shape specified by attr "dims", and is filled with the value specified by attr "value".
* @par Third-party framework compatibility
* Compatible with the ONNX operator ConstantOfShape.
*/
REG_OP(FillV2D)
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64}))
    .ATTR(value, Float, 0)
    .REQUIRED_ATTR(dims, ListInt)
    .OP_END_FACTORY_REG(FillV2D)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -51,6 +51,246 @@ REG_OP(StringToNumber)
    .ATTR(out_type, Type, DT_FLOAT)
    .OP_END_FACTORY_REG(StringToNumber)
/**
*@brief Parse a single Example prototype. \n
*@par Input:
*serialized: A Tensor of type string.
*dense_defaults: A dynamic input tensor of type string, float or int64. \n
*@par Attributes:
*num_sparse: An int; the number of sparse_indices, sparse_values and sparse_shapes outputs.
*sparse_keys: A list of strings.
*dense_keys: A list of strings.
*sparse_types: Types of the sparse_values.
*Tdense: Types of the dense_values.
*dense_shapes: Shapes of the dense_defaults. \n
*@par Outputs:
*sparse_indices: A Tensor of type int64.
*sparse_values: Has the same type as sparse_types.
*sparse_shapes: A Tensor of type int64.
*dense_values: Has the same type as dense_defaults.
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ParseSingleExample)
    .INPUT(serialized, TensorType({DT_STRING}))
    .DYNAMIC_INPUT(dense_defaults, TensorType({DT_STRING, DT_FLOAT, DT_INT64}))
    .DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64}))
    .DYNAMIC_OUTPUT(sparse_values, TensorType({DT_STRING, DT_FLOAT, DT_INT64}))
    .DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64}))
    .DYNAMIC_OUTPUT(dense_values, TensorType({DT_STRING, DT_FLOAT, DT_INT64}))
    .ATTR(num_sparse, Int, 0)
    .ATTR(sparse_keys, ListString, {})
    .ATTR(dense_keys, ListString, {})
    .ATTR(sparse_types, ListType, {})
    .ATTR(Tdense, ListType, {})
    .ATTR(dense_shapes, ListListInt, {})
    .OP_END_FACTORY_REG(ParseSingleExample)
/**
*@brief Decodes a raw file into a tensor . \n
*@par Input:
*bytes: A Tensor of type string.
*@par Attributes:
*little_endian: An optional bool. Defaults to true.
*out_type: The output type. Defaults to DT_FLOAT.
*@par Outputs:
*output: A Tensor of type out_type.
*/
REG_OP(DecodeRaw)
    .INPUT(bytes, TensorType({DT_STRING}))
    .OUTPUT(output, TensorType({DT_BOOL, DT_FLOAT16, DT_DOUBLE, DT_FLOAT,
                                DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_INT16,
                                DT_UINT16, DT_COMPLEX64, DT_COMPLEX128}))
    .ATTR(out_type, Type, DT_FLOAT)
    .ATTR(little_endian, Bool, true)
    .OP_END_FACTORY_REG(DecodeRaw)
/**
*@brief Convert a serialized tensorflow.TensorProto prototype to a Tensor. \n
*@par Inputs:
*serialized: A Tensor of string type. Scalar string containing a serialized
*TensorProto prototype. \n
*@par Attributes:
*out_type: The type of the serialized tensor. The provided type must match the
*type of the serialized tensor and no implicit conversion will take place. \n
*@par Outputs:
*output: A Tensor of type out_type. \n
*@attention Constraints:
*The implementation for ParseTensor on Ascend uses AICPU,
*with bad performance. \n
*@par Third-party framework compatibility
*@li compatible with the tensorflow ParseTensor operator.
*/
REG_OP(ParseTensor)
    .INPUT(serialized, TensorType({DT_STRING}))
    .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16,
                                DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32,
                                DT_UINT64, DT_BOOL, DT_DOUBLE, DT_STRING,
                                DT_COMPLEX64, DT_COMPLEX128}))
    .ATTR(out_type, Type, DT_FLOAT)
    .OP_END_FACTORY_REG(ParseTensor)
/**
*@brief Convert CSV records to tensors; each column maps to one
*tensor . \n
*@par Inputs:
*Inputs include:
*records: Each string is a record/row in the csv and all records should have the
*same format. \n
*record_defaults: One tensor per column of the input record, with either a
*scalar default value for that column or an empty vector if the column is
*required. \n
*@par Attributes:
*OUT_TYPE: A list of types of the output tensors. \n
*field_delim: char delimiter to separate fields in a record. \n
*use_quote_delim: If false, treats double quotation marks as regular characters
*inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). \n
*na_value: Additional string to recognize as NA/NaN. \n
*select_cols: An optional list of ints; the columns to extract. \n
*@par Outputs:
*output: Dynamic output. Each tensor has the same type as the corresponding
*entry of record_defaults . \n
*@attention Constraints:
*The implementation for DecodeCSV on Ascend uses AICPU, with bad
*performance. \n
*@par Third-party framework compatibility
*@li compatible with the tensorflow DecodeCSV operator.
*/
REG_OP(DecodeCSV)
    .INPUT(records, TensorType({DT_STRING}))
    .DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
                                                DT_INT64, DT_STRING}))
    .DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
                                        DT_INT64, DT_STRING}))
    .ATTR(OUT_TYPE, ListType, {})
    .ATTR(field_delim, String, ",")
    .ATTR(use_quote_delim, Bool, true)
    .ATTR(na_value, String, ",")
    .ATTR(select_cols, ListInt, {})
    .OP_END_FACTORY_REG(DecodeCSV)
/**
*@brief Parse Example prototypes. \n
*@par Input:
*serialized: A Tensor of type string. \n
*name: A Tensor of type string. \n
*sparse_keys: Dynamic input tensor of string. \n
*dense_keys: Dynamic input tensor of string. \n
*dense_defaults: Dynamic input tensor of type string, float or int64. \n
*@par Attributes:
*Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes. \n
*Ndense: Number of dense_keys. \n
*sparse_types: Types of the sparse_values. \n
*Tdense: Types of dense_defaults and dense_values. \n
*dense_shapes: Shapes of the dense_defaults. \n
*@par Outputs:
*sparse_indices: A Tensor of type int64. \n
*sparse_values: Has the same type as sparse_types. \n
*sparse_shapes: A Tensor of type int64. \n
*dense_values: Has the same type as dense_defaults. \n
*@par Third-party framework compatibility \n
*@li compatible with the tensorflow ParseExample operator. \n
*/
REG_OP(ParseExample)
    .INPUT(serialized, TensorType({DT_STRING}))
    .INPUT(name, TensorType({DT_STRING}))
    .DYNAMIC_INPUT(sparse_keys, TensorType({DT_STRING}))
    .DYNAMIC_INPUT(dense_keys, TensorType({DT_STRING}))
    .DYNAMIC_INPUT(dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
    .DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64}))
    .DYNAMIC_OUTPUT(sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
    .DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64}))
    .DYNAMIC_OUTPUT(dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
    .ATTR(Nsparse, Int, 0)
    .ATTR(Ndense, Int, 0)
    .ATTR(sparse_types, ListType, {})
    .ATTR(Tdense, ListType, {})
    .ATTR(dense_shapes, ListListInt, {})
    .OP_END_FACTORY_REG(ParseExample)
/**
*@brief Transforms a scalar brain.SequenceExample proto (as strings) into typed
*tensors. \n
*@par Input:
*serialized: A Tensor of type string. \n
*feature_list_dense_missing_assumed_empty: A Tensor of type string. \n
*context_sparse_keys: Dynamic input tensor of string. \n
*context_dense_keys: Dynamic input tensor of string. \n
*feature_list_sparse_keys: Dynamic input tensor of string. \n
*feature_list_dense_keys: Dynamic input tensor of string. \n
*context_dense_defaults: Dynamic input tensor of string, float, int64. \n
*debug_name: A Tensor of type string. \n
*@par Attributes:
*Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes. \n
*Ncontext_dense: Number of context_dense_keys. \n
*Nfeature_list_sparse: Number of feature_list_sparse_keys. \n
*Nfeature_list_dense: Number of feature_list_dense_keys. \n
*context_sparse_types: Types of the context_sparse_values. \n
*Tcontext_dense: Types of the context_dense_values. \n
*feature_list_dense_types: Types of the feature_list_dense_values. \n
*context_dense_shapes: Shape of context_dense. \n
*feature_list_sparse_types: Types of the feature_list_sparse_values. \n
*feature_list_dense_shapes: Shape of feature_list_dense. \n
*@par Outputs:
*context_sparse_indices: Dynamic output tensor of type int64. \n
*context_sparse_values: Dynamic output tensor of type string, float, int64. \n
*context_sparse_shapes: Dynamic output tensor of type int64. \n
*context_dense_values: Dynamic output tensor of type string, float, int64. \n
*feature_list_sparse_indices: Dynamic output tensor of type int64. \n
*feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n
*feature_list_sparse_shapes: Dynamic output tensor of type int64. \n
*feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n
*@par Third-party framework compatibility \n
*@li compatible with the tensorflow ParseSingleSequenceExample operator. \n
*/
REG_OP(ParseSingleSequenceExample)
    .INPUT(serialized, TensorType({DT_STRING}))
    .INPUT(feature_list_dense_missing_assumed_empty, TensorType({DT_STRING}))
    .DYNAMIC_INPUT(context_sparse_keys, TensorType({DT_STRING}))
    .DYNAMIC_INPUT(context_dense_keys, TensorType({DT_STRING}))
    .DYNAMIC_INPUT(feature_list_sparse_keys, TensorType({DT_STRING}))
    .DYNAMIC_INPUT(feature_list_dense_keys, TensorType({DT_STRING}))
    .DYNAMIC_INPUT(context_dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
    .INPUT(debug_name, TensorType({DT_STRING}))
    .DYNAMIC_OUTPUT(context_sparse_indices, TensorType({DT_INT64}))
    .DYNAMIC_OUTPUT(context_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
    .DYNAMIC_OUTPUT(context_sparse_shapes, TensorType({DT_INT64}))
    .DYNAMIC_OUTPUT(context_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
    .DYNAMIC_OUTPUT(feature_list_sparse_indices, TensorType({DT_INT64}))
    .DYNAMIC_OUTPUT(feature_list_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
    .DYNAMIC_OUTPUT(feature_list_sparse_shapes, TensorType({DT_INT64}))
    .DYNAMIC_OUTPUT(feature_list_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
    .ATTR(Ncontext_sparse, Int, 0)
    .ATTR(Ncontext_dense, Int, 0)
    .ATTR(Nfeature_list_sparse, Int, 0)
    .ATTR(Nfeature_list_dense, Int, 0)
    .ATTR(context_sparse_types, ListType, {})
    .ATTR(Tcontext_dense, ListType, {})
    .ATTR(feature_list_dense_types, ListType, {})
    .ATTR(context_dense_shapes, ListListInt, {})
    .ATTR(feature_list_sparse_types, ListType, {})
    .ATTR(feature_list_dense_shapes, ListListInt, {})
    .OP_END_FACTORY_REG(ParseSingleSequenceExample)
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -60,6 +60,26 @@ REG_OP(Dequantize) | |||||
.ATTR(mode, String, "MIN_COMBINED") | .ATTR(mode, String, "MIN_COMBINED") | ||||
.OP_END_FACTORY_REG(Dequantize) | .OP_END_FACTORY_REG(Dequantize) | ||||
/** | |||||
*@brief Quantizes the input tensor . \n
*@par Inputs:
*x: The input tensor to be quantized, of type float16 or float32. \n
*scales: The quantization scales, of type float32. \n
*zero_points: The quantization zero points, of type int8, uint8 or int32. \n
*@par Attributes:
*@li dtype: A required string, specifying the quantized output type.
*@li axis: The dimension along which per-axis quantization is applied. \n
*@par Outputs:
*y: The quantized output tensor. Has the same shape as "x"; the dtype is the quantized type . \n
*/ | |||||
REG_OP(Quantize) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(scales, TensorType({DT_FLOAT})) | |||||
.INPUT(zero_points, TensorType({DT_INT8,DT_UINT8,DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT32})) | |||||
.REQUIRED_ATTR(dtype, String) | |||||
.ATTR(axis, Int, 1) | |||||
.OP_END_FACTORY_REG(Quantize) | |||||
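// A minimal sketch of the affine quantization the inputs above describe,
// assuming the common convention y = clamp(round(x / scale) + zero_point)
// for an int8 output dtype; QuantizeToInt8 is a hypothetical helper name.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

std::vector<int8_t> QuantizeToInt8(const std::vector<float> &x,
                                   float scale, int32_t zero_point) {
  std::vector<int8_t> y;
  y.reserve(x.size());
  for (float v : x) {
    // Round to nearest, shift by the zero point, then saturate to int8 range.
    int32_t q = static_cast<int32_t>(std::nearbyint(v / scale)) + zero_point;
    y.push_back(static_cast<int8_t>(std::clamp(q, -128, 127)));
  }
  return y;
}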
/** | /** | ||||
*@brief Quantizes the input . \n | *@brief Quantizes the input . \n | ||||
@@ -194,7 +214,7 @@ REG_OP(AscendRequant) | |||||
*@brief Requantizes the input of int16 . \n | *@brief Requantizes the input of int16 . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li x: An NC1HWC0 tensor of type int16, specifying the input. | |||||
*@li x0: An NC1HWC0 tensor of type int16, specifying the input. | |||||
*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. | *@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. | ||||
*@li x1: An NC1HWC0 tensor of type int16 . \n | *@li x1: An NC1HWC0 tensor of type int16 . \n | ||||
@@ -203,22 +223,21 @@ REG_OP(AscendRequant) | |||||
*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | *@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*@li y: The dequantized output tensor of type int8 and with format NC1HWC0. | |||||
*@li y0: The dequantized output tensor of type int8 and with format NC1HWC0. | |||||
*@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n | *@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
*/ | */ | ||||
REG_OP(AscendRequantS16) | REG_OP(AscendRequantS16) | ||||
.INPUT(x, TensorType({DT_INT16})) | |||||
.INPUT(x0, TensorType({DT_INT16})) | |||||
.INPUT(req_scale, TensorType({DT_UINT64})) | .INPUT(req_scale, TensorType({DT_UINT64})) | ||||
.OPTIONAL_INPUT(x1, TensorType({DT_INT16})) | .OPTIONAL_INPUT(x1, TensorType({DT_INT16})) | ||||
.OUTPUT(y, TensorType({DT_INT8})) | |||||
.OUTPUT(y0, TensorType({DT_INT8})) | |||||
.OUTPUT(y1, TensorType({DT_INT16})) | .OUTPUT(y1, TensorType({DT_INT16})) | ||||
.ATTR(dual_output, Bool, false) | .ATTR(dual_output, Bool, false) | ||||
.ATTR(relu_flag, Bool, false) | .ATTR(relu_flag, Bool, false) | ||||
.OP_END_FACTORY_REG(AscendRequantS16) | .OP_END_FACTORY_REG(AscendRequantS16) | ||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -356,6 +356,39 @@ REG_OP(DropOutGenMask) | |||||
.ATTR(seed2, Int, 0) | .ATTR(seed2, Int, 0) | ||||
.OP_END_FACTORY_REG(DropOutGenMask) | .OP_END_FACTORY_REG(DropOutGenMask) | ||||
/** | |||||
*@brief Generate random uint8 mask for dropout v3 . \n | |||||
*@par Inputs:
* Two inputs, including:
*@li shape: The shape of the output tensor.
*@li prob: A 0-D tensor. Probability that an output element is 1 . \n
*@par Attributes:
*@li seed: If either seed or seed2 is set to be non-zero, the random number
*generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
*@li seed2: A second seed to avoid seed collision . \n
*@par Outputs:
*y: A 1-D tensor of random values in uint8 data format . \n
*@attention Constraints:
*The output length is aligned to a multiple of 16.
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*@see DropOutGenMaskV3() | |||||
*/ | |||||
REG_OP(DropOutGenMaskV3) | |||||
.INPUT(shape, TensorType({ DT_INT32, DT_INT64 })) | |||||
.INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||||
.OUTPUT(y, TensorType({ DT_UINT8 })) | |||||
.ATTR(seed, Int, 0) | |||||
.ATTR(seed2, Int, 0) | |||||
.OP_END_FACTORY_REG(DropOutGenMaskV3) | |||||
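// A host-side sketch of the mask semantics, assuming each output byte holds
// one Bernoulli draw with P(1) = prob and the overall length is padded to a
// multiple of 16 per the constraint above (the device-side packing of the
// uint8 data may differ).
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

std::vector<uint8_t> GenMaskV3(size_t count, float prob, uint64_t seed) {
  std::mt19937_64 rng(seed);
  std::bernoulli_distribution keep(prob);
  const size_t aligned = (count + 15) / 16 * 16;  // align output to 16
  std::vector<uint8_t> mask(aligned, 0);
  for (size_t i = 0; i < count; ++i) {
    mask[i] = keep(rng) ? 1 : 0;
  }
  return mask;
}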
/** | /** | ||||
*@brief Generates values in an interval . \n | *@brief Generates values in an interval . \n | ||||
@@ -495,6 +528,62 @@ REG_OP(ShuffleChannel) | |||||
DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) | DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) | ||||
.ATTR(group, Int, 1) | .ATTR(group, Int, 1) | ||||
.OP_END_FACTORY_REG(ShuffleChannel) | .OP_END_FACTORY_REG(ShuffleChannel) | ||||
/** | |||||
* @brief Generates a tensor of samples from a multinomial
* distribution according to the probabilities of each of | |||||
* the possible outcomes. | |||||
* | |||||
* @par Inputs:
* one input, including:
* @li x:Input tensor with shape [batch_size, class_size], | |||||
* where class_size is the number of all possible outcomes. | |||||
* Each value along the axis zero represents the unnormalized | |||||
* log-probability of each corresponding outcome in a batch. | |||||
* | |||||
* @par Outputs:
* one output, including:
* @li y:Output tensor with shape [batch_size, sample_size], | |||||
* where sample_size is the number of times to sample. | |||||
* Each value along the axis zero represents the outcome of | |||||
* the corresponding sample in a batch. | |||||
* | |||||
* @par Restrictions: | |||||
* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(MultinomialFuss) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64})) | |||||
.OUTPUT(y, TensorType({DT_INT32, DT_INT64})) | |||||
.ATTR(dtype, Int, 6) | |||||
.ATTR(sample_size, Int, 1) | |||||
.ATTR(seed, Float, 0) | |||||
.OP_END_FACTORY_REG(MultinomialFuss) | |||||
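// A sketch of the sampling semantics for one batch row, assuming the row of
// "x" holds unnormalized log-probabilities: softmax it, then draw
// sample_size outcomes from the resulting categorical distribution.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

std::vector<int64_t> MultinomialRow(const std::vector<float> &logits,
                                    int sample_size, uint64_t seed) {
  // Subtract the max logit for numerical stability before exponentiating.
  const float max_logit = *std::max_element(logits.begin(), logits.end());
  std::vector<double> weights(logits.size());
  for (size_t i = 0; i < logits.size(); ++i) {
    weights[i] = std::exp(logits[i] - max_logit);
  }
  std::discrete_distribution<int64_t> dist(weights.begin(), weights.end());
  std::mt19937_64 rng(seed);
  std::vector<int64_t> samples(sample_size);
  for (auto &s : samples) {
    s = dist(rng);  // each draw is one outcome index in [0, class_size)
  }
  return samples;
}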
/** | |||||
* @brief During training, randomly zeroes some of the elements of the input
* tensor with probability "p"
*
* @par Inputs:
* @li x: An ND Tensor. Must be one of the following data types: float16, float32
* @li seed: An ND Tensor. Must be one of the following data types: float32
*
* @par Attributes:
* @li p: A required float, the probability that each element is zeroed
*
* @par Outputs:
* @li y: A tensor with the same shape and type as "x".
* @li mask: A tensor with the same shape and type as "x".
* @li seed: A tensor with the same shape and type as the input "seed".
*/ | |||||
REG_OP(DropoutV2) | |||||
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||||
.INPUT(seed, TensorType({ DT_FLOAT })) | |||||
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||||
.OUTPUT(mask, TensorType({ DT_FLOAT })) | |||||
.OUTPUT(seed, TensorType({ DT_FLOAT })) | |||||
.REQUIRED_ATTR(p, Float) | |||||
.OP_END_FACTORY_REG(DropoutV2) | |||||
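// A sketch of the inverted-dropout convention assumed here: each element is
// zeroed with probability p and survivors are scaled by 1 / (1 - p), so the
// expectation of "y" matches "x"; the mask holds 1.0 for kept elements.
#include <cstdint>
#include <random>
#include <vector>

struct DropoutRef {
  std::vector<float> y;
  std::vector<float> mask;
};

DropoutRef DropoutV2Ref(const std::vector<float> &x, float p, uint64_t seed) {
  std::mt19937_64 rng(seed);
  std::bernoulli_distribution drop(p);
  DropoutRef r{std::vector<float>(x.size()), std::vector<float>(x.size())};
  for (size_t i = 0; i < x.size(); ++i) {
    r.mask[i] = drop(rng) ? 0.0f : 1.0f;
    r.y[i] = x[i] * r.mask[i] / (1.0f - p);  // rescale the kept elements
  }
  return r;
}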
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -37,7 +37,7 @@ namespace ge { | |||||
*@attention Constraints: | *@attention Constraints: | ||||
* This operator is a BatchNorm fusion operator for updating the moving | * This operator is a BatchNorm fusion operator for updating the moving | ||||
* averages for training. | * averages for training. | ||||
* This operator is used in conjunction with BNTrainingUpdate. | |||||
* This operator is used in conjunction with BNTrainingReduce. | |||||
*/ | */ | ||||
REG_OP(BNTrainingReduce) | REG_OP(BNTrainingReduce) | ||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
@@ -45,6 +45,27 @@ REG_OP(BNTrainingReduce) | |||||
.OUTPUT(square_sum, TensorType({DT_FLOAT})) | .OUTPUT(square_sum, TensorType({DT_FLOAT})) | ||||
.OP_END_FACTORY_REG(BNTrainingReduce) | .OP_END_FACTORY_REG(BNTrainingReduce) | ||||
/** | |||||
*@brief Performs reduced batch normalization . \n | |||||
*@par Inputs: | |||||
*x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n | |||||
*@par Outputs: | |||||
*@li sum: A 3D Tensor of type float32 for SUM reduced "x". | |||||
*@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n | |||||
*@attention Constraints: | |||||
* This operator is a BatchNorm fusion operator for updating the moving | |||||
* averages for training. | |||||
* This operator is used in conjunction with BN3DTrainingUpdate.
*/ | |||||
REG_OP(BN3DTrainingReduce) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(sum, TensorType({DT_FLOAT})) | |||||
.OUTPUT(square_sum, TensorType({DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(BN3DTrainingReduce) | |||||
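// A layout-agnostic sketch of the reduction: per-channel first and second
// moments over every non-channel element. "x" is flattened channel-last
// here for readability; the real op tiles the NDC1HWC0 format.
#include <cstddef>
#include <vector>

void BNReduceRef(const std::vector<float> &x, size_t channels,
                 std::vector<float> &sum, std::vector<float> &square_sum) {
  sum.assign(channels, 0.0f);
  square_sum.assign(channels, 0.0f);
  for (size_t i = 0; i < x.size(); ++i) {
    const size_t c = i % channels;  // channel-last flattening assumed
    sum[c] += x[i];
    square_sum[c] += x[i] * x[i];
  }
}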
/** | /** | ||||
*@brief Performs the backpropagation of BatchNorm . \n | *@brief Performs the backpropagation of BatchNorm . \n | ||||
@@ -88,6 +109,49 @@ REG_OP(BNTrainingReduceGrad) | |||||
.ATTR(epsilon, Float, 0.0001) | .ATTR(epsilon, Float, 0.0001) | ||||
.OP_END_FACTORY_REG(BNTrainingReduceGrad) | .OP_END_FACTORY_REG(BNTrainingReduceGrad) | ||||
/** | |||||
*@brief Performs the backpropagation of BatchNorm . \n | |||||
*@par Inputs: | |||||
* Seven inputs, including: | |||||
*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for | |||||
* the gradient. | |||||
*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. | |||||
*@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0,
* for the gradient with respect to "scale" (from BN3DTrainingUpdateGrad).
*@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0,
* for the gradient with respect to "offset" (from BN3DTrainingUpdateGrad).
*@li scale: A 6D Tensor of type float32, with format NDC1HWC0. | |||||
*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
* for the mean of "x". | |||||
*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
* for the variance of "x" . \n | |||||
*@par Attributes: | |||||
*epsilon: An optional float32. Defaults to "0.0001". A small float number | |||||
* added to the variance of "x" . \n | |||||
*@par Outputs: | |||||
*y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset | |||||
* of "x" . \n | |||||
*@attention Constraints: | |||||
* The preceding layer of this operator must be BN3DTrainingUpdateGrad . \n
*@see BN3DTrainingUpdateGrad
*/ | |||||
REG_OP(BN3DTrainingReduceGrad) | |||||
.INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(diff_scale, TensorType({DT_FLOAT})) | |||||
.INPUT(diff_offset, TensorType({DT_FLOAT})) | |||||
.INPUT(scale, TensorType({DT_FLOAT})) | |||||
.INPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
.INPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.ATTR(epsilon, Float, 0.0001) | |||||
.OP_END_FACTORY_REG(BN3DTrainingReduceGrad) | |||||
/** | /** | ||||
*@brief Performs reduced batch normalization . \n | *@brief Performs reduced batch normalization . \n | ||||
@@ -120,7 +184,7 @@ REG_OP(BNTrainingReduceGrad) | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li This operator is a BatchNorm fusion operator for updating the moving | *@li This operator is a BatchNorm fusion operator for updating the moving | ||||
averages for training. | averages for training. | ||||
*This operator is used in conjunction with BNTrainingReduce. | |||||
*This operator is used in conjunction with BNTrainingUpdate. | |||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | ||||
* root instruction. | * root instruction. | ||||
*/ | */ | ||||
@@ -141,6 +205,59 @@ REG_OP(BNTrainingUpdate) | |||||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | ||||
.OP_END_FACTORY_REG(BNTrainingUpdate) | .OP_END_FACTORY_REG(BNTrainingUpdate) | ||||
/** | |||||
*@brief Performs reduced batch normalization . \n | |||||
*@par Inputs: | |||||
* Seven inputs, including: (NDC1HWC0 supported) | |||||
*@li x: A 6D Tensor of type float16 or float32. | |||||
*@li sum: A 6D Tensor of type float32 for the output of operator
* BN3DTrainingReduce.
*@li square_sum: A 6D Tensor of type float32 for the output of operator
* BN3DTrainingReduce.
*@li scale: A 6D Tensor of type float32, for the scaling factor. | |||||
*@li offset: A 6D Tensor of type float32, for the scaling offset. | |||||
*@li mean: A 6D Tensor of type float32, for the updated mean. | |||||
*@li variance: A 6D Tensor of type float32, for the updated variance . \n | |||||
*@par Attributes: | |||||
*@li epsilon: A required float32, specifying the small value added to variance | |||||
* to avoid dividing by zero. | |||||
*@li factor: A required float32, specifying the weight for updating the mean | |||||
* and variance . \n | |||||
*@par Outputs: | |||||
* Five outputs, including: (NDC1HWC0 supported) | |||||
*@li y: A 6D Tensor of type float16 or float32, for normalized "x". | |||||
*@li mean: A 6D Tensor of type float32, for the updated mean. | |||||
*@li variance: A 6D Tensor of type float32, for the updated variance. | |||||
*@li batch_mean: A 6D Tensor of type float32, for the mean of "x". | |||||
*@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n | |||||
*@attention Constraints: | |||||
*@li This operator is a BatchNorm fusion operator for updating the moving | |||||
averages for training. | |||||
*This operator is used in conjunction with BN3DTrainingReduce.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | |||||
* root instruction. | |||||
*/ | |||||
REG_OP(BN3DTrainingUpdate) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(sum, TensorType({DT_FLOAT})) | |||||
.INPUT(square_sum, TensorType({DT_FLOAT})) | |||||
.INPUT(scale, TensorType({DT_FLOAT})) | |||||
.INPUT(offset, TensorType({DT_FLOAT})) | |||||
.INPUT(mean, TensorType({DT_FLOAT})) | |||||
.INPUT(variance, TensorType({DT_FLOAT})) | |||||
.REQUIRED_ATTR(factor, Float) | |||||
.REQUIRED_ATTR(epsilon, Float) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(mean, TensorType({DT_FLOAT})) | |||||
.OUTPUT(variance, TensorType({DT_FLOAT})) | |||||
.OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(BN3DTrainingUpdate) | |||||
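// A sketch of the normalization and running-statistics update, assuming
// "factor" weights the fresh batch statistics (a common convention:
// running = (1 - factor) * running + factor * batch), with the same
// channel-last flattening as the BNReduceRef sketch above.
#include <cmath>
#include <cstddef>
#include <vector>

void BNUpdateRef(std::vector<float> &x, size_t channels,
                 const std::vector<float> &sum,
                 const std::vector<float> &square_sum,
                 const std::vector<float> &scale,
                 const std::vector<float> &offset,
                 std::vector<float> &mean, std::vector<float> &variance,
                 float factor, float epsilon) {
  const size_t n = x.size() / channels;  // elements reduced per channel
  for (size_t c = 0; c < channels; ++c) {
    const float batch_mean = sum[c] / n;
    const float batch_var = square_sum[c] / n - batch_mean * batch_mean;
    const float inv_std = 1.0f / std::sqrt(batch_var + epsilon);
    for (size_t i = c; i < x.size(); i += channels) {
      x[i] = (x[i] - batch_mean) * inv_std * scale[c] + offset[c];
    }
    mean[c] = (1.0f - factor) * mean[c] + factor * batch_mean;
    variance[c] = (1.0f - factor) * variance[c] + factor * batch_var;
  }
}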
/** | /** | ||||
*@brief Performs batch normalization for inference . \n | *@brief Performs batch normalization for inference . \n | ||||
@@ -284,6 +401,40 @@ REG_OP(BNTrainingUpdateGrad) | |||||
.OUTPUT(diff_offset, TensorType({DT_FLOAT})) | .OUTPUT(diff_offset, TensorType({DT_FLOAT})) | ||||
.OP_END_FACTORY_REG(BNTrainingUpdateGrad) | .OP_END_FACTORY_REG(BNTrainingUpdateGrad) | ||||
/** | |||||
*@brief Performs the backpropagation of BatchNorm . \n | |||||
*@par Inputs: | |||||
* Four inputs, including: | |||||
*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, | |||||
* for the gradient. | |||||
*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. | |||||
*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
* for the mean of "x". | |||||
*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
* for the variance of "x" . \n | |||||
*@par Attributes: | |||||
*epsilon: An optional float32. Defaults to "0.0001". A small float number | |||||
* added to the variance of "x" . \n | |||||
*@par Outputs: | |||||
*@li diff_scale: A Tensor of type float32, with format NDC1HWC0, | |||||
* for the offset of "scale". | |||||
*@li diff_offset: A Tensor of type float32, with format NDC1HWC0, | |||||
* for the offset of "offset" . \n | |||||
*/ | |||||
REG_OP(BN3DTrainingUpdateGrad) | |||||
.INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
.INPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
.ATTR(epsilon, Float, 0.0001) | |||||
.OUTPUT(diff_scale, TensorType({DT_FLOAT})) | |||||
.OUTPUT(diff_offset, TensorType({DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(BN3DTrainingUpdateGrad) | |||||
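// A sketch of the standard BatchNorm parameter gradients these outputs
// correspond to (assumed semantics, channel-last flattening as above):
//   diff_scale[c]  = sum over elements of grads * (x - batch_mean[c]) / sqrt(batch_variance[c] + epsilon)
//   diff_offset[c] = sum over elements of grads
#include <cmath>
#include <cstddef>
#include <vector>

void BNUpdateGradRef(const std::vector<float> &grads,
                     const std::vector<float> &x, size_t channels,
                     const std::vector<float> &batch_mean,
                     const std::vector<float> &batch_variance, float epsilon,
                     std::vector<float> &diff_scale,
                     std::vector<float> &diff_offset) {
  diff_scale.assign(channels, 0.0f);
  diff_offset.assign(channels, 0.0f);
  for (size_t i = 0; i < x.size(); ++i) {
    const size_t c = i % channels;
    const float x_hat =
        (x[i] - batch_mean[c]) / std::sqrt(batch_variance[c] + epsilon);
    diff_scale[c] += grads[i] * x_hat;
    diff_offset[c] += grads[i];
  }
}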
/** | /** | ||||
*@brief Performs the backpropagation of BatchNorm for inference . \n | *@brief Performs the backpropagation of BatchNorm for inference . \n | ||||
@@ -635,8 +786,8 @@ REG_OP(ReduceMin) | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. | ||||
*/ | */ | ||||
REG_OP(ReduceMinD) | REG_OP(ReduceMinD) | ||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) | |||||
.REQUIRED_ATTR(axes, ListInt) | .REQUIRED_ATTR(axes, ListInt) | ||||
.ATTR(keep_dims, Bool, false) | .ATTR(keep_dims, Bool, false) | ||||
.OP_END_FACTORY_REG(ReduceMinD) | .OP_END_FACTORY_REG(ReduceMinD) | ||||
@@ -747,14 +898,14 @@ REG_OP(Reduction) | |||||
*@brief Computes the euclidean norm of elements across dimensions of a tensor . \n | *@brief Computes the euclidean norm of elements across dimensions of a tensor . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li input_tensor: A Tensor. Must be one of the following types: float16, float32, int32. | |||||
*@li x: A Tensor. Must be one of the following types: float16, float32, int32. | |||||
*@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None" . \n | *@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None" . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False" . \n | *keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*output_tensor: A Tensor. Must be one of the following types: float16, float32, int32 . \n | |||||
*y: A Tensor. Must be one of the following types: float16, float32, int32 . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n | * If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n | ||||
@@ -821,7 +972,7 @@ Defaults to "0.00001" . \n | |||||
*batch_ variance: A Tensor of type float32 for the result variance . \n | *batch_ variance: A Tensor of type float32 for the result variance . \n | ||||
*@attention Constraints: | *@attention Constraints: | ||||
*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
*For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. | |||||
*/ | */ | ||||
REG_OP(INInferV2) | REG_OP(INInferV2) | ||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
@@ -839,7 +990,7 @@ REG_OP(INInferV2) | |||||
*@brief Performs reduced instance normalization . \n | *@brief Performs reduced instance normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*x: A Tensor of type float16 or float32, with format NC1HWC0 . \n | |||||
*x: A Tensor of type float16 or float32. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li sum: A Tensor of type float32 for SUM reduced "x". | *@li sum: A Tensor of type float32 for SUM reduced "x". | ||||
@@ -862,19 +1013,19 @@ REG_OP(INTrainingReduceV2) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Seven inputs, including: (NC1HWC0supported) | * Seven inputs, including: (NC1HWC0supported) | ||||
*@li x: A Tensor of type float16 or float32. | *@li x: A Tensor of type float16 or float32. | ||||
*@li sum: A T [N, C1, 1, 1, C0] ensor of type float32 for the output of operator INTrainingReduceV2. | |||||
*@li square_sum: A [N, C1, 1, 1, C0] Tensor of type float32 for the output of operator INTrainingReduceV2. | |||||
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. | |||||
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. | |||||
*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated mean. | |||||
*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated variance . \n | |||||
*@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. | |||||
*@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. | |||||
*@li gamma: A Tensor of type float32, for the scaling gamma. | |||||
*@li beta: A Tensor of type float32, for the scaling beta. | |||||
*@li mean: A Tensor of type float32, for the updated mean. | |||||
*@li variance: A Tensor of type float32, for the updated variance . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li momentum: A required float32, specifying the momentum to update mean and var. | *@li momentum: A required float32, specifying the momentum to update mean and var. | ||||
*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n | *@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* Three outputs, including: (NC1HWC0 supported) | |||||
* Three outputs | |||||
*@li y: A Tensor of type float16 or float32, for normalized "x". | *@li y: A Tensor of type float16 or float32, for normalized "x". | ||||
*@li batch_mean: A Tensor of type float32, for the updated mean. | *@li batch_mean: A Tensor of type float32, for the updated mean. | ||||
*@li batch_variance: A Tensor of type float32, for the updated variance . \n | *@li batch_variance: A Tensor of type float32, for the updated variance . \n | ||||
@@ -882,7 +1033,7 @@ REG_OP(INTrainingReduceV2) | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | ||||
* This operator is used in conjunction with INTrainingReduceV2. | * This operator is used in conjunction with INTrainingReduceV2. | ||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
*/ | */ | ||||
REG_OP(INTrainingUpdateV2) | REG_OP(INTrainingUpdateV2) | ||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
@@ -965,7 +1116,7 @@ for the updated variance. | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | ||||
* This operator is used in conjunction with GNTrainingUpdate. | * This operator is used in conjunction with GNTrainingUpdate. | ||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
*/ | */ | ||||
REG_OP(GNTrainingUpdate) | REG_OP(GNTrainingUpdate) | ||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
@@ -982,6 +1133,98 @@ REG_OP(GNTrainingUpdate) | |||||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | ||||
.OP_END_FACTORY_REG(GNTrainingUpdate) | .OP_END_FACTORY_REG(GNTrainingUpdate) | ||||
/** | |||||
*@brief Joins a string Tensor across the given dimensions. \n | |||||
*@par Inputs:
* Two inputs, including:
*@li input: A Tensor of type string. The text to be joined.
*@li reduction_indices: A Tensor of type int. The dimensions to reduce over.
*@par Attributes:
*@li keep_dims: An optional bool. Defaults to "true". If "true", retains reduced dimensions with length 1.
*@li separator: An optional string. The delimiter inserted between joined elements. Defaults to "".
*@par Outputs:
*@li output: A Tensor of type string.
*/ | |||||
REG_OP(ReduceJoin) | |||||
.INPUT(input, TensorType({DT_STRING})) | |||||
.INPUT(reduction_indices, TensorType({DT_INT32})) | |||||
.OUTPUT(output, TensorType({DT_STRING})) | |||||
.ATTR(keep_dims, Bool, true) | |||||
.ATTR(separator, String, "") | |||||
.OP_END_FACTORY_REG(ReduceJoin) | |||||
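// The 1-D case of the join: reducing across the only dimension yields a
// single string with "separator" between the elements (helper name is
// illustrative only).
#include <cstddef>
#include <string>
#include <vector>

std::string ReduceJoin1D(const std::vector<std::string> &input,
                         const std::string &separator) {
  std::string out;
  for (size_t i = 0; i < input.size(); ++i) {
    if (i > 0) {
      out += separator;  // delimiter only between elements
    }
    out += input[i];
  }
  return out;
}
// ReduceJoin1D({"a", "b", "c"}, "-") -> "a-b-c"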
/** | |||||
* @brief Calculates the standard deviation and average value of Tensors. | |||||
* @par Inputs: | |||||
* @li x: A Tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @par Attributes: | |||||
* Three Attributes, including: | |||||
* @li dim: An optional ListInt specifying the dimensions to reduce. Defaults to "{}". \n
* @li unbiased: An optional bool. Defaults to "True". | |||||
* If "True", Use Bessel Correction. | |||||
* If "False", Do not use Bessel Correction. \n | |||||
* @li keepdim: An optional bool. Defaults to "False". | |||||
* If "True", Keep the original tensor dimension. | |||||
* If "False", Do not keep the original tensor dimension. \n | |||||
* @par Outputs: | |||||
* Two Outputs, including: | |||||
* @li y1: A Tensor. Has the same type as "x". | |||||
* @li y2: A Tensor. Has the same type as "x". \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator ReduceStd. | |||||
*/ | |||||
REG_OP(ReduceStd) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.ATTR(dim, ListInt, {}) | |||||
.ATTR(unbiased, Bool, true) | |||||
.ATTR(keepdim, Bool, false) | |||||
.OP_END_FACTORY_REG(ReduceStd) | |||||
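// A full-reduction sketch (dim = {}) of the two outputs, assuming y1 is the
// standard deviation and y2 the mean, in the spirit of torch.std_mean;
// "unbiased" selects the Bessel-corrected divisor n - 1.
#include <cmath>
#include <vector>

void ReduceStdRef(const std::vector<float> &x, bool unbiased,
                  float &y1_std, float &y2_mean) {
  double mean = 0.0;
  for (float v : x) mean += v;
  mean /= static_cast<double>(x.size());
  double sq = 0.0;
  for (float v : x) sq += (v - mean) * (v - mean);
  const double divisor =
      unbiased ? x.size() - 1.0 : static_cast<double>(x.size());
  y1_std = static_cast<float>(std::sqrt(sq / divisor));
  y2_mean = static_cast<float>(mean);
}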
/** | |||||
* @brief Calculates the standard deviation of Tensors. | |||||
* @par Inputs: | |||||
* include: | |||||
* @li x: A Tensor. Must be one of the following types: float16, float32. \n | |||||
* @li mean: A Tensor. It's the mean of X. Must be one of the following types: float16, float32. \n | |||||
* @par Attributes: | |||||
* Three Attributes, including: | |||||
* @li dim: An optional ListInt specifying the dimensions to reduce. Defaults to "{}". \n
* @li unbiased: An optional bool. Defaults to "True". | |||||
* If "True", Use Bessel Correction. | |||||
* If "False", Do not use Bessel Correction. \n | |||||
* @li keepdim: An optional bool. Defaults to "False". | |||||
* If "True", Keep the original tensor dimension. | |||||
* If "False", Do not keep the original tensor dimension. \n | |||||
* @par Outputs: | |||||
* @li y: A Tensor. It's the std of X. Has the same type as "x". | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator ReduceStdWithMean. | |||||
*/ | |||||
REG_OP(ReduceStdWithMean) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.ATTR(dim, ListInt, {}) | |||||
.ATTR(unbiased, Bool, true) | |||||
.ATTR(keepdim, Bool, false) | |||||
.OP_END_FACTORY_REG(ReduceStdWithMean) | |||||
} //namespace ge | } //namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -33,6 +33,7 @@ namespace ge { | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | *@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | ||||
*@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n | *@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n | ||||
*@li mask:A 1D Tensor. Must be one of the following types: uint8. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li keep_prob:An integer identifying the keep prob in the op. Default to 1. | *@li keep_prob:An integer identifying the keep prob in the op. Default to 1. | ||||
@@ -42,7 +43,6 @@ namespace ge { | |||||
*@par Outputs: | *@par Outputs: | ||||
*seven outputs: | *seven outputs: | ||||
*@li mask:A 1D Tensor. Must be one of the following types: uint8. | |||||
*@li ct:A 4D Tensor. Must be one of the following types: float16, float32. | *@li ct:A 4D Tensor. Must be one of the following types: float16, float32. | ||||
*@li ht:A 4D Tensor. Must be one of the following types: float16. | *@li ht:A 4D Tensor. Must be one of the following types: float16. | ||||
*@li it:A 4D Tensor. Must be one of the following types: float16, float32. | *@li it:A 4D Tensor. Must be one of the following types: float16, float32. | ||||
@@ -187,16 +187,16 @@ REG_OP(DynamicRNNGrad) | |||||
*@brief: DynamicRNN calculation. | *@brief: DynamicRNN calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*ten inputs: | *ten inputs: | ||||
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n | |||||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
*@li seq_length:An optional Tensor. Only supports float16 with format FRACTAL_NZ and int32 with format ND.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
*@par Attributes: | *@par Attributes: | ||||
*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | ||||
@@ -209,6 +209,7 @@ REG_OP(DynamicRNNGrad) | |||||
*@li time_major:An bool identifying the time major in the op. Default to true. | *@li time_major:An bool identifying the time major in the op. Default to true. | ||||
*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. | *@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. | ||||
*@li forget_bias:An float identifying the forget bias in the op. Default to 0. | *@li forget_bias:An float identifying the forget bias in the op. Default to 0. | ||||
*@li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifjo". Default to "ijfo". | |||||
*@li is_training:An bool identifying is training in the op. Default to true . \n | *@li is_training:An bool identifying is training in the op. Default to true . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -221,12 +222,14 @@ REG_OP(DynamicRNNGrad) | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@par Third-party framework compatibility: | |||||
* Compatible with the TF operator LSTM. | |||||
*/ | */ | ||||
REG_OP(DynamicRNN) | REG_OP(DynamicRNN) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) | .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) | .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
@@ -251,9 +254,237 @@ REG_OP(DynamicRNN) | |||||
.ATTR(time_major, Bool, true) | .ATTR(time_major, Bool, true) | ||||
.ATTR(activation, String, "tanh") | .ATTR(activation, String, "tanh") | ||||
.ATTR(forget_bias, Float, 0.0) | .ATTR(forget_bias, Float, 0.0) | ||||
.ATTR(gate_order, String, "ijfo") | |||||
.ATTR(is_training, Bool, true) | .ATTR(is_training, Bool, true) | ||||
.OP_END_FACTORY_REG(DynamicRNN) | .OP_END_FACTORY_REG(DynamicRNN) | ||||
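// A single-timestep sketch of the LSTM recurrence the attributes above
// parameterize, with gate pre-activations already computed (x*W + h*R + b)
// and laid out in the default "ijfo" gate order; forget_bias is added to
// the forget gate before the sigmoid, as in the TF LSTM cell.
#include <cmath>
#include <cstddef>
#include <vector>

static float Sigmoid(float v) { return 1.0f / (1.0f + std::exp(-v)); }

void LstmCellStep(const std::vector<float> &gates,  // size 4 * hidden, "ijfo"
                  std::vector<float> &c, std::vector<float> &h,
                  float forget_bias) {
  const size_t hidden = c.size();
  for (size_t k = 0; k < hidden; ++k) {
    const float i = Sigmoid(gates[0 * hidden + k]);                // input gate
    const float j = std::tanh(gates[1 * hidden + k]);              // candidate
    const float f = Sigmoid(gates[2 * hidden + k] + forget_bias);  // forget gate
    const float o = Sigmoid(gates[3 * hidden + k]);                // output gate
    c[k] = f * c[k] + i * j;
    h[k] = o * std::tanh(c[k]);
  }
}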
/** | |||||
*@brief: DynamicRNNV2 calculation. | |||||
*@par Inputs: | |||||
*eleven inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32.
*The format must be FRACTAL_Z.
*@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32.
*The format must be FRACTAL_Z.
*@li b:An optional 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
*@par Attributes: | |||||
*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | |||||
*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". | |||||
*Only UNIDIRECTIONAL is currently supported. | |||||
*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. | |||||
*@li use_peephole:An bool identifying if use peephole in the op. Default to false. | |||||
*@li keep_prob:An float identifying the keep prob in the op. Default to 1. | |||||
*@li cell_clip:An float identifying the cell clip in the op. Default to -1. | |||||
*@li num_proj:An integer identifying the num projection in the op. Default to 0. | |||||
*@li time_major:An bool identifying the time major in the op. Default to true. | |||||
*@li activation:An string identifying the type of activation function in the op. Default to "tanh". | |||||
*Only tanh is currently supported. | |||||
*@li recurrent_activation:An string identifying the type of activation function in the op. Default to "sigmoid". | |||||
*Support "sigmoid" and "hard_sigmoid". In general, set "hard_sigmoid" for TF Keras LSTM.
*@li forget_bias:An float identifying the forget bias in the op. Default to 0. | |||||
*@li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifco". Default to "ijfo". | |||||
*Set "ijfo" for TF operator LSTM, Set "ifco" for TF Keras LSTM. | |||||
*@li stateful: An bool identifying whether the op is stateful. Default to false. Only false is currently supported.
*@li merge_mode: An string identifying the merge mode in the op. Default to "concat".
*Only "concat" is currently supported.
*@li is_training:An bool identifying is training in the op. Default to true . \n | |||||
*@par Outputs: | |||||
*eight outputs: | |||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*Return the last output_h. | |||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*Return the last output_c. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@par Third-party framework compatibility: | |||||
* Compatible with the TF operator LSTM or TF keras operator LSTM. | |||||
*/ | |||||
REG_OP(DynamicRNNV2) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||||
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(cell_type, String, "LSTM") | |||||
.ATTR(direction, String, "UNIDIRECTIONAL") | |||||
.ATTR(cell_depth, Int, 1) | |||||
.ATTR(use_peephole, Bool, false) | |||||
.ATTR(keep_prob, Float, 1.0) | |||||
.ATTR(cell_clip, Float, -1.0) | |||||
.ATTR(num_proj, Int, 0) | |||||
.ATTR(time_major, Bool, true) | |||||
.ATTR(activation, String, "tanh") | |||||
.ATTR(recurrent_activation, String, "sigmoid") | |||||
.ATTR(forget_bias, Float, 0.0) | |||||
.ATTR(gate_order, String, "ijfo") | |||||
.ATTR(stateful, Bool, false) | |||||
.ATTR(merge_mode, String, "concat") | |||||
.ATTR(is_training, Bool, true) | |||||
.OP_END_FACTORY_REG(DynamicRNNV2) | |||||
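// The "hard_sigmoid" recurrent_activation named above, assuming the Keras
// definition: a piecewise-linear approximation of the logistic function.
#include <algorithm>

inline float HardSigmoid(float v) {
  return std::max(0.0f, std::min(1.0f, 0.2f * v + 0.5f));
}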
/** | |||||
*@brief: DynamicRNNV3 calculation. | |||||
*@par Inputs: | |||||
*twelve inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
*@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND.
*@li real_mask:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li project:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ . \n
*@par Attributes: | |||||
*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | |||||
*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | |||||
*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. | |||||
*@li use_peephole:An bool identifying if use peephole in the op. Default to false. | |||||
*@li keep_prob:An float identifying the keep prob in the op. Default to 1. | |||||
*@li cell_clip:An float identifying the cell clip in the op. Default to -1. | |||||
*@li num_proj:An integer identifying the num projection in the op. Default to 0. | |||||
*@li time_major:An bool identifying the time major in the op. Default to true. | |||||
*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. | |||||
*@li forget_bias:An float identifying the forget bias in the op. Default to 0. | |||||
*@li is_training:An bool identifying is training in the op. Default to true . \n | |||||
*@par Outputs: | |||||
*eight outputs: | |||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@par Third-party framework compatibility: | |||||
* Compatible with the TF operator LSTM. | |||||
*/ | |||||
REG_OP(DynamicRNNV3) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||||
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) | |||||
.OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(cell_type, String, "LSTM") | |||||
.ATTR(direction, String, "UNIDIRECTIONAL") | |||||
.ATTR(cell_depth, Int, 1) | |||||
.ATTR(use_peephole, Bool, false) | |||||
.ATTR(keep_prob, Float, 1.0) | |||||
.ATTR(cell_clip, Float, -1.0) | |||||
.ATTR(num_proj, Int, 0) | |||||
.ATTR(time_major, Bool, true) | |||||
.ATTR(activation, String, "tanh") | |||||
.ATTR(forget_bias, Float, 0.0) | |||||
.ATTR(is_training, Bool, true) | |||||
.OP_END_FACTORY_REG(DynamicRNNV3) | |||||
/** | |||||
*@brief: DynamicLSTMV2 calculation. | |||||
*@par Inputs: | |||||
*eleven inputs:
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li w_xc_x_static:An optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
*@li h0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li c0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND .
*@par Attributes: | |||||
*@li num_output:An integer identifying the number of outputs in the op. Default to 0.
*@li expose_hidden:An bool identifying whether to expose the hidden state in the op. Default to false.
*@li need_output_last:An bool identifying whether to only output the last time step in the op. Default to false.
*@li forget_bias:An float identifying the forget bias in the op. Default to 0.
*@par Outputs: | |||||
*eight outputs: | |||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@par Third-party framework compatibility: | |||||
* Compatible with the Caffe operator LSTM. | |||||
*@par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(DynamicLSTMV2) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(cont, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(w_xc_x_static, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(h0, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(c0, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(last_output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(last_output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(num_output, Int, 0) | |||||
.ATTR(expose_hidden, Bool, false) | |||||
.ATTR(need_output_last, Bool, false) | |||||
.ATTR(forget_bias, Float, 0.0) | |||||
.OP_END_FACTORY_REG(DynamicLSTMV2) | |||||
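/* | |||||
 * A construction sketch (illustrative, not part of the registration): assuming | |||||
 * the set_input_<name>/set_attr_<name> accessors that REG_OP generates, and | |||||
 * hypothetical upstream nodes x_node, w_node, b_node and cont_node: | |||||
 * | |||||
 *   ge::op::DynamicLSTMV2 lstm("dynamic_lstm_v2"); | |||||
 *   lstm.set_input_x(x_node) | |||||
 *       .set_input_w(w_node) | |||||
 *       .set_input_b(b_node) | |||||
 *       .set_input_cont(cont_node) | |||||
 *       .set_attr_expose_hidden(true); | |||||
 */ | |||||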
/** | /** | ||||
*@brief: LSTMInputGrad calculation. | *@brief: LSTMInputGrad calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -297,6 +528,60 @@ REG_OP(LSTMInputGrad) | |||||
.OP_END_FACTORY_REG(LSTMInputGrad) | .OP_END_FACTORY_REG(LSTMInputGrad) | ||||
/** | |||||
*@brief: Dynamic LSTM Cell grad calculation. Calculates the gradient of the gates and the cell state. | |||||
*@par Inputs: | |||||
*twelve inputs: | |||||
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li t_state:A Tensor. Must be one of the following types: int32 . \n | |||||
*@par Attributes: | |||||
*@li forget_bias:A float identifying the forget bias in the op. Defaults to 1. | |||||
*@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported . \n | |||||
*@li direction:A string that marks the calculation direction of the operator. Defaults to "Forward". | |||||
*@li gate_order:A string marking the order of the four output gates. Defaults to "ijfo". | |||||
*@par Outputs: | |||||
*two outputs: | |||||
*@li dgate:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(DynamicLSTMGradCell) | |||||
.INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(t_state, TensorType({DT_INT32})) | |||||
.OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(forget_bias, Float, 1.0) | |||||
.ATTR(activation, String, "") | |||||
.ATTR(direction, String, "Forward") | |||||
.ATTR(gate_order, String, "ijfo") | |||||
.OP_END_FACTORY_REG(DynamicLSTMGradCell) | |||||
/** | /** | ||||
*@brief: Basic LSTM Cell backward calculation.Calculate the gradient of input and hidden state. | *@brief: Basic LSTM Cell backward calculation.Calculate the gradient of input and hidden state. | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -475,9 +760,9 @@ REG_OP(BasicRNNCell) | |||||
.OP_END_FACTORY_REG(BasicRNNCell) | .OP_END_FACTORY_REG(BasicRNNCell) | ||||
/** | /** | ||||
*@brief: DynamicGRU calculation. | |||||
*@brief DynamicGRU calculation. | |||||
*@par Inputs: | *@par Inputs: | ||||
*seven inputs: \n | |||||
*seven inputs: | |||||
*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | ||||
*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. | *@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. | ||||
*@li b:Must be one of the following types: float16, float32. The format must be ND. | *@li b:Must be one of the following types: float16, float32. The format must be ND. | ||||
@@ -497,7 +782,7 @@ REG_OP(BasicRNNCell) | |||||
*@li is_training:An bool identifying is training in the op. Default to true. | *@li is_training:An bool identifying is training in the op. Default to true. | ||||
*@par Outputs: | *@par Outputs: | ||||
*five outputs: \n | |||||
*five outputs: | |||||
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
@@ -531,9 +816,9 @@ REG_OP(DynamicGRU) | |||||
.OP_END_FACTORY_REG(DynamicGRU) | .OP_END_FACTORY_REG(DynamicGRU) | ||||
/** | /** | ||||
*@brief: DynamicGRUV2 calculation. | |||||
*@brief DynamicGRUV2 calculation. | |||||
*@par Inputs: | *@par Inputs: | ||||
*seven inputs: \n | |||||
*seven inputs: | |||||
*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | ||||
*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. | *@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. | ||||
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | ||||
@@ -555,16 +840,13 @@ REG_OP(DynamicGRU) | |||||
*@li is_training:An bool identifying is training in the op. Default to true. | *@li is_training:An bool identifying is training in the op. Default to true. | ||||
*@par Outputs: | *@par Outputs: | ||||
*six outputs: \n | |||||
*six outputs: | |||||
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(DynamicGRUV2) | REG_OP(DynamicGRUV2) | ||||
.INPUT(x, TensorType({DT_FLOAT16})) | .INPUT(x, TensorType({DT_FLOAT16})) | ||||
@@ -592,6 +874,68 @@ REG_OP(DynamicGRUV2) | |||||
.ATTR(is_training, Bool, true) | .ATTR(is_training, Bool, true) | ||||
.OP_END_FACTORY_REG(DynamicGRUV2) | .OP_END_FACTORY_REG(DynamicGRUV2) | ||||
/** | |||||
*@brief DynamicGRUV2Hidden calculation. | |||||
*@par Inputs: | |||||
*five inputs: | |||||
*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. | |||||
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | |||||
*@li seq_length:Must be one of the following types: int32. The format must be ND. | |||||
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@par Attributes: | |||||
*@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". | |||||
Only UNIDIRECTIONAL is currently supported. | |||||
*@li cell_depth:An integer identifying the cell depth in the op. Defaults to 1. | |||||
*@li keep_prob:A float identifying the keep prob in the op. Defaults to 1. | |||||
*@li cell_clip:A float identifying the cell clip in the op. Defaults to -1. | |||||
*@li num_proj:An integer identifying the number of projections in the op. Defaults to 0. | |||||
*@li time_major:A bool identifying the time major in the op. Defaults to true. | |||||
*@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". | |||||
Only tanh is currently supported. | |||||
*@li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh"; "rzh" is another option. | |||||
*@li reset_after:A bool identifying whether to apply the reset gate after the matrix multiplication. Defaults to true. | |||||
*@li is_training:A bool identifying whether the op is in training mode. Defaults to true. | |||||
*@par Outputs: | |||||
*six outputs: | |||||
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(DynamicGRUV2Hidden) | |||||
.INPUT(x_weight_input, TensorType({DT_FLOAT32})) | |||||
.INPUT(weight_hidden, TensorType({DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||||
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(direction, String, "UNIDIRECTIONAL") | |||||
.ATTR(cell_depth, Int, 1) | |||||
.ATTR(keep_prob, Float, 1.0) | |||||
.ATTR(cell_clip, Float, -1.0) | |||||
.ATTR(num_proj, Int, 0) | |||||
.ATTR(time_major, Bool, true) | |||||
.ATTR(activation, String, "tanh") | |||||
.ATTR(gate_order, String, "zrh") | |||||
.ATTR(reset_after, Bool, true) | |||||
.ATTR(is_training, Bool, true) | |||||
.OP_END_FACTORY_REG(DynamicGRUV2Hidden) | |||||
/** | /** | ||||
*@brief: DynamicGRUV2Grad calculation. | *@brief: DynamicGRUV2Grad calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -618,7 +962,6 @@ REG_OP(DynamicGRUV2) | |||||
*@li cell_clip:An float identifying the cell clip in the op. Default to -1. | *@li cell_clip:An float identifying the cell clip in the op. Default to -1. | ||||
*@li num_proj:An integer identifying the num projection in the op. Default to 0. | *@li num_proj:An integer identifying the num projection in the op. Default to 0. | ||||
*@li time_major:An bool identifying the time major in the op. Default to true. | *@li time_major:An bool identifying the time major in the op. Default to true. | ||||
*@li bias_type:An string identifying the type of bias_type function in the op. Default to "double_bias". | |||||
*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | ||||
*@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. | *@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. | ||||
@@ -630,6 +973,9 @@ REG_OP(DynamicGRUV2) | |||||
*@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(DynamicGRUV2Grad) | REG_OP(DynamicGRUV2Grad) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
@@ -658,7 +1004,6 @@ REG_OP(DynamicGRUV2Grad) | |||||
.ATTR(cell_clip, Float, -1.0) | .ATTR(cell_clip, Float, -1.0) | ||||
.ATTR(num_proj, Int, 0) | .ATTR(num_proj, Int, 0) | ||||
.ATTR(time_major, Bool, true) | .ATTR(time_major, Bool, true) | ||||
.ATTR(bias_type, String, "double_bias") | |||||
.ATTR(gate_order, String, "zrh") | .ATTR(gate_order, String, "zrh") | ||||
.ATTR(reset_after, Bool, true) | .ATTR(reset_after, Bool, true) | ||||
.OP_END_FACTORY_REG(DynamicGRUV2Grad) | .OP_END_FACTORY_REG(DynamicGRUV2Grad) | ||||
@@ -667,7 +1012,7 @@ REG_OP(DynamicGRUV2Grad) | |||||
*@brief: GRUV2HiddenGrad calculation. | *@brief: GRUV2HiddenGrad calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*nine inputs: \n | *nine inputs: \n | ||||
*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
@@ -678,6 +1023,7 @@ REG_OP(DynamicGRUV2Grad) | |||||
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li t_state:An int identifying the current t state. Defaults to 0; the valid range is [0, 4]. | |||||
*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -685,10 +1031,12 @@ REG_OP(DynamicGRUV2Grad) | |||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(GRUV2HiddenGrad) | |||||
.INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
REG_OP(GRUV2HiddenGradCell) | |||||
.INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
@@ -699,8 +1047,197 @@ REG_OP(GRUV2HiddenGrad) | |||||
.OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.ATTR(t_state, Int, 0) | |||||
.ATTR(gate_order, String, "zrh") | .ATTR(gate_order, String, "zrh") | ||||
.OP_END_FACTORY_REG(GRUV2HiddenGrad) | |||||
.OP_END_FACTORY_REG(GRUV2HiddenGradCell) | |||||
/** | |||||
* @brief Calculates the reversed outputs of the function "embedding". \n | |||||
* @par Inputs: | |||||
* Two inputs, including: | |||||
* @li grad: A mutable Tensor of word grad. Must be one of the following types: | |||||
* float32. | |||||
* @li indices: A mutable word index Tensor of the int32 type.\n | |||||
* @par Attributes: | |||||
* @li num_weights: An int attr giving the number of words in the dictionary. \n | |||||
* @li padding_idx: An int attr indicating which word index is filled with zeros. Defaults to -1. \n | |||||
* @li scale_grad_by_freq: An optional bool. Defaults to "False". | |||||
* If "True", the output grad will be scaled by word frequency. | |||||
* If "False", the output grad will not be scaled by word frequency. \n | |||||
* @par Outputs: | |||||
* @li y: A mutable output Tensor of the new word grad. Has the same type as "grad". \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator EmbeddingDenseGrad. | |||||
*/ | |||||
REG_OP(EmbeddingDenseGrad) | |||||
.INPUT(grad, TensorType({ DT_FLOAT32 })) /* "First operand." */ | |||||
.INPUT(indices, TensorType({ DT_INT32 })) /* "Second operand." */ | |||||
.OUTPUT(y, TensorType({ DT_FLOAT32 })) /* "Result, has same element type as two inputs" */ | |||||
.REQUIRED_ATTR(num_weights, Int) | |||||
.ATTR(padding_idx, Int, -1) | |||||
.ATTR(scale_grad_by_freq, Bool, false) | |||||
.OP_END_FACTORY_REG(EmbeddingDenseGrad) | |||||
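/* | |||||
 * A worked example of the semantics (hypothetical values): with num_weights = 4 | |||||
 * and indices = [0, 2, 0], each row of "grad" is accumulated into the output row | |||||
 * selected by the matching index, so y[0] = grad[0] + grad[2], y[2] = grad[1], | |||||
 * and y[1] = y[3] = 0. With padding_idx = 2, row y[2] would instead stay zero. | |||||
 */ | |||||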
/** | |||||
*@brief CommonLSTM calculation. | |||||
*@par Inputs: | |||||
*eight inputs: \n | |||||
*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
*@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||||
*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
*@par Attributes: | |||||
*@li activation_alpha:Optional scaling values used by some activation functions. Only an empty list is currently supported. | |||||
*@li activation_beta:Optional scaling values used by some activation functions. Only an empty list is currently supported. | |||||
*@li activations:The list of activation functions. Only an empty list is currently supported. | |||||
*@li clip:A float identifying the cell clip in the op. Defaults to -1. | |||||
*@li direction:Specify if the RNN is forward, reverse, or bidirectional. Must be one of forward(default), reverse, or bidirectional. | |||||
*@li hidden_size:Number of neurons in the hidden layer. Reserved. | |||||
*@li input_forget:Couple the input and forget gates if 1. Reserved. | |||||
*@par Outputs: | |||||
*three outputs: \n | |||||
*@li y:The first dimension is the time step, the second dimension is the direction, and the rest is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*/ | |||||
REG_OP(CommonLSTM) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) | |||||
.OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(initial_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(p, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(activation_alpha, ListFloat, {}) | |||||
.ATTR(activation_beta, ListFloat, {}) | |||||
.ATTR(activations, ListString, {}) | |||||
.ATTR(clip, Float, -1.0) | |||||
.ATTR(direction, String, "forward") | |||||
.REQUIRED_ATTR(hidden_size, Int) | |||||
.ATTR(input_forget, Int, 0) | |||||
.OP_END_FACTORY_REG(CommonLSTM) | |||||
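/* | |||||
 * A construction sketch for a bidirectional CommonLSTM (illustrative; assumes | |||||
 * the standard REG_OP-generated accessors and hypothetical upstream nodes): | |||||
 * | |||||
 *   ge::op::CommonLSTM lstm("common_lstm"); | |||||
 *   lstm.set_input_x(x_node) | |||||
 *       .set_input_w(w_node) | |||||
 *       .set_input_r(r_node) | |||||
 *       .set_attr_hidden_size(128) | |||||
 *       .set_attr_direction("bidirectional"); | |||||
 */ | |||||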
/** | |||||
* @brief Calculates the mask. Converts seq_length to a mask according to hidden_size and num_step. | |||||
* | |||||
* @par Inputs: | |||||
* @li seq_length: A 1D Tensor. Must be one of the following types: int32. Records the current length of each batch, with shape [batch_size]. | |||||
* @li b: A 1D Tensor. Must be one of the following types: float16, float32. Records the hidden_size, with shape [4 * hidden_size]. | |||||
* @li x: A 3D Tensor. Must be one of the following types: float16, float32. Records num_step/batch_size/input_size, with shape [num_step, batch_size, input_size]. | |||||
* | |||||
* @par Outputs: | |||||
* seq_mask: A 3D Tensor. Must be one of the following types: float16, float32, with shape [num_step, batch_size, hidden_size]. Has the same type as "b". \n | |||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(RnnGenMaskV2) | |||||
.INPUT(seq_length, TensorType({DT_INT32})) | |||||
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(RnnGenMaskV2) | |||||
/** | |||||
* @brief Common GRU calculation. | |||||
* @par Inputs: | |||||
* Six inputs, including: | |||||
* @li x: The input sequences, packed (and potentially padded) into one 3D Tensor (float16). The format must be FRACTAL_NZ | |||||
* @li w: The weight tensor for the gates, a 3D Tensor (float16). The format must be FRACTAL_Z | |||||
* @li r: The recurrence weight tensor, a 3D Tensor (float16). The format must be FRACTAL_Z | |||||
* @li b: An optional bias tensor for the gates. The format must be ND | |||||
* @li sequence_lens: An optional tensor specifying the lengths of the sequences (int32). The format must be ND | |||||
* @li initial_h: An optional initial value of the hidden state (float16, float32). The format must be FRACTAL_NZ | |||||
* @par Attributes: | |||||
* @li activation_alpha: Optional scaling values used by some activation functions. \n | |||||
* @li activation_beta: Optional scaling values used by some activation functions. \n | |||||
* @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n | |||||
* @li clip: Cell clip threshold. \n | |||||
* @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n | |||||
* @li hidden_size: Number of neurons in the hidden layer. \n | |||||
* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n | |||||
* @par Outputs: | |||||
* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ | |||||
* @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ | |||||
*/ | |||||
REG_OP(CommonGRU) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) | |||||
.OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(activation_alpha, ListFloat, {}) | |||||
.ATTR(activation_beta, ListFloat, {}) | |||||
.ATTR(activations, ListString, {}) | |||||
.ATTR(clip, Float, -1.0) | |||||
.ATTR(direction, String, "forward") | |||||
.REQUIRED_ATTR(hidden_size, Int) | |||||
.ATTR(linear_before_reset, Int, 0) | |||||
.OP_END_FACTORY_REG(CommonGRU) | |||||
/** | |||||
* @brief Computes sums, means or maxes of "bags" of embeddings. \n | |||||
* @par Inputs: | |||||
* Four inputs, including: | |||||
* @li weight: A mutable Tensor of the embedding weight. Must be one of the following types: | |||||
* float32. | |||||
* @li indices: A mutable word index Tensor of the int32 type.\n | |||||
* @li offsets: A mutable word index Tensor of the int32 type.\n | |||||
* @li per_sample_weights: An optional Tensor of float weights. If not specified, all weights are taken to be 1. | |||||
* If specified, per_sample_weights must have exactly the same shape as indices | |||||
* and is treated as having the same offsets, if those are not None. | |||||
* Only supported for mode="sum".\n | |||||
* @par Attributes: | |||||
* @li mode: A string attr taking the value "sum", "mean", or "max". Specifies the way to reduce the bag. \n | |||||
* @li scale_grad_by_freq: An optional bool. Defaults to "False". | |||||
* If "True", the gradient with respect to "weight" will be scaled by word frequency. | |||||
* If "False", it will not be scaled by word frequency. \n | |||||
* @li sparse: If True, the gradient w.r.t. the weight matrix will be a sparse tensor. \n | |||||
* @li include_last_offset: If True, offsets has one additional element, whose value | |||||
* is equal to the size of indices. This matches the CSR format. \n | |||||
* @par Outputs: | |||||
* @li y: A mutable output Tensor. Has the same type as "weight". \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator EmbeddingBag. | |||||
*/ | |||||
REG_OP(EmbeddingBag) | |||||
.INPUT(weight, TensorType({ DT_FLOAT32 })) | |||||
.INPUT(indices, TensorType({ DT_INT32 })) | |||||
.OPTIONAL_INPUT(offsets, TensorType({DT_INT32})) | |||||
.OPTIONAL_INPUT(per_sample_weights, TensorType({DT_FLOAT32})) | |||||
.OUTPUT(y, TensorType({ DT_FLOAT32 })) | |||||
.ATTR(mode, String, "mean") | |||||
.ATTR(scale_grad_by_freq, Bool, false) | |||||
.ATTR(sparse, Bool, false) | |||||
.ATTR(include_last_offset, Bool, false) | |||||
.OP_END_FACTORY_REG(EmbeddingBag) | |||||
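/* | |||||
 * An illustrative bag layout (hypothetical values): indices = [1, 2, 4, 5, 4, 3] | |||||
 * with offsets = [0, 3] describes two bags, {1, 2, 4} and {5, 4, 3}; with | |||||
 * mode = "sum", each output row is the sum of the selected rows of "weight". | |||||
 * With include_last_offset = true, offsets would carry a final element equal | |||||
 * to 6, the size of indices. | |||||
 */ | |||||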
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -239,6 +239,30 @@ REG_OP(GatherV2D) | |||||
.REQUIRED_ATTR(axis, Int) | .REQUIRED_ATTR(axis, Int) | ||||
.OP_END_FACTORY_REG(GatherV2D) | .OP_END_FACTORY_REG(GatherV2D) | ||||
/** | |||||
*@brief Gathers values along an axis specified by dim . \n | |||||
*@par Inputs: | |||||
*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64. | |||||
*@li index: A Tensor. Must be one of the following types: int64 . \n | |||||
*@par Attributes: | |||||
* dim: The axis along which to index . \n | |||||
*@par Outputs: | |||||
* y: A Tensor. Has the same type as "x" . \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the PyTorch operator Gather. | |||||
*/ | |||||
REG_OP(GatherElements) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||||
.INPUT(index, TensorType({DT_INT64})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||||
.ATTR(dim, Int, 0) | |||||
.OP_END_FACTORY_REG(GatherElements) | |||||
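/* | |||||
 * A worked example (illustrative values): for x = [[1, 2], [3, 4]], dim = 1 and | |||||
 * index = [[0, 0], [1, 0]], the output is y = [[1, 1], [4, 3]], i.e. | |||||
 * y[i][j] = x[i][index[i][j]] when dim == 1. | |||||
 */ | |||||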
/** | /** | ||||
*@brief Extracts a strided slice of a tensor. Roughly speaking, this op | *@brief Extracts a strided slice of a tensor. Roughly speaking, this op | ||||
extracts a slice of size (end-begin)/stride from the given input tensor. | extracts a slice of size (end-begin)/stride from the given input tensor. | ||||
@@ -275,8 +299,6 @@ REG_OP(GatherV2D) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x" . \n | *y: A Tensor. Has the same type as "x" . \n | ||||
*@attention Constraints: | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator StridedSlice. | * Compatible with the TensorFlow operator StridedSlice. | ||||
*/ | */ | ||||
@@ -327,8 +349,6 @@ REG_OP(StridedSlice) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x" . \n | *y: A Tensor. Has the same type as "x" . \n | ||||
*@attention Constraints: | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator StridedSlice. | * Compatible with the TensorFlow operator StridedSlice. | ||||
@@ -385,8 +405,6 @@ REG_OP(StridedSliceD) | |||||
*@par Outputs: | *@par Outputs: | ||||
*output: A Tensor. Has the same type as "dy" . \n | *output: A Tensor. Has the same type as "dy" . \n | ||||
*@attention Constraints: | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator StridedSliceGradD. | * Compatible with the TensorFlow operator StridedSliceGradD. | ||||
@@ -444,8 +462,6 @@ REG_OP(StridedSliceGradD) | |||||
*@par Outputs: | *@par Outputs: | ||||
*output: A Tensor has the same type as "dy" . \n | *output: A Tensor has the same type as "dy" . \n | ||||
*@attention Constraints: | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator StridedSliceGrad. | * Compatible with the TensorFlow operator StridedSliceGrad. | ||||
*/ | */ | ||||
@@ -486,6 +502,38 @@ REG_OP(UnsortedSegmentSum) | |||||
.OUTPUT(y, TensorType::NumberType()) | .OUTPUT(y, TensorType::NumberType()) | ||||
.OP_END_FACTORY_REG(UnsortedSegmentSum) | .OP_END_FACTORY_REG(UnsortedSegmentSum) | ||||
/** | |||||
*@brief Creates a one-dimensional tensor of size steps whose values are evenly spaced from start to | |||||
* end, inclusive, on a logarithmic scale with base base. \n | |||||
*@par Inputs: | |||||
*One input, including: | |||||
* @li assist: A tensor. Must be one of the following types: | |||||
* float16, float32. \n | |||||
* @par Attributes: | |||||
* @li start: A required float. Used to select the start. \n | |||||
* @li end: A required float. Used to select the end. \n | |||||
* @li steps: An optional int. Defaults to 100. \n | |||||
* @li base: An optional float. Defaults to 10.0. \n | |||||
* @li dtype: An optional int. Defaults to 1. \n | |||||
*@par Outputs: | |||||
*y: A Tensor with the same type and shape as "assist". \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator logspace. \n | |||||
*/ | |||||
REG_OP(LogSpaceD) | |||||
.INPUT(assist, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.REQUIRED_ATTR(start, Float) | |||||
.REQUIRED_ATTR(end, Float) | |||||
.ATTR(steps, Int, 100) | |||||
.ATTR(base, Float, 10.0) | |||||
.ATTR(dtype, Int, 1) | |||||
.OP_END_FACTORY_REG(LogSpaceD) | |||||
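/* | |||||
 * A worked example (illustrative values): with start = 0.0, end = 3.0, steps = 4 | |||||
 * and base = 10.0, the evenly spaced exponents are [0, 1, 2, 3], so the output | |||||
 * is y = [1, 10, 100, 1000]. | |||||
 */ | |||||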
/** | /** | ||||
*@brief Computes the sum along segments of a tensor . \n | *@brief Computes the sum along segments of a tensor . \n | ||||
@@ -796,6 +844,34 @@ REG_OP(SliceD) | |||||
.REQUIRED_ATTR(size, ListInt) | .REQUIRED_ATTR(size, ListInt) | ||||
.OP_END_FACTORY_REG(SliceD) | .OP_END_FACTORY_REG(SliceD) | ||||
/** | |||||
*@brief Extracts a slice from a tensor. | |||||
* This operation extracts a slice of size "size" from a tensor "x" | |||||
* starting at the location specified by "begin" . \n | |||||
*@par Inputs: | |||||
*@li x: A Tensor. Must be one of the following types: | |||||
* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, | |||||
* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n | |||||
*@li offsets: The starting location for the slice. | |||||
*@par Attributes: | |||||
*@li size: The size of the slice to extract . \n | |||||
*@par Outputs: | |||||
*y: A Tensor. Has the same type as "x". The slice extracted from the tensor. | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. | |||||
*/ | |||||
REG_OP(SliceDV2) | |||||
.INPUT(x, TensorType::BasicType()) | |||||
.INPUT(offsets, TensorType::IndexNumberType()) | |||||
.OUTPUT(y, TensorType::BasicType()) | |||||
.REQUIRED_ATTR(size, ListInt) | |||||
.OP_END_FACTORY_REG(SliceDV2) | |||||
/** | /** | ||||
* @brief Finds values and indices of the "k" largest elements for the last | * @brief Finds values and indices of the "k" largest elements for the last | ||||
* dimension . \n | * dimension . \n | ||||
@@ -829,8 +905,8 @@ REG_OP(SliceD) | |||||
* @li sorted = true | * @li sorted = true | ||||
* @li It's unstable sorted indices on the platform of Ascend310 | * @li It's unstable sorted indices on the platform of Ascend310 | ||||
* @par Third-party framework compatibility | |||||
* @li Compatible with the TensorFlow operator TopK. | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead. | |||||
*/ | */ | ||||
REG_OP(TopKD) | REG_OP(TopKD) | ||||
.INPUT(x, TensorType::RealNumberType()) | .INPUT(x, TensorType::RealNumberType()) | ||||
@@ -855,6 +931,44 @@ REG_OP(TopKD) | |||||
* Number of top elements to look for along the last dimension (along each row | * Number of top elements to look for along the last dimension (along each row | ||||
* for matrices) . \n | * for matrices) . \n | ||||
* @par Attributes: | |||||
* @li sorted: An optional bool. Defaults to true. | |||||
* If true, the resulting "k" elements will be sorted by the values in descending | |||||
* order. | |||||
* @li dim: An optional int. Defaults to -1. For reserved use. | |||||
* @li largest: An optional bool. Defaults to true. For reserved use. \n | |||||
* @par Outputs: | |||||
* @li values: A Tensor, specifying the sorted data. Has the same type as | |||||
* "input". | |||||
* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n | |||||
* @see TopK() | |||||
* @par Third-party framework compatibility | |||||
* @li Compatible with the TensorFlow operator TopKV2. | |||||
*/ | |||||
REG_OP(TopKV2) | |||||
.INPUT(x, TensorType::RealNumberType()) | |||||
.INPUT(k, TensorType({DT_INT32})) | |||||
.OUTPUT(values, TensorType::RealNumberType()) | |||||
.OUTPUT(indices, TensorType({DT_INT32})) | |||||
.ATTR(sorted, Bool, true) | |||||
.ATTR(dim, Int, -1) | |||||
.ATTR(largest, Bool, true) | |||||
.OP_END_FACTORY_REG(TopKV2) | |||||
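/* | |||||
 * A worked example (illustrative values): for x = [1, 3, 2] and k = 2 with | |||||
 * sorted = true, the result is values = [3, 2] and indices = [1, 2]. | |||||
 */ | |||||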
/** | |||||
* @brief Finds values and indices of the "k" largest elements for the last | |||||
* dimension . \n | |||||
* @par Inputs: | |||||
* Two inputs, including: | |||||
* @li x: A 1D or higher tensor of type BasicType, with the last dimension | |||||
* at least "k". | |||||
* @li k: A 0D Tensor of type int32. | |||||
* Number of top elements to look for along the last dimension (along each row | |||||
* for matrices) . \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li sorted: An optional bool. Defaults to true. | * @li sorted: An optional bool. Defaults to true. | ||||
* If true, the resulting "k" elements will be sorted by the values in descending | * If true, the resulting "k" elements will be sorted by the values in descending | ||||
@@ -876,15 +990,17 @@ REG_OP(TopK) | |||||
.OUTPUT(values, TensorType::RealNumberType()) | .OUTPUT(values, TensorType::RealNumberType()) | ||||
.OUTPUT(indices, TensorType({DT_INT32})) | .OUTPUT(indices, TensorType({DT_INT32})) | ||||
.ATTR(sorted, Bool, true) | .ATTR(sorted, Bool, true) | ||||
.ATTR(largest, Bool, true) | |||||
.ATTR(dim, Int, -1) | |||||
.OP_END_FACTORY_REG(TopK) | .OP_END_FACTORY_REG(TopK) | ||||
/** | /** | ||||
*@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices" . \n | *@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices" . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*Inputs including: | *Inputs including: | ||||
* @li indices: A required index tensor. Must be one of the following types: float32, float16, int32, int8, uint8. | |||||
* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8. | |||||
* @li shape: A required list of int32, specifying the output shape. | |||||
* @li indices: A required index tensor. Must be one of the following types: int32 or int64. | |||||
* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8... | |||||
* @li shape: A required list of int32 or int64, specifying the output shape. | |||||
*@par Outputs: | *@par Outputs: | ||||
*y:A output Tensor with same datatype as "updates" . \n | *y:A output Tensor with same datatype as "updates" . \n | ||||
@@ -895,7 +1011,7 @@ REG_OP(TopK) | |||||
* Compatible with the TensorFlow operator ScatterNd. | * Compatible with the TensorFlow operator ScatterNd. | ||||
*/ | */ | ||||
REG_OP(ScatterNd) | REG_OP(ScatterNd) | ||||
.INPUT(indices, TensorType::BasicType()) | |||||
.INPUT(indices, TensorType::IndexNumberType()) | |||||
.INPUT(x, TensorType::BasicType()) | .INPUT(x, TensorType::BasicType()) | ||||
.INPUT(shape, TensorType::IndexNumberType()) | .INPUT(shape, TensorType::IndexNumberType()) | ||||
.OUTPUT(y, TensorType::BasicType()) | .OUTPUT(y, TensorType::BasicType()) | ||||
@@ -908,11 +1024,11 @@ REG_OP(ScatterNd) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Inputs including: | *Inputs including: | ||||
* @li indices: A required index tensor. Must be one of the following types: | * @li indices: A required index tensor. Must be one of the following types: | ||||
* float, float16, int32, int16. format:ND. | |||||
* int32 or int64. format:ND. | |||||
* @li x: A required slice tensor. Must be one of the following types: | * @li x: A required slice tensor. Must be one of the following types: | ||||
* float, float16, int32, int16. format:ND. | |||||
* float16, float, int32, int8, uint8. format:ND. | |||||
*@par Attributes: | *@par Attributes: | ||||
* @li shape: A required list of int32, specifying the output shape. | |||||
* @li shape: A required list of int32 or int64, specifying the output shape. | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x". format:ND . \n | *y: A Tensor. Has the same type as "x". format:ND . \n | ||||
@@ -927,8 +1043,8 @@ REG_OP(ScatterNd) | |||||
*/ | */ | ||||
REG_OP(ScatterNdD) | REG_OP(ScatterNdD) | ||||
.INPUT(indices, TensorType::IndexNumberType()) | .INPUT(indices, TensorType::IndexNumberType()) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
.REQUIRED_ATTR(shape, ListInt) | .REQUIRED_ATTR(shape, ListInt) | ||||
.OP_END_FACTORY_REG(ScatterNdD) | .OP_END_FACTORY_REG(ScatterNdD) | ||||
@@ -1752,6 +1868,33 @@ REG_OP(Crop) | |||||
.REQUIRED_ATTR(offsets, ListInt) | .REQUIRED_ATTR(offsets, ListInt) | ||||
.OP_END_FACTORY_REG(Crop) | .OP_END_FACTORY_REG(Crop) | ||||
/** | |||||
*@brief Returns a namedtuple (values, indices) where "values" is the cumulative | |||||
* minimum of the elements of the input in the dimension dim, | |||||
* and "indices" is the index location of each minimum value found in the dimension dim. \n | |||||
*@par Inputs: | |||||
*One input, including: | |||||
* @li x: A tensor. Must be one of the following types: | |||||
* float16, float32, int32, uint32, int8, uint8. \n | |||||
*@par Attributes: | |||||
* @li axis: The axis along which to compute the cumulative minimum. \n | |||||
*@par Outputs: | |||||
* y: A Tensor with the same type and shape as "x". \n | |||||
* indices: A Tensor of type int32 with the same shape as "x". \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator Cummin. \n | |||||
*/ | |||||
REG_OP(Cummin) | |||||
.INPUT(x, TensorType::BasicType()) | |||||
.OUTPUT(y, TensorType::BasicType()) | |||||
.OUTPUT(indices, TensorType::BasicType()) | |||||
.REQUIRED_ATTR(axis, Int) | |||||
.OP_END_FACTORY_REG(Cummin) | |||||
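/* | |||||
 * A worked example (illustrative values): for x = [2, 1, 3, 0] and axis = 0, | |||||
 * y = [2, 1, 1, 0] and indices = [0, 1, 1, 3], the position of each running | |||||
 * minimum. | |||||
 */ | |||||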
/** | /** | ||||
*@brief Extends the input with copies of data along a specified dimension. For example: | *@brief Extends the input with copies of data along a specified dimension. For example: | ||||
*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); | *(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); | ||||
@@ -1921,6 +2064,249 @@ REG_OP(CumulativeLogsumexpD) | |||||
.ATTR(exclusive, Bool, false) | .ATTR(exclusive, Bool, false) | ||||
.ATTR(reverse, Bool, false) | .ATTR(reverse, Bool, false) | ||||
.OP_END_FACTORY_REG(CumulativeLogsumexpD) | .OP_END_FACTORY_REG(CumulativeLogsumexpD) | ||||
/** | |||||
* @brief Add updates to var according to axis and indices. | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li var: A Tensor. Must be one of the following types: | |||||
* float16, float32, int16, int32, int8, uint8. | |||||
* @li indices: A Tensor of the indices, type should be int32. | |||||
* @li updates: A Tensor of the same type as "var". \n | |||||
* @par Attributes: | |||||
* @li axis: A required int specifying the axis along which to perform the index add. \n | |||||
* @par Outputs: | |||||
* @li var: A Tensor. Same as input "var". | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Pytorch operator index_add_. | |||||
*/ | |||||
REG_OP(InplaceIndexAdd) | |||||
.INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||||
DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||||
DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
.OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||||
DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
.REQUIRED_ATTR(axis, Int) | |||||
.OP_END_FACTORY_REG(InplaceIndexAdd) | |||||
/** | |||||
* @brief Replaces values of "x" with "value" according to "mask". | |||||
* @par Inputs: | |||||
* three inputs, including: | |||||
* @li x: A Tensor of dtype float16, float32, int64, int32, or int8. | |||||
* @li mask: A Tensor of dtype bool. | |||||
* @li value: A Tensor of dtype float16, float32, int64, int32, or int8. | |||||
* @par Outputs: | |||||
* @li y: A tensor. Must be one of the following dtypes: | |||||
* float16, float32, int64, int32, int8. | |||||
*/ | |||||
REG_OP(MaskedFill) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64})) | |||||
.INPUT(mask, TensorType({DT_BOOL})) | |||||
.INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64})) | |||||
.OP_END_FACTORY_REG(MaskedFill) | |||||
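/* | |||||
 * A worked example (illustrative values): for x = [1, 2, 3], | |||||
 * mask = [true, false, true] and value = 5, the output is y = [5, 2, 5]. | |||||
 */ | |||||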
/** | |||||
* @brief Chooses values of "x" according to "mask". | |||||
* @par Inputs: | |||||
* two inputs, including: | |||||
* @li x: A Tensor of dtype float16 or float32. | |||||
* @li mask: A Tensor of dtype bool. \n | |||||
* @par Outputs: | |||||
* @li y: A tensor with the same type as x. \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the Numpy operator select. | |||||
* Replaces the pytorch operator masked_select in some scenarios.\n | |||||
*/ | |||||
REG_OP(MaskedSelectV2) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(mask, TensorType({DT_BOOL})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(MaskedSelectV2) | |||||
/** | |||||
* @brief Slices a tensor at its last dim, e.g. x[..., start:end:stride]. \n | |||||
* @par Inputs: | |||||
* One input, including: | |||||
* @li x: A Tensor. Must be one of the following types: float16, float32, double, int8, int16, int32, int64. | |||||
* @par Attributes: | |||||
* @li start: An attribute of type Int, start index of last dim. \n | |||||
* @li end: An attribute of type Int, end index of last dim. \n | |||||
* @li stride: An attribute of type Int, stride of slice. \n | |||||
* @par Outputs: | |||||
* @li y: A Tensor. Has the same type as "x". \n | |||||
* @par Third-party framework compatibility | |||||
* No compatibility | |||||
*/ | |||||
REG_OP(SliceLastDim) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||||
.REQUIRED_ATTR(start, Int) | |||||
.REQUIRED_ATTR(end, Int) | |||||
.ATTR(stride, Int, 1) | |||||
.OP_END_FACTORY_REG(SliceLastDim) | |||||
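/* | |||||
 * A worked example (illustrative values): for x = [0, 1, 2, 3, 4] with | |||||
 * start = 1, end = 4 and stride = 2, the output is y = [1, 3], i.e. | |||||
 * x[..., 1:4:2]. | |||||
 */ | |||||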
/** | |||||
* @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n | |||||
* extracts a slice of size (end-begin)/stride from the given input tensor. \n | |||||
* Starting at the location specified by begin the slice continues by \n | |||||
* adding stride to the index until all dimensions are not less than end. \n | |||||
* | |||||
* @par Inputs: | |||||
* Five inputs, including: | |||||
* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n | |||||
* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n | |||||
* complex128, float16, uint32, uint64. \n | |||||
* @li begin: A Tensor of type int32 or int64, for the index of the first value to select. | |||||
* | |||||
* @li end: A Tensor of type int32 or int64, for the index of the last value to select. | |||||
* | |||||
* @li axes: A Tensor of type int32 or int64, indicating the axes to select. | |||||
* | |||||
* @li strides: A Tensor of type int32 or int64, for the increment. | |||||
* | |||||
* @par Attributes: | |||||
* @li begin_mask: A Tensor of type int32. \n | |||||
* A bitmask where a bit "i" being "1" means to ignore the begin \n | |||||
* value and instead use the largest interval possible. | |||||
* @li end_mask: A Tensor of type int32. \n | |||||
* Analogous to "begin_mask". | |||||
* @li ellipsis_mask: A Tensor of type int32. \n | |||||
* A bitmask where bit "i" being "1" means the "i"th position \n | |||||
* is actually an ellipsis. | |||||
* @li new_axis_mask: A Tensor of type int32. \n | |||||
* A bitmask where bit "i" being "1" means the "i"th \n | |||||
* specification creates a new shape 1 dimension. | |||||
* @li shrink_axis_mask: A Tensor of type int32. \n | |||||
* A bitmask where bit "i" implies that the "i"th \n | |||||
* specification should shrink the dimensionality. | |||||
* | |||||
* @par Outputs: | |||||
* y: A Tensor. Has the same type as "x". | |||||
* | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the TensorFlow operator StridedSliceV2. | |||||
*/ | |||||
REG_OP(StridedSliceV2) | |||||
.INPUT(x, TensorType::BasicType()) | |||||
.INPUT(begin, TensorType::IndexNumberType()) | |||||
.INPUT(end, TensorType::IndexNumberType()) | |||||
.OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) | |||||
.OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) | |||||
.ATTR(begin_mask, Int, 0) | |||||
.ATTR(end_mask, Int, 0) | |||||
.ATTR(ellipsis_mask, Int, 0) | |||||
.ATTR(new_axis_mask, Int, 0) | |||||
.ATTR(shrink_axis_mask, Int, 0) | |||||
.OUTPUT(y, TensorType::BasicType()) | |||||
.OP_END_FACTORY_REG(StridedSliceV2) | |||||
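/* | |||||
 * A worked example (illustrative values): for a 1-D x = [0, 1, 2, 3, 4] with | |||||
 * begin = [1], end = [4] and strides = [2], the selected indices are 1 and 3, | |||||
 * so y = [1, 3]. All masks default to 0, leaving begin and end as given. | |||||
 */ | |||||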
/** | |||||
*@brief Fills the elements of the input tensor with value val by selecting the indices in the order given in index. \n | |||||
*@par Inputs: | |||||
*Three inputs, including: | |||||
* @li x: A tensor. Must be one of the following types: | |||||
* float16, float32, int32. \n | |||||
*@li assist1: A tensor. Must be one of the following types: | |||||
* float16, float32, int32. \n | |||||
*@li assist2: A tensor. Must be one of the following types: | |||||
* float16, float32, int32. \n | |||||
* @par Attributes: | |||||
* @li dim: A required int. Used to select the dimension of this tensor. \n | |||||
*@par Outputs: | |||||
*y: A Tensor with the same type and shape as "x". \n | |||||
*@par Third-party framework compatibility | |||||
*Compatible with the Pytorch operator IndexFill. \n | |||||
*/ | |||||
REG_OP(IndexFillD) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.REQUIRED_ATTR(dim, Int) | |||||
.OP_END_FACTORY_REG(IndexFillD) | |||||
/** | |||||
* @brief For each row r of this and for each column c, do (*this)(r, c) += src(j, c), \n | |||||
* where j ranges from indexes[r].first through indexes[r].second - 1. \n | |||||
* In general indexes must be >= 0 and < src.NumRows(); \n | |||||
* but to represent an empty range you may use the pair (-1, -1) or any pair of numbers (i, j) such that i >= j. \n | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li x: A Tensor. Must be one of the following types: | |||||
* float16, float32. | |||||
* @li src: A Tensor of the same type as "x". | |||||
* @li indices: A Tensor of the indices; the type should be int32. \n | |||||
* @par Outputs: | |||||
* @li x: A Tensor. Same as input "x". | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the kaldi operator AddRowRanges. | |||||
*/ | |||||
REG_OP(AddRowRanges) | |||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(indices, TensorType({DT_INT32})) | |||||
.OUTPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(AddRowRanges) | |||||
/** | |||||
*@brief Masked fill tensor along one axis by range. | |||||
* It is a customized masked fill range operator . \n | |||||
*@par Inputs: | |||||
* Four inputs, including: | |||||
*@li x: input tensor. A ND Tensor of float32/float16/int32/int8 with shapes | |||||
* 1-D (D,), 2-D(N, D), 3-D(N, C, D) | |||||
*@li start: masked fill start pos. A 3D Tensor of int32 with | |||||
* shape (num, N). "num" indicates the number of loop masked fill, and the value N | |||||
* indicates the batch of ND Tensor, if input x shape is 1-D, N = 1. \n | |||||
*@li end: masked fill end pos. A 2D Tensor of int32 with | |||||
* shape (num, N). "num" indicates the number of loop masked fill, and the value N | |||||
* indicates the batch of ND Tensor. \n | |||||
*@li value: masked fill value. A 1D Tensor of float32/float16/int32/int8 with | |||||
* shape (num,). "num" indicates the number of loop masked fill | |||||
*@par Attributes: | |||||
*@li axis: An int32. The axis along which to apply the masked fill. Defaults to -1. | |||||
*@par Outputs: | |||||
*y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D) | |||||
* @par Restrictions: | |||||
* Warning: the total size of the input shape must not exceed 1024 * 1024 * 1024. | |||||
*/ | |||||
REG_OP(MaskedFillRange) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) | |||||
.INPUT(start, TensorType({DT_INT32})) | |||||
.INPUT(end, TensorType({DT_INT32})) | |||||
.INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) | |||||
.REQUIRED_ATTR(axis, Int) | |||||
.OP_END_FACTORY_REG(MaskedFillRange) | |||||
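Editor's note: a minimal sketch of the fill rule for the 1-D case (N = 1), with hypothetical names and no input validation; each of the `num` ranges [start, end) along the axis is overwritten with its corresponding value:

#include <iostream>
#include <vector>

// For each loop k, positions [start[k], end[k]) of x take value[k].
void MaskedFillRange1D(std::vector<float>& x,
                       const std::vector<int>& start,
                       const std::vector<int>& end,
                       const std::vector<float>& value) {
  for (size_t k = 0; k < value.size(); ++k) {
    for (int i = start[k]; i < end[k] && i < static_cast<int>(x.size()); ++i) {
      if (i >= 0) x[i] = value[k];
    }
  }
}

int main() {
  std::vector<float> x(8, 0.0f);
  MaskedFillRange1D(x, {1, 5}, {3, 7}, {9.0f, 4.0f});
  for (float v : x) std::cout << v << ' ';  // 0 9 9 0 0 4 4 0
  std::cout << '\n';
}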
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad) | |||||
REG_OP(SparseTensorDenseMatMul) | REG_OP(SparseTensorDenseMatMul) | ||||
.INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) | .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) | ||||
.INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ | .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ | ||||
DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16})) | |||||
DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64})) | |||||
.INPUT(x1_shape, TensorType({DT_INT64})) | .INPUT(x1_shape, TensorType({DT_INT64})) | ||||
.INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ | |||||
.INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ | |||||
DT_COMPLEX128, DT_FLOAT16})) | DT_COMPLEX128, DT_FLOAT16})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ | |||||
DT_COMPLEX128, DT_FLOAT16})) | DT_COMPLEX128, DT_FLOAT16})) | ||||
.ATTR(adjoint_a, Bool, false) | .ATTR(adjoint_a, Bool, false) | ||||
.ATTR(adjoint_b, Bool, false) | .ATTR(adjoint_b, Bool, false) | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -26,6 +26,24 @@ | |||||
namespace ge { | namespace ge { | ||||
/** | |||||
*@brief Computes the inverse 1-dimensional discrete Fourier transform over the | |||||
inner-most dimension of `x`. \n | |||||
*@par Inputs: | |||||
*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n | |||||
*@par Outputs: | |||||
*@li y: A complex tensor of the same rank as `x`. \n | |||||
*@par Third-party framework compatibility | |||||
* Compatible with TensorFlow IFFT operator. | |||||
*/ | |||||
REG_OP(IFFT) | |||||
.INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
.OP_END_FACTORY_REG(IFFT) | |||||
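Editor's note: for reference, the transform the IFFT family computes is the ordinary inverse DFT with 1/N normalization. A naive O(n^2) C++ sketch (illustrative only, not the accelerated kernel):

#include <cmath>
#include <complex>
#include <iostream>
#include <vector>

// y[k] = (1/N) * sum_j x[j] * e^{+2*pi*i*j*k/N}, the inverse of the forward
// DFT convention that uses e^{-2*pi*i*j*k/N}.
std::vector<std::complex<double>> InverseDFT(
    const std::vector<std::complex<double>>& x) {
  const double pi = 3.14159265358979323846;
  const size_t n = x.size();
  std::vector<std::complex<double>> y(n);
  for (size_t k = 0; k < n; ++k) {
    std::complex<double> acc(0.0, 0.0);
    for (size_t j = 0; j < n; ++j) {
      double angle = 2.0 * pi * j * k / n;
      acc += x[j] * std::complex<double>(std::cos(angle), std::sin(angle));
    }
    y[k] = acc / static_cast<double>(n);  // 1/N normalization of the inverse
  }
  return y;
}

int main() {
  // The inverse DFT of [2, 0] is [1, 1].
  for (auto v : InverseDFT({{2, 0}, {0, 0}})) std::cout << v.real() << ' ';
  std::cout << '\n';
}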
/** | /** | ||||
*@brief Real-valued fast Fourier transform . \n | *@brief Real-valued fast Fourier transform . \n | ||||
@@ -47,6 +65,84 @@ REG_OP(RFFT) | |||||
.OUTPUT(y, TensorType({DT_COMPLEX64})) | .OUTPUT(y, TensorType({DT_COMPLEX64})) | ||||
.OP_END_FACTORY_REG(RFFT) | .OP_END_FACTORY_REG(RFFT) | ||||
/** | |||||
*@brief Inverse real-valued fast Fourier transform. \n | |||||
*@par Inputs: | |||||
*@li x: A complex64 tensor. | |||||
*@li fft_length: An int32 tensor of shape [1]. The FFT length. \n | |||||
*@par Outputs: | |||||
*@li y: A float32 tensor of the same rank as `input`. The inner-most | |||||
dimension of `input` is replaced with the `fft_length` samples of its inverse | |||||
1D Fourier transform. \n | |||||
*@par Third-party framework compatibility | |||||
* Compatible with TensorFlow IRFFT operator. | |||||
*/ | |||||
REG_OP(IRFFT) | |||||
.INPUT(x, TensorType({DT_COMPLEX64})) | |||||
.INPUT(fft_length, TensorType({DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||||
.OP_END_FACTORY_REG(IRFFT) | |||||
/** | |||||
*@brief 2D fast Fourier transform. \n | |||||
*@par Inputs: | |||||
*@li x: A Tensor. Must be one of the following types: complex64, complex128. | |||||
*@par Outputs: | |||||
*@li y: A complex64 tensor of the same shape as `input`. The inner-most 2 | |||||
dimensions of `input` are replaced with their 2D Fourier transform. \n | |||||
*@par Third-party framework compatibility | |||||
* Compatible with TensorFlow FFT2D operator. | |||||
*/ | |||||
REG_OP(FFT2D) | |||||
.INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OP_END_FACTORY_REG(FFT2D) | |||||
/** | |||||
*@brief Calculate the one-dimensional discrete Fourier transform on the | |||||
innermost dimension of the input. \n | |||||
*@par Inputs: | |||||
*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n | |||||
*@par Outputs: | |||||
*@li y: A complex tensor with the same shape as input. The innermost dimension | |||||
of the input is replaced by its 1-dimensional Fourier transform. \n | |||||
*@par Third-party framework compatibility | |||||
* Compatible with TensorFlow FFT operator. | |||||
*/ | |||||
REG_OP(FFT) | |||||
.INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
.OP_END_FACTORY_REG(FFT) | |||||
/** | |||||
*@brief Calculate the inverse 2-dimensional discrete Fourier transform on the | |||||
innermost 2 dimensions of the input. \n | |||||
*@par Inputs: | |||||
*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n | |||||
*@par Outputs: | |||||
*@li y: A complex tensor with the same shape as input. The innermost 2 dimensions | |||||
of the input are replaced by their inverse 2-dimensional Fourier transform. \n | |||||
*@par Third-party framework compatibility | |||||
* Compatible with TensorFlow IFFT2D operator. | |||||
*/ | |||||
REG_OP(IFFT2D) | |||||
.INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
.OP_END_FACTORY_REG(IFFT2D) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -62,8 +62,8 @@ REG_OP(Split) | |||||
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. | |||||
*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||||
*@li split_dim: A required int32. Specifies the dimension along which to split. No default value. | |||||
*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n | *y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n | ||||
@@ -94,12 +94,12 @@ REG_OP(SplitD) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li x: An ND Tensor. | *@li x: An ND Tensor. | ||||
*Must be one of the following types: | |||||
*@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n | |||||
*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | |||||
*@li size_splits: A Tensor of type int32 or int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
*@li split_dim: A Tensor of type int32. Specifies the dimension along which to split . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||||
*num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | ||||
@@ -129,9 +129,9 @@ REG_OP(SplitV) | |||||
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li size_splits: A required list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. | |||||
*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||||
*@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
*@li split_dim: A required int32. Specifies the dimension along which to split. No default value. | |||||
*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | ||||
@@ -317,15 +317,15 @@ REG_OP(Concat) | |||||
* int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n | * int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li axis: A optional int, defaultvalue is 0. | |||||
*@li axis: An optional int. Default value is 0. | |||||
* Dimension along which to pack. The range is [-(R+1), R+1). | * Dimension along which to pack. The range is [-(R+1), R+1). | ||||
*@li N: A required int. Number of tensors . \n | *@li N: A required int. Number of tensors . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x". | *y: A Tensor. Has the same type as "x". | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator Pack. | |||||
It's a dynamic output. | |||||
* Compatible with the TensorFlow operator Pack. | |||||
*/ | */ | ||||
REG_OP(Pack) | REG_OP(Pack) | ||||
.DYNAMIC_INPUT(x, TensorType::BasicType()) | .DYNAMIC_INPUT(x, TensorType::BasicType()) | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -25,6 +25,235 @@ | |||||
#include "graph/operator_reg.h" | #include "graph/operator_reg.h" | ||||
namespace ge { | namespace ge { | ||||
/** | |||||
*@brief Creates ngrams from ragged string data . \n | |||||
*@par Inputs: | |||||
include: | |||||
*@li data: 1-D. The values tensor of the ragged string tensor to make ngrams out of. | |||||
*@li data_splits:The splits tensor of the ragged string tensor to make ngrams out of . \n | |||||
*@par Attributes: | |||||
* separator:The string to append between elements of the token. Use "" for no separator. | |||||
* ngram_widths:The sizes of the ngrams to create. | |||||
* left_pad:The string to use to pad the left side of the ngram sequence. Only used if pad_width != 0. | |||||
* right_pad:The string to use to pad the right side of the ngram sequence. Only used if pad_width != 0. | |||||
* pad_width:The number of padding elements to add to each side of each sequence. | |||||
* preserve_short_sequences: Preserve short sequences. \n | |||||
*@par Outputs: | |||||
*@li ngrams:The values tensor of the output ngrams ragged tensor. | |||||
*@li ngrams_splits:The splits tensor of the output ngrams ragged tensor. \n | |||||
*@see StringNGrams() | |||||
*@par Third-party framework compatibility | |||||
*compatible with StringNGrams op of tensorflow | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(StringNGrams) | |||||
.INPUT(data, TensorType({DT_STRING})) | |||||
.INPUT(data_splits, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(ngrams, TensorType({DT_STRING})) | |||||
.OUTPUT(ngrams_splits, TensorType({DT_INT32, DT_INT64})) | |||||
.REQUIRED_ATTR(separator, String) | |||||
.ATTR(ngram_widths, ListInt, {}) | |||||
.REQUIRED_ATTR(left_pad, String) | |||||
.REQUIRED_ATTR(right_pad, String) | |||||
.REQUIRED_ATTR(pad_width, Int) | |||||
.REQUIRED_ATTR(preserve_short_sequences, Bool) | |||||
.OP_END_FACTORY_REG(StringNGrams) | |||||
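Editor's note: as a sketch of how the attributes interact, here is bigram construction for a single ragged row in plain C++ (hypothetical helper; the real op additionally handles the splits tensor, multiple ngram widths, and preserve_short_sequences):

#include <iostream>
#include <string>
#include <vector>

// Pads the row with pad_width copies of left_pad/right_pad, then joins each
// window of `width` tokens with `separator`.
std::vector<std::string> NGramsForRow(std::vector<std::string> tokens,
                                      int width, const std::string& separator,
                                      const std::string& left_pad,
                                      const std::string& right_pad,
                                      int pad_width) {
  tokens.insert(tokens.begin(), pad_width, left_pad);
  tokens.insert(tokens.end(), pad_width, right_pad);
  std::vector<std::string> out;
  for (size_t i = 0; i + width <= tokens.size(); ++i) {
    std::string ngram = tokens[i];
    for (int j = 1; j < width; ++j) ngram += separator + tokens[i + j];
    out.push_back(ngram);
  }
  return out;
}

int main() {
  for (const auto& s : NGramsForRow({"a", "b", "c"}, 2, " ", "<s>", "</s>", 1))
    std::cout << s << " | ";  // <s> a | a b | b c | c </s> |
  std::cout << '\n';
}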
/** | |||||
*@brief Decodes each string in `input` into a sequence of Unicode code points . \n | |||||
*@par Inputs: | |||||
include: | |||||
*@li input:The text to be decoded. Can have any shape. Note that the output is flattened | |||||
to a vector of char values. \n | |||||
*@par Attributes: | |||||
* input_encoding:Text encoding of the input strings. This is any of the encodings supported | |||||
by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. | |||||
* errors:Error handling policy when there is invalid formatting found in the input. | |||||
The value of 'strict' will cause the operation to produce an InvalidArgument | |||||
error on any invalid input formatting. A value of 'replace' (the default) will | |||||
cause the operation to replace any invalid formatting in the input with the | |||||
`replacement_char` codepoint. A value of 'ignore' will cause the operation to | |||||
skip any invalid formatting in the input and produce no corresponding output | |||||
character. | |||||
* replacement_char:The replacement character codepoint to be used in place of any invalid | |||||
formatting in the input when `errors='replace'`. Any valid unicode codepoint may | |||||
be used. The default value is the Unicode replacement character, | |||||
U+FFFD (decimal 65533). | |||||
* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the | |||||
`replacement_char`. Default is false. \n | |||||
*@par Outputs: | |||||
*@li row_splits:A 1D tensor containing the row splits. | |||||
*@li char_values:A 1D tensor containing the decoded codepoints. | |||||
*@li char_to_byte_starts:A 1D int64 Tensor containing the byte index in the input string where each | |||||
character in `char_values` starts. \n | |||||
*@see UnicodeDecodeWithOffsets() | |||||
*@par Third-party framework compatibility | |||||
*compatible with UnicodeDecodeWithOffsets op of tensorflow | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(UnicodeDecodeWithOffsets) | |||||
.INPUT(input, TensorType({DT_STRING})) | |||||
.OUTPUT(row_splits, TensorType({DT_INT64})) | |||||
.OUTPUT(char_values, TensorType({DT_INT32})) | |||||
.OUTPUT(char_to_byte_starts, TensorType({DT_INT64})) | |||||
.REQUIRED_ATTR(input_encoding, String) | |||||
.ATTR(errors, String, "replace") | |||||
.ATTR(replacement_char, Int, 65533) | |||||
.ATTR(replace_control_characters, Bool, false) | |||||
.ATTR(Tsplits, Type, DT_INT64) | |||||
.OP_END_FACTORY_REG(UnicodeDecodeWithOffsets) | |||||
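Editor's note: the relationship between char_values and char_to_byte_starts is easiest to see on one well-formed UTF-8 string. A minimal decoder sketch, assuming valid input (so the `errors` policy never triggers) and ignoring row_splits:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Decodes UTF-8 bytes into codepoints, recording the byte offset at which
// each decoded character starts.
void DecodeUTF8(const std::string& s, std::vector<int32_t>& char_values,
                std::vector<int64_t>& byte_starts) {
  size_t i = 0;
  while (i < s.size()) {
    unsigned char b = s[i];
    int len = (b < 0x80) ? 1 : (b < 0xE0) ? 2 : (b < 0xF0) ? 3 : 4;
    int32_t cp = (len == 1) ? b : (b & (0x7F >> len));
    for (int k = 1; k < len; ++k) cp = (cp << 6) | (s[i + k] & 0x3F);
    byte_starts.push_back(static_cast<int64_t>(i));
    char_values.push_back(cp);
    i += len;
  }
}

int main() {
  std::vector<int32_t> cps;
  std::vector<int64_t> starts;
  DecodeUTF8("a\xC3\xA9z", cps, starts);  // "a", U+00E9, "z"
  for (size_t k = 0; k < cps.size(); ++k)
    std::cout << cps[k] << "@" << starts[k] << ' ';  // 97@0 233@1 122@3
  std::cout << '\n';
}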
/** | |||||
*@brief Decodes each string in `input` into a sequence of Unicode code points. \n | |||||
*@par Inputs: | |||||
include: | |||||
*@li input:The text to be decoded. Can have any shape. Note that the output is flattened | |||||
to a vector of char values. \n | |||||
*@par Attributes: | |||||
* input_encoding:Text encoding of the input strings. This is any of the encodings supported | |||||
by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. | |||||
* errors:Error handling policy when there is invalid formatting found in the input. | |||||
The value of 'strict' will cause the operation to produce an InvalidArgument | |||||
error on any invalid input formatting. A value of 'replace' (the default) will | |||||
cause the operation to replace any invalid formatting in the input with the | |||||
`replacement_char` codepoint. A value of 'ignore' will cause the operation to | |||||
skip any invalid formatting in the input and produce no corresponding output | |||||
character. | |||||
* replacement_char:The replacement character codepoint to be used in place of any invalid | |||||
formatting in the input when `errors='replace'`. Any valid unicode codepoint may | |||||
be used. The default value is the Unicode replacement character, | |||||
U+FFFD (decimal 65533). | |||||
* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the | |||||
`replacement_char`. Default is false. \n | |||||
*@par Outputs: | |||||
*@li row_splits:A 1D tensor containing the row splits. | |||||
*@li char_values:A 1D tensor containing the decoded codepoints. \n | |||||
*@see UnicodeDecode() | |||||
*@par Third-party framework compatibility | |||||
*compatible with UnicodeDecode op of tensorflow | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(UnicodeDecode) | |||||
.INPUT(input, TensorType({DT_STRING})) | |||||
.OUTPUT(row_splits, TensorType({DT_INT64})) | |||||
.OUTPUT(char_values, TensorType({DT_INT32})) | |||||
.REQUIRED_ATTR(input_encoding, String) | |||||
.ATTR(errors, String, "replace") | |||||
.ATTR(replacement_char, Int, 65533) | |||||
.ATTR(replace_control_characters, Bool, false) | |||||
.ATTR(Tsplits, Type, DT_INT64) | |||||
.OP_END_FACTORY_REG(UnicodeDecode) | |||||
/** | |||||
*@brief Transcode the input text from a source encoding to a destination encoding. \n | |||||
*@par Inputs: | |||||
include: | |||||
*@li input:The text to be processed. Can have any shape. \n | |||||
*@par Attributes: | |||||
* input_encoding:Text encoding of the input strings. This is any of the encodings supported | |||||
by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. | |||||
* output_encoding:The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. | |||||
Multi-byte encodings will be big-endian. | |||||
* errors:Error handling policy when there is invalid formatting found in the input. | |||||
The value of 'strict' will cause the operation to produce an InvalidArgument | |||||
error on any invalid input formatting. A value of 'replace' (the default) will | |||||
cause the operation to replace any invalid formatting in the input with the | |||||
`replacement_char` codepoint. A value of 'ignore' will cause the operation to | |||||
skip any invalid formatting in the input and produce no corresponding output | |||||
character. | |||||
* replacement_char:The replacement character codepoint to be used in place of any invalid | |||||
formatting in the input when `errors='replace'`. Any valid unicode codepoint may | |||||
be used. The default value is the Unicode replacement character, | |||||
U+FFFD (decimal 65533). | |||||
* replace_control_characters:Whether to replace the C0 control characters (00-1F) with the | |||||
`replacement_char`. Default is false. \n | |||||
*@par Outputs: | |||||
*@li output:A string tensor containing unicode text encoded using `output_encoding`. \n | |||||
*@see UnicodeTranscode() | |||||
*@par Third-party framework compatibility | |||||
*compatible with UnicodeTranscode op of tensorflow | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(UnicodeTranscode) | |||||
.INPUT(input, TensorType({DT_STRING})) | |||||
.OUTPUT(output, TensorType({DT_STRING})) | |||||
.REQUIRED_ATTR(input_encoding, String) | |||||
.ATTR(output_encoding, String, "UTF-8") | |||||
.ATTR(errors, String, "replace") | |||||
.ATTR(replacement_char, Int, 65533) | |||||
.ATTR(replace_control_characters, Bool, false) | |||||
.OP_END_FACTORY_REG(UnicodeTranscode) | |||||
/** | |||||
*@brief Encode a tensor of ints into unicode strings. \n | |||||
*@par Inputs: | |||||
include: | |||||
*@li input_values:A 1D tensor containing the unicode codepoints that should be encoded. | |||||
*@li input_splits:A 1D tensor specifying how the unicode codepoints should be split into strings. \n | |||||
*@par Attributes: | |||||
* output_encoding:The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. | |||||
Multi-byte encodings will be big-endian. | |||||
* errors:Error handling policy when there is invalid formatting found in the input. | |||||
The value of 'strict' will cause the operation to produce an InvalidArgument | |||||
error on any invalid input formatting. A value of 'replace' (the default) will | |||||
cause the operation to replace any invalid formatting in the input with the | |||||
`replacement_char` codepoint. A value of 'ignore' will cause the operation to | |||||
skip any invalid formatting in the input and produce no corresponding output | |||||
character. | |||||
* replacement_char:The replacement character codepoint to be used in place of any invalid | |||||
formatting in the input when `errors='replace'`. Any valid unicode codepoint may | |||||
be used. The default value is the Unicode replacement character, | |||||
U+FFFD (decimal 65533). \n | |||||
*@par Outputs: | |||||
*@li output:The 1-D Tensor of strings encoded from the provided unicode codepoints. \n | |||||
*@see UnicodeEncode() | |||||
*@par Third-party framework compatibility | |||||
*compatible with UnicodeEncode op of tensorflow | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(UnicodeEncode) | |||||
.INPUT(input_values, TensorType({DT_INT32})) | |||||
.INPUT(input_splits, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(output, TensorType({DT_STRING})) | |||||
.ATTR(errors, String, "replace") | |||||
.ATTR(output_encoding, String, "UTF-8") | |||||
.ATTR(replacement_char, Int, 65533) | |||||
.OP_END_FACTORY_REG(UnicodeEncode) | |||||
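Editor's note: a minimal UTF-8 encoder sketch for one row of codepoints, covering only the default output_encoding="UTF-8" path and assuming every codepoint is valid:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Emits 1 to 4 bytes per codepoint according to the standard UTF-8 tiers.
std::string EncodeUTF8(const std::vector<int32_t>& codepoints) {
  std::string out;
  for (int32_t cp : codepoints) {
    if (cp < 0x80) {
      out += static_cast<char>(cp);
    } else if (cp < 0x800) {
      out += static_cast<char>(0xC0 | (cp >> 6));
      out += static_cast<char>(0x80 | (cp & 0x3F));
    } else if (cp < 0x10000) {
      out += static_cast<char>(0xE0 | (cp >> 12));
      out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
      out += static_cast<char>(0x80 | (cp & 0x3F));
    } else {
      out += static_cast<char>(0xF0 | (cp >> 18));
      out += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
      out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
      out += static_cast<char>(0x80 | (cp & 0x3F));
    }
  }
  return out;
}

int main() {
  std::cout << EncodeUTF8({72, 105, 233}) << '\n';  // "Hi" followed by U+00E9
}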
/** | /** | ||||
*@brief Split elements of input based on delimiter into a SparseTensor . \n | *@brief Split elements of input based on delimiter into a SparseTensor . \n | ||||
@@ -61,6 +290,116 @@ REG_OP(StringSplit) | |||||
.ATTR(skip_empty, Bool, true) | .ATTR(skip_empty, Bool, true) | ||||
.OP_END_FACTORY_REG(StringSplit) | .OP_END_FACTORY_REG(StringSplit) | ||||
/** | |||||
*@brief Replaces the match of pattern in input with rewrite. \n | |||||
*@par Inputs: | |||||
include: | |||||
*@li input:A Tensor of type string. The text to be processed. \n | |||||
*@par Attributes: | |||||
*@li pattern:A string. The regular expression to match the input. | |||||
*@li rewrite:A string. The rewrite to be applied to the matched expression. | |||||
*@li replace_global:An optional bool. Defaults to True. If True, the replacement is global, | |||||
otherwise the replacement is done only on the first match. | |||||
*@par Outputs: | |||||
*@li output: A Tensor of type string. | |||||
*/ | |||||
REG_OP(StaticRegexReplace) | |||||
.INPUT(input, TensorType({DT_STRING})) | |||||
.OUTPUT(output, TensorType({DT_STRING})) | |||||
.ATTR(pattern, String, "") | |||||
.ATTR(rewrite, String, "") | |||||
.ATTR(replace_global, Bool, true) | |||||
.OP_END_FACTORY_REG(StaticRegexReplace) | |||||
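Editor's note: a sketch of the replace_global switch using std::regex. Note that std::regex uses ECMAScript syntax while the corresponding TensorFlow op uses RE2; this only illustrates the first-match-versus-global behavior:

#include <iostream>
#include <regex>
#include <string>

// When replace_global is false, only the first match is rewritten
// (std::regex_constants::format_first_only).
std::string RegexReplace(const std::string& input, const std::string& pattern,
                         const std::string& rewrite, bool replace_global) {
  std::regex re(pattern);
  auto flags = replace_global ? std::regex_constants::format_default
                              : std::regex_constants::format_first_only;
  return std::regex_replace(input, re, rewrite, flags);
}

int main() {
  std::cout << RegexReplace("a1b2c3", "[0-9]", "#", true) << '\n';   // a#b#c#
  std::cout << RegexReplace("a1b2c3", "[0-9]", "#", false) << '\n';  // a#b2c3
}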
/** | |||||
*@brief The input is a string tensor of any shape. The pattern is the | |||||
*regular expression to be matched with every element of the input tensor. | |||||
*The boolean values (True or False) of the output tensor indicate | |||||
*if the input matches the regex pattern provided. | |||||
*@par Inputs: | |||||
include: | |||||
*@li input:A Tensor of type string. The text to be processed. \n | |||||
*@par Attributes: | |||||
*@li pattern:A string. The regular expression to match the input. | |||||
*@par Outputs: | |||||
*@li output: A bool tensor with the same shape as `input`. | |||||
*/ | |||||
REG_OP(StaticRegexFullMatch) | |||||
.INPUT(input, TensorType({DT_STRING})) | |||||
.OUTPUT(output, TensorType({DT_BOOL})) | |||||
.ATTR(pattern, String, "") | |||||
.OP_END_FACTORY_REG(StaticRegexFullMatch) | |||||
/** | |||||
*@brief Joins the elements of `input` along segments given by `segment_ids`. \n | |||||
*@par Inputs: | |||||
include: | |||||
*@li input:A Tensor of type string. The text to be processed. | |||||
*@li segment_ids:A Tensor. Must be one of the following types: int32, int64. | |||||
*A tensor whose shape is a prefix of data.shape. Negative segment ids are not supported. | |||||
*@li num_segments:A Tensor. Must be one of the following types: int32, int64. A scalar. | |||||
*@par Attributes: | |||||
*@li separator:An optional string. Defaults to "". The separator to use when joining. | |||||
*@par Outputs: | |||||
*@li output: A Tensor of type string. | |||||
*/ | |||||
REG_OP(UnsortedSegmentJoin) | |||||
.INPUT(input, TensorType({DT_STRING})) | |||||
.INPUT(segment_ids, TensorType({DT_INT32,DT_INT64})) | |||||
.INPUT(num_segments, TensorType({DT_INT32,DT_INT64})) | |||||
.OUTPUT(output, TensorType({DT_STRING})) | |||||
.ATTR(separator, String, "") | |||||
.OP_END_FACTORY_REG(UnsortedSegmentJoin) | |||||
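Editor's note: for a 1-D input the semantics reduce to a grouped join. A minimal sketch with hypothetical names, assuming every segment id is in [0, num_segments):

#include <iostream>
#include <string>
#include <vector>

// Strings with the same segment id are concatenated in input order,
// separated by `separator`; empty segments yield "".
std::vector<std::string> UnsortedSegmentJoin(
    const std::vector<std::string>& input, const std::vector<int>& segment_ids,
    int num_segments, const std::string& separator) {
  std::vector<std::string> out(num_segments);
  std::vector<bool> started(num_segments, false);
  for (size_t i = 0; i < input.size(); ++i) {
    int s = segment_ids[i];
    if (started[s]) out[s] += separator;
    out[s] += input[i];
    started[s] = true;
  }
  return out;
}

int main() {
  auto y = UnsortedSegmentJoin({"this", "a", "is", "test"}, {0, 1, 0, 1}, 2, " ");
  std::cout << y[0] << '\n' << y[1] << '\n';  // "this is" / "a test"
}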
/** | |||||
*@brief Converts each uppercase character in the input strings to lowercase. | |||||
*@par Inputs: | |||||
include: | |||||
*@li input:A Tensor of type string. The text to be processed. | |||||
*@par Attributes: | |||||
*@li encoding:An optional string. Defaults to "". | |||||
*@par Outputs: | |||||
*@li output: A Tensor of type string. | |||||
*/ | |||||
REG_OP(StringLower) | |||||
.INPUT(input, TensorType({DT_STRING})) | |||||
.OUTPUT(output, TensorType({DT_STRING})) | |||||
.ATTR(encoding, String, "") | |||||
.OP_END_FACTORY_REG(StringLower) | |||||
/** | |||||
*@brief Converts each lowercase character in the input strings to uppercase. | |||||
*@par Inputs: | |||||
include: | |||||
*@li input:A Tensor of type string. The text to be processed. | |||||
*@par Attributes: | |||||
*@li encoding:An optional string. Defaults to "". | |||||
*@par Outputs: | |||||
*@li output: A Tensor of type string. | |||||
*/ | |||||
REG_OP(StringUpper) | |||||
.INPUT(input, TensorType({DT_STRING})) | |||||
.OUTPUT(output, TensorType({DT_STRING})) | |||||
.ATTR(encoding, String, "") | |||||
.OP_END_FACTORY_REG(StringUpper) | |||||
/** | /** | ||||
*@brief Split elements of source based on sep into a SparseTensor . \n | *@brief Split elements of source based on sep into a SparseTensor . \n | ||||
@@ -488,7 +827,7 @@ include: | |||||
*/ | */ | ||||
REG_OP(AsString) | REG_OP(AsString) | ||||
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \ | .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \ | ||||
DT_DOUBLE, DT_BOOL})) | |||||
DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128})) | |||||
.OUTPUT(y, TensorType({DT_STRING})) | .OUTPUT(y, TensorType({DT_STRING})) | ||||
.ATTR(precision, Int, -1) | .ATTR(precision, Int, -1) | ||||
.ATTR(scientific, Bool, false) | .ATTR(scientific, Bool, false) | ||||
@@ -557,6 +896,45 @@ REG_OP(DecodeBase64) | |||||
.INPUT(x, TensorType({DT_STRING})) | .INPUT(x, TensorType({DT_STRING})) | ||||
.OUTPUT(y, TensorType({DT_STRING})) | .OUTPUT(y, TensorType({DT_STRING})) | ||||
.OP_END_FACTORY_REG(DecodeBase64) | .OP_END_FACTORY_REG(DecodeBase64) | ||||
/** | |||||
*@brief StringNormalization performs string operations for basic cleaning . \n | |||||
*@par Inputs: | |||||
*@li input: Only accepts a [C] or [1, C] UTF-8 strings tensor . \n | |||||
*@par Outputs: | |||||
*@li output: UTF-8 strings tensor after cleaning . \n | |||||
*@par Attributes: | |||||
*@li stopwords : list of strings (default is empty). | |||||
*List of stop words. If not set, no word would be removed from input strings | |||||
tensor. | |||||
*@li is_case_sensitive : bool (default is false). | |||||
*Boolean. Whether the identification of stop words in input strings tensor is | |||||
case-sensitive. Default is false. | |||||
*@li case_change_action : string (default is "NONE"). | |||||
*String enum that causes the output to be lowercased/uppercased/unchanged. Valid | |||||
values are "LOWER", "UPPER", "NONE". Default is "NONE". | |||||
*@li local : string (default is "en_US"). | |||||
*Environment-dependent string that denotes the locale according to which the output | |||||
strings need to be upper/lowercased. Defaults to "en_US" or a platform-specific equivalent | |||||
as decided by the implementation . \n | |||||
*@attention Constraints: | |||||
*@li input can be either a 1-D or 2-D tensor; the shape of a 2-D tensor must be [1, C]. | |||||
*/ | |||||
REG_OP(StringNormalizer) | |||||
.INPUT(input, TensorType({DT_STRING})) | |||||
.OUTPUT(output, TensorType({DT_STRING})) | |||||
.ATTR(stopwords, ListString, {}) | |||||
.ATTR(is_case_sensitive, Bool, false) | |||||
.ATTR(case_change_action, String, "NONE") | |||||
.ATTR(local, String, "en_US") | |||||
.OP_END_FACTORY_REG(StringNormalizer) | |||||
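Editor's note: a sketch of the cleaning pipeline for a [C] input, assuming ASCII-only case mapping and ignoring the `local` attribute (real locale-aware casing is implementation-defined). The helper name is hypothetical:

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <vector>

// Drops stop words (case-insensitively unless is_case_sensitive), then
// applies the requested case change to the surviving words.
std::vector<std::string> Normalize(const std::vector<std::string>& words,
                                   const std::vector<std::string>& stopwords,
                                   bool is_case_sensitive,
                                   const std::string& case_change_action) {
  auto lower = [](std::string s) {
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return std::tolower(c); });
    return s;
  };
  auto upper = [](std::string s) {
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return std::toupper(c); });
    return s;
  };
  std::vector<std::string> out;
  for (const auto& w : words) {
    bool drop = false;
    for (const auto& sw : stopwords)
      if (is_case_sensitive ? (w == sw) : (lower(w) == lower(sw))) drop = true;
    if (drop) continue;
    if (case_change_action == "LOWER") out.push_back(lower(w));
    else if (case_change_action == "UPPER") out.push_back(upper(w));
    else out.push_back(w);
  }
  return out;
}

int main() {
  for (const auto& w : Normalize({"The", "cat", "AND", "dog"}, {"the", "and"},
                                 false, "LOWER"))
    std::cout << w << ' ';  // cat dog
  std::cout << '\n';
}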
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||