From 608553b8fe649fbe11c67a2558f5782c80e31b00 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 4 Nov 2021 15:53:22 +0800 Subject: [PATCH] upgrade Ascend package 4 Nov 21 --- ge/common/model/ge_model.cc | 9 +- ge/common/model/ge_model.h | 12 +- inc/external/acl/OWNERS | 1 + inc/external/acl/acl_mdl.h | 6 +- inc/external/acl/acl_rt.h | 39 +- inc/external/acl/acl_tdt.h | 31 +- inc/external/acl/acl_tdt_queue.h | 437 ++++++++++++++++++ inc/external/acl/error_codes/rt_error_codes.h | 4 + inc/external/acl/ops/acl_dvpp.h | 2 +- inc/external/ge/ge_api_error_codes.h | 20 +- inc/external/ge/ge_api_types.h | 2 + inc/external/ge/ge_ir_build.h | 2 +- inc/external/hccl/hccl_types.h | 1 + inc/external/runtime/rt_error_codes.h | 4 + inc/framework/common/ge_compiler_options.h | 2 +- inc/framework/common/profiling/ge_profiling.h | 2 +- .../common/profiling/ge_runner_profiling.h | 2 +- inc/framework/common/taskdown_common.h | 2 +- inc/framework/memory/memory_api.h | 2 +- inc/framework/omg/ge_init.h | 2 +- inc/framework/omg/omg.h | 2 +- inc/framework/omg/parser/model_parser.h | 2 +- inc/framework/omg/parser/op_parser.h | 2 +- inc/framework/omg/parser/parser_api.h | 2 +- inc/framework/omg/parser/parser_factory.h | 2 +- inc/framework/omg/parser/weights_parser.h | 2 +- metadef | 2 +- .../aicpu/aicpu_schedule/aicpu_op_type_list.h | 9 +- third_party/fwkacllib/inc/hccl/base.h | 41 ++ third_party/fwkacllib/inc/mmpa/mmpa_api.h | 1 - .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h | 172 +++---- .../fwkacllib/inc/mmpa/sub_inc/mmpa_win.h | 16 +- third_party/fwkacllib/inc/ops/aipp.h | 2 +- third_party/fwkacllib/inc/ops/all_ops.h | 2 +- third_party/fwkacllib/inc/ops/array_ops.h | 117 ++++- third_party/fwkacllib/inc/ops/audio_ops.h | 2 +- .../fwkacllib/inc/ops/avg_pool_1d_ops.h | 2 +- third_party/fwkacllib/inc/ops/batch_ops.h | 2 +- third_party/fwkacllib/inc/ops/bitwise_ops.h | 2 +- .../fwkacllib/inc/ops/boosted_trees_ops.h | 2 +- .../inc/ops/candidate_sampling_ops.h | 2 +- 
.../fwkacllib/inc/ops/case_condition_ops.h | 2 +- third_party/fwkacllib/inc/ops/cluster.h | 58 +++ third_party/fwkacllib/inc/ops/condtake_ops.h | 2 +- .../fwkacllib/inc/ops/control_flow_ops.h | 2 +- .../inc/ops/coordinates_1d_to_2d_ops.h | 2 +- third_party/fwkacllib/inc/ops/correlation.h | 2 +- third_party/fwkacllib/inc/ops/ctc_ops.h | 2 +- third_party/fwkacllib/inc/ops/data_flow_ops.h | 7 +- .../inc/ops/elewise_calculation_ops.h | 58 +-- .../fwkacllib/inc/ops/functional_ops.h | 2 +- third_party/fwkacllib/inc/ops/get_data_ops.h | 2 +- third_party/fwkacllib/inc/ops/hcom_ops.h | 2 +- third_party/fwkacllib/inc/ops/hvd_ops.h | 2 +- third_party/fwkacllib/inc/ops/image_ops.h | 45 +- .../fwkacllib/inc/ops/index_to_addr_ops.h | 2 +- third_party/fwkacllib/inc/ops/internal_ops.h | 2 +- third_party/fwkacllib/inc/ops/linalg_ops.h | 10 +- third_party/fwkacllib/inc/ops/list_ops.h | 2 +- third_party/fwkacllib/inc/ops/logging_ops.h | 2 +- third_party/fwkacllib/inc/ops/lookup_ops.h | 2 +- third_party/fwkacllib/inc/ops/math_ops.h | 57 ++- .../inc/ops/matrix_calculation_ops.h | 82 +++- .../fwkacllib/inc/ops/nn_batch_norm_ops.h | 33 +- .../fwkacllib/inc/ops/nn_calculation_ops.h | 263 +++++------ third_party/fwkacllib/inc/ops/nn_detect_ops.h | 176 ++++++- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 2 +- third_party/fwkacllib/inc/ops/nn_ops.h | 2 +- .../fwkacllib/inc/ops/nn_pooling_ops.h | 2 +- .../fwkacllib/inc/ops/nn_training_ops.h | 15 +- third_party/fwkacllib/inc/ops/no_op.h | 2 +- .../fwkacllib/inc/ops/nonlinear_fuc_ops.h | 44 +- .../fwkacllib/inc/ops/npu_loss_scale_ops.h | 2 +- third_party/fwkacllib/inc/ops/ocr_ops.h | 268 +++++++++++ third_party/fwkacllib/inc/ops/outfeed_ops.h | 2 +- third_party/fwkacllib/inc/ops/pad_ops.h | 34 +- third_party/fwkacllib/inc/ops/parsing_ops.h | 2 +- third_party/fwkacllib/inc/ops/quantize_ops.h | 40 +- .../fwkacllib/inc/ops/ragged_array_ops.h | 2 +- .../fwkacllib/inc/ops/ragged_conversion_ops.h | 2 +- .../fwkacllib/inc/ops/ragged_math_ops.h | 2 +- 
third_party/fwkacllib/inc/ops/random_ops.h | 22 +- third_party/fwkacllib/inc/ops/reduce_ops.h | 25 +- .../fwkacllib/inc/ops/resource_variable_ops.h | 2 +- third_party/fwkacllib/inc/ops/rnn.h | 58 ++- third_party/fwkacllib/inc/ops/rpn_ops.h | 2 +- third_party/fwkacllib/inc/ops/save_ops.h | 2 +- third_party/fwkacllib/inc/ops/sdca_ops.h | 2 +- third_party/fwkacllib/inc/ops/selection_ops.h | 46 +- third_party/fwkacllib/inc/ops/set_ops.h | 2 +- .../fwkacllib/inc/ops/slice_write_ops.h | 2 +- third_party/fwkacllib/inc/ops/sparse_ops.h | 2 +- third_party/fwkacllib/inc/ops/spectral_ops.h | 2 +- .../fwkacllib/inc/ops/split_combination_ops.h | 4 +- third_party/fwkacllib/inc/ops/state_ops.h | 2 +- .../fwkacllib/inc/ops/stateful_random_ops.h | 2 +- .../fwkacllib/inc/ops/stateless_random_ops.h | 2 +- third_party/fwkacllib/inc/ops/string_ops.h | 2 +- third_party/fwkacllib/inc/ops/swap_co_ops.h | 2 +- .../inc/ops/target_crop_and_resize.h | 2 +- .../fwkacllib/inc/ops/transformation_ops.h | 12 +- third_party/fwkacllib/inc/ops/vector_search.h | 90 +++- .../fwkacllib/inc/ops/warp_perspective_ops.h | 2 +- third_party/fwkacllib/inc/runtime/base.h | 48 +- third_party/fwkacllib/inc/runtime/config.h | 69 +-- third_party/fwkacllib/inc/runtime/context.h | 16 +- third_party/fwkacllib/inc/runtime/dev.h | 56 ++- .../fwkacllib/inc/runtime/dvfsprofile.h | 16 +- third_party/fwkacllib/inc/runtime/event.h | 38 +- third_party/fwkacllib/inc/runtime/kernel.h | 81 ++-- third_party/fwkacllib/inc/runtime/mem.h | 153 +++--- third_party/fwkacllib/inc/runtime/rt.h | 16 +- third_party/fwkacllib/inc/runtime/rt_dfx.h | 19 +- third_party/fwkacllib/inc/runtime/rt_ffts.h | 34 +- .../fwkacllib/inc/runtime/rt_ffts_plus.h | 27 +- .../inc/runtime/rt_ffts_plus_define.h | 59 ++- third_party/fwkacllib/inc/runtime/rt_model.h | 36 +- third_party/fwkacllib/inc/runtime/rt_stars.h | 33 +- .../fwkacllib/inc/runtime/rt_stars_define.h | 23 +- third_party/fwkacllib/inc/runtime/stream.h | 42 +- 
third_party/fwkacllib/inc/toolchain/plog.h | 4 +- .../fwkacllib/inc/toolchain/prof_acl_api.h | 58 +-- .../fwkacllib/inc/toolchain/prof_callback.h | 7 +- .../fwkacllib/inc/toolchain/prof_reporter.h | 5 +- 124 files changed, 2550 insertions(+), 785 deletions(-) create mode 100644 inc/external/acl/acl_tdt_queue.h create mode 100644 third_party/fwkacllib/inc/ops/cluster.h create mode 100644 third_party/fwkacllib/inc/ops/ocr_ops.h diff --git a/ge/common/model/ge_model.cc b/ge/common/model/ge_model.cc index 7fc58b6d..b90c3466 100755 --- a/ge/common/model/ge_model.cc +++ b/ge/common/model/ge_model.cc @@ -34,7 +34,6 @@ void GeModel::Init() { } GeModel::GeModel() { - attrs_.InitDefault(); Init(); } @@ -78,12 +77,12 @@ void GeModel::SetPlatformVersion(const std::string &platform_version) { this->pl void GeModel::SetPlatformType(uint8_t platform_type) { this->platform_type_ = platform_type; } -void GeModel::SetAttr(const ProtoAttrMapHelper &attrs) { attrs_ = attrs; } +void GeModel::SetAttr(const ProtoAttrMap &attrs) { attrs_ = attrs; } -ProtoAttrMapHelper GeModel::MutableAttrMap() { return attrs_; } +ProtoAttrMap &GeModel::MutableAttrMap() { return attrs_; } -ConstProtoAttrMapHelper GeModel::GetAttrMap() const { - return ConstProtoAttrMapHelper(attrs_.GetProtoOwner(), attrs_.GetProtoMsg()); +ConstProtoAttrMap &GeModel::GetAttrMap() const { + return attrs_; } Status GeModel::GetSessionId(uint32_t model_id, uint64_t &session_id) const { diff --git a/ge/common/model/ge_model.h b/ge/common/model/ge_model.h index 0e791746..256c6db0 100755 --- a/ge/common/model/ge_model.h +++ b/ge/common/model/ge_model.h @@ -17,10 +17,12 @@ #ifndef GE_MODEL_GE_MODEL_H_ #define GE_MODEL_GE_MODEL_H_ -#include #include #include #include + +#include "securec.h" +#include "runtime/rt.h" #include "common/tbe_kernel_store.h" #include "common/cust_aicpu_kernel_store.h" #include "framework/common/debug/log.h" @@ -60,9 +62,9 @@ class GeModel : public AttrHolder { void SetPlatformVersion(const std::string 
&platform_version); void SetPlatformType(uint8_t platform_type); - void SetAttr(const ProtoAttrMapHelper &attrs); + void SetAttr(const ProtoAttrMap &attrs); - ProtoAttrMapHelper MutableAttrMap() override; + ProtoAttrMap &MutableAttrMap() override; using AttrHolder::SetAttr; using AttrHolder::GetAllAttrs; @@ -77,12 +79,12 @@ class GeModel : public AttrHolder { } protected: - ConstProtoAttrMapHelper GetAttrMap() const override; + ConstProtoAttrMap &GetAttrMap() const override; private: void Init(); - ProtoAttrMapHelper attrs_; /*lint !e148*/ + ProtoAttrMap attrs_; /*lint !e148*/ Graph graph_; std::shared_ptr task_; /*lint !e148*/ diff --git a/inc/external/acl/OWNERS b/inc/external/acl/OWNERS index 8552e853..b4b22068 100755 --- a/inc/external/acl/OWNERS +++ b/inc/external/acl/OWNERS @@ -5,5 +5,6 @@ approvers: reviewers: - justin_zhao - zhangyongfeng88 +- w00267184 options: no_parent_owners: true \ No newline at end of file diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h index 522dbd38..778fa519 100644 --- a/inc/external/acl/acl_mdl.h +++ b/inc/external/acl/acl_mdl.h @@ -869,7 +869,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, * * @see aclmdlCreateAIPP */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t cscSwitch, int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, @@ -1106,7 +1106,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a * * @param modelId [IN] model id * @param index [IN] index of tensor - * @param aippinfo [OUT] Pointer for static aipp info + * @param aippInfo [OUT] Pointer for static aipp info * * @retval ACL_SUCCESS The function is successfully executed. 
* @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp @@ -1115,7 +1115,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName */ -ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); +ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippInfo); /** * @ingroup AscendCL diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h index 50dbc34d..7ea27cba 100644 --- a/inc/external/acl/acl_rt.h +++ b/inc/external/acl/acl_rt.h @@ -541,7 +541,7 @@ ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); * * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream */ -ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); +ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent startEvent, aclrtEvent endEvent); /** * @ingroup AscendCL @@ -731,6 +731,43 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, aclrtMemcpyKind kind, aclrtStream stream); +/** + * @ingroup AscendCL + * @brief synchronous memory replication of two-dimensional matrix between host and device + * + * @param dst [IN] destination address pointer + * @param dpitch [IN] pitch of destination memory + * @param src [IN] source address pointer + * @param spitch [IN] pitch of source memory + * @param width [IN] width of matrix transfer + * @param height [IN] height of matrix transfer + * @param kind [IN] memcpy type + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy2d(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, + size_t height, aclrtMemcpyKind kind); + +/** + * @ingroup AscendCL + * @brief asynchronous memory replication of two-dimensional matrix between host and device + * + * @param dst [IN] destination address pointer + * @param dpitch [IN] pitch of destination memory + * @param src [IN] source address pointer + * @param spitch [IN] pitch of source memory + * @param width [IN] width of matrix transfer + * @param height [IN] height of matrix transfer + * @param kind [IN] memcpy type + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy2dAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, + size_t height, aclrtMemcpyKind kind, aclrtStream stream); + /** * @ingroup AscendCL * @brief Asynchronous initialize memory diff --git a/inc/external/acl/acl_tdt.h b/inc/external/acl/acl_tdt.h index c357518d..d928a911 100644 --- a/inc/external/acl/acl_tdt.h +++ b/inc/external/acl/acl_tdt.h @@ -215,7 +215,7 @@ ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle); * @brief Create the channel * * @param deviceId [IN] the device id - * @param name [IN] the channel's name + * @param name [IN] the name of channel * * @retval null for failed * @retval OtherValues success @@ -224,6 +224,22 @@ ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle); */ ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name); +/** + * @ingroup AscendCL + * @brief Create the channel with max size + * + * @param deviceId [IN] the device id + * @param name [IN] the name of channel + * @param capacity [IN] the capacity of channel + * + * @retval null for failed + * @retval OtherValues success + * + * @see 
acltdtDestroyChannel + */ +ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannelWithCapacity(uint32_t deviceId, const char *name, + size_t capacity); + /** * @ingroup AscendCL * @brief Destroy the channel @@ -269,6 +285,19 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, int32_t timeout); +/** + * @ingroup AscendCL + * @brief query the size of the channel + * + * @param handle [IN] pointer to the channel handle + * @param size [OUT] current size of this channel + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + */ +ACL_FUNC_VISIBILITY aclError acltdtQueryChannelSize(const acltdtChannelHandle *handle, size_t *size); + #ifdef __cplusplus } #endif diff --git a/inc/external/acl/acl_tdt_queue.h b/inc/external/acl/acl_tdt_queue.h new file mode 100644 index 00000000..95cfdb8c --- /dev/null +++ b/inc/external/acl/acl_tdt_queue.h @@ -0,0 +1,437 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ +#define INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ + +#include "acl/acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_TDT_QUEUE_PERMISSION_MANAGE 1 +#define ACL_TDT_QUEUE_PERMISSION_DEQUEUE 2 +#define ACL_TDT_QUEUE_PERMISSION_ENQUEUE 4 + +typedef void *acltdtBuf; +typedef struct tagMemQueueAttr acltdtQueueAttr; +typedef struct acltdtQueueRouteList acltdtQueueRouteList; +typedef struct acltdtQueueRouteQueryInfo acltdtQueueRouteQueryInfo; +typedef struct acltdtQueueRoute acltdtQueueRoute; + +typedef enum { ACL_TDT_QUEUE_NAME_PTR = 0, ACL_TDT_QUEUE_DEPTH_UINT32 } acltdtQueueAttrType; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_SRC_UINT32 = 0, + ACL_TDT_QUEUE_ROUTE_DST_UINT32, + ACL_TDT_QUEUE_ROUTE_STATUS_INT32 +} acltdtQueueRouteParamType; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_QUERY_SRC = 0, + ACL_TDT_QUEUE_ROUTE_QUERY_DST, + ACL_TDT_QUEUE_ROUTE_QUERY_SRC_AND_DST +} acltdtQueueRouteQueryMode; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_QUERY_MODE_ENUM = 0, + ACL_TDT_QUEUE_ROUTE_QUERY_SRC_ID_UINT32, + ACL_TDT_QUEUE_ROUTE_QUERY_DST_ID_UINT32 +} acltdtQueueRouteQueryInfoParamType; + +/** + * @ingroup AscendCL + * @brief create queue + * + * @param attr [IN] pointer to the queue attr + * @param qid [OUT] pointer to the qid + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtDestroyQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtCreateQueue(const acltdtQueueAttr *attr, uint32_t *qid); + +/** + * @ingroup AscendCL + * @brief destroy queue + * + * @param qid [IN] qid which to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acltdtCreateQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueue(uint32_t qid); + +/** + * @ingroup AscendCL + * @brief enqueue function + * + * @param qid [IN] qid + * @param buf [IN] acltdtBuf + * @param timeout [IN] timeout + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtDequeue + */ +ACL_FUNC_VISIBILITY aclError acltdtEnqueue(uint32_t qid, acltdtBuf buf, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief dequeue function + * + * @param qid [IN] qid + * @param buf [OUT] pointer to the acltdtBuf + * @param timeout [IN] timeout + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtEnqueue + */ +ACL_FUNC_VISIBILITY aclError acltdtDequeue(uint32_t qid, acltdtBuf *buf, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief grant queue to other process + * + * @param qid [IN] qid + * @param pid [IN] pid of dst process + * @param permission [IN] permission of queue + * @param timeout [IN] timeout + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see ACL_TDT_QUEUE_PERMISSION_MANAGE | ACL_TDT_QUEUE_PERMISSION_DEQUEUE | ACL_TDT_QUEUE_PERMISSION_ENQUEUE + */ +ACL_FUNC_VISIBILITY aclError acltdtGrantQueue(uint32_t qid, int32_t pid, uint32_t permission, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief attach queue in current process + * + * @param qid [IN] qid + * @param timeout [IN] timeout + * @param permission [OUT] permission of queue + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acltdtGrantQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtAttachQueue(uint32_t qid, int32_t timeout, uint32_t *permission); + +/** + * @ingroup AscendCL + * @brief bind queue routes + * + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtBindQueueRoutes(acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief unbind queue routes + * + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtUnbindQueueRoutes(acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief query queue routes according to query mode + * + * @param queryInfo [IN] pointer to the queue route query info + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtQueryQueueRoutes(const acltdtQueueRouteQueryInfo *queryInfo, + acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief alloc acltdtBuf + * + * @param size [IN] alloc buf size + * @param buf [OUT] pointer to the acltdtBuf + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtFreeBuf + */ +ACL_FUNC_VISIBILITY aclError acltdtAllocBuf(size_t size, acltdtBuf *buf); + +/** + * @ingroup AscendCL + * @brief free acltdtBuf + * + * @param buf [IN] pointer to the acltdtBuf + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acltdtAllocBuf + */ +ACL_FUNC_VISIBILITY aclError acltdtFreeBuf(acltdtBuf buf); + +/** + * @ingroup AscendCL + * @brief get data buf address + * + * @param buf [IN] acltdtBuf + * @param dataPtr [OUT] pointer to the data ptr which is acquired from acltdtBuf + * @param size [OUT] pointer to the size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtAllocBuf + */ +ACL_FUNC_VISIBILITY aclError acltdtGetBufData(const acltdtBuf buf, void **dataPtr, size_t *size); + +/** + * @ingroup AscendCL + * @brief Create the queue attr + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueAttr + */ +ACL_FUNC_VISIBILITY acltdtQueueAttr *acltdtCreateQueueAttr(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue attr + * + * @param attr [IN] pointer to the queue attr + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueAttr + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueAttr(const acltdtQueueAttr *attr); + +/** + * @ingroup AscendCL + * @brief Set parameter for queue attr + * + * @param attr [IN|OUT] pointer to the queue attr + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueAttr + */ +ACL_FUNC_VISIBILITY aclError acltdtSetQueueAttr(acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len, + const void *param); + +/** + * @ingroup AscendCL + * + * @brief Get parameter for queue attr. 
+ * + * @param attr [IN] pointer to the queue attr + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueAttr + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueAttr(const acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len, + size_t *paramRetSize, void *param); + +/** + * @ingroup AscendCL + * @brief Create the queue route + * + * @param srcId [IN] src id of queue route + * @param dstId [IN] dst id of queue route + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRoute + */ +ACL_FUNC_VISIBILITY acltdtQueueRoute *acltdtCreateQueueRoute(uint32_t srcId, uint32_t dstId); + +/** + * @ingroup AscendCL + * @brief Destroy the queue attr + * + * @param route [IN] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRoute + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRoute(const acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * + * @brief Get parameter for queue route. 
+ * + * @param route [IN] pointer to the queue route + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueRoute + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueRouteParam(const acltdtQueueRoute *route, acltdtQueueRouteParamType type, + size_t len, size_t *paramRetSize, void *param); + +/** + * @ingroup AscendCL + * @brief Create the queue route list + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRouteList + */ +ACL_FUNC_VISIBILITY acltdtQueueRouteList *acltdtCreateQueueRouteList(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue route list + * + * @param routeList [IN] pointer to the queue route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteList(const acltdtQueueRouteList *routeList); + +/** + * @ingroup AscendCL + * @brief add queue route to the route list + * + * @param routeList [IN|OUT] pointer to the queue route list + * @param route [IN] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute + * + */ +ACL_FUNC_VISIBILITY aclError acltdtAddQueueRoute(acltdtQueueRouteList *routeList, const acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * @brief get queue route from route list + * + * @param routeList [IN] pointer to the queue route list + * @param index [IN] index of queue route in route list + * @param route [IN|OUT] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute + * + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueRoute(const acltdtQueueRouteList *routeList, size_t index, + acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * @brief get queue route num from route list + * + * @param routeList [IN] pointer to the queue route list + * + * @retval the number of queue route + * + */ +ACL_FUNC_VISIBILITY size_t acltdtGetQueueRouteNum(const acltdtQueueRouteList *routeList); + +/** + * @ingroup AscendCL + * @brief Create the queue route query info + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRouteQueryInfo + */ +ACL_FUNC_VISIBILITY acltdtQueueRouteQueryInfo *acltdtCreateQueueRouteQueryInfo(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue route query info + * + * @param info [IN] pointer to the queue route info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteQueryInfo + * + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteQueryInfo(const acltdtQueueRouteQueryInfo *info); + +/** + * @ingroup AscendCL + * @brief Set parameter for queue route info + * + * @param attr [IN|OUT] pointer to the queue route info + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueRouteQueryInfo + */ +ACL_FUNC_VISIBILITY aclError acltdtSetQueueRouteQueryInfo(acltdtQueueRouteQueryInfo *param, + acltdtQueueRouteQueryInfoParamType type, size_t len, + const void *value); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ \ No newline at end of file diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index 1c196c48..556652be 100644 --- 
a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -56,6 +56,10 @@ static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event res static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource +static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource +static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit +static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty +static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h index 5418ebd3..a536a23b 100644 --- a/inc/external/acl/ops/acl_dvpp.h +++ b/inc/external/acl/ops/acl_dvpp.h @@ -125,7 +125,7 @@ enum acldvppPixelFormat { enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; // Supported Channel Mode -enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; +enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4, DVPP_CHNMODE_PNGD = 8 }; // Supported Border Type enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h index d0d7981e..2512de0a 100644 --- a/inc/external/ge/ge_api_error_codes.h +++ b/inc/external/ge/ge_api_error_codes.h @@ -72,17 +72,23 @@ class GE_FUNC_VISIBILITY StatusFactory { class GE_FUNC_VISIBILITY ErrorNoRegisterar { public: - ErrorNoRegisterar(uint32_t err, const std::string &desc) 
{ StatusFactory::Instance()->RegisterErrorNo(err, desc); } - ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } + ErrorNoRegisterar(uint32_t err, const std::string &desc) { + StatusFactory::Instance()->RegisterErrorNo(err, desc); + } + ErrorNoRegisterar(uint32_t err, const char *desc) { + StatusFactory::Instance()->RegisterErrorNo(err, desc); + } ~ErrorNoRegisterar() {} }; // Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit -#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ - constexpr ge::Status name = \ - ((0xFF & (static_cast(runtime))) << 30) | ((0xFF & (static_cast(type))) << 28) | \ - ((0xFF & (static_cast(level))) << 25) | ((0xFF & (static_cast(sysid))) << 17) | \ - ((0xFF & (static_cast(modid))) << 12) | (0x0FFF & (static_cast(value))); \ +#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ + constexpr ge::Status name = (static_cast(0xFFU & (static_cast(runtime))) << 30) | \ + (static_cast(0xFFU & (static_cast(type))) << 28) | \ + (static_cast(0xFFU & (static_cast(level))) << 25) | \ + (static_cast(0xFFU & (static_cast(sysid))) << 17) | \ + (static_cast(0xFFU & (static_cast(modid))) << 12) | \ + (static_cast(0x0FFFU) & (static_cast(value))); \ const ErrorNoRegisterar g_##name##_errorno(name, desc); #define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index ac821281..d2d5bf5d 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -28,6 +28,7 @@ namespace ge { // Option key: graph run mode const char *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; +const char *const OPTION_DEVICE_TYPE = "ge.deviceType"; // Option key: ome init const char *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; @@ -67,6 +68,7 @@ const char *const 
OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOp const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; +const char *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; // Option key: memory init const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h index 729685a9..cf543315 100644 --- a/inc/external/ge/ge_ir_build.h +++ b/inc/external/ge/ge_ir_build.h @@ -103,7 +103,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, * @retval OtherValues Failure */ ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &)) -GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model); +GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const std::string &output_file, const ModelBufferData &model); GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model); diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h index 0e832396..ea09fa4d 100644 --- a/inc/external/hccl/hccl_types.h +++ b/inc/external/hccl/hccl_types.h @@ -53,6 +53,7 @@ typedef enum { HCCL_E_PROFILING = 17, /**< call profiling api fail */ HCCL_E_CCE = 18, /**< call cce api fail */ HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_AGAIN = 20, /**< try again */ HCCL_E_RESERVED /**< reserved */ } HcclResult; diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index ef7e2ec7..8966f738 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -58,6 +58,10 @@ static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re static 
const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource +static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit +static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty +static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full +static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/inc/framework/common/ge_compiler_options.h b/inc/framework/common/ge_compiler_options.h index 5c947346..6876740e 100644 --- a/inc/framework/common/ge_compiler_options.h +++ b/inc/framework/common/ge_compiler_options.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/common/profiling/ge_profiling.h b/inc/framework/common/profiling/ge_profiling.h index e9b207af..0a7c9181 100644 --- a/inc/framework/common/profiling/ge_profiling.h +++ b/inc/framework/common/profiling/ge_profiling.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/inc/framework/common/profiling/ge_runner_profiling.h b/inc/framework/common/profiling/ge_runner_profiling.h index 27e19bce..1c594c5b 100644 --- a/inc/framework/common/profiling/ge_runner_profiling.h +++ b/inc/framework/common/profiling/ge_runner_profiling.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/common/taskdown_common.h b/inc/framework/common/taskdown_common.h index f2f731be..81a532dd 100644 --- a/inc/framework/common/taskdown_common.h +++ b/inc/framework/common/taskdown_common.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h index a316fd59..da36cdd6 100644 --- a/inc/framework/memory/memory_api.h +++ b/inc/framework/memory/memory_api.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/omg/ge_init.h b/inc/framework/omg/ge_init.h index 42fd8979..67ef1082 100644 --- a/inc/framework/omg/ge_init.h +++ b/inc/framework/omg/ge_init.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h index 37d9e26b..a5360bb3 100644 --- a/inc/framework/omg/omg.h +++ b/inc/framework/omg/omg.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 70444e0a..d311706d 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/omg/parser/op_parser.h b/inc/framework/omg/parser/op_parser.h index 70bec218..7d02b257 100644 --- a/inc/framework/omg/parser/op_parser.h +++ b/inc/framework/omg/parser/op_parser.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/omg/parser/parser_api.h b/inc/framework/omg/parser/parser_api.h index 23df0177..a493e29f 100644 --- a/inc/framework/omg/parser/parser_api.h +++ b/inc/framework/omg/parser/parser_api.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h index 9d6590c0..06aaecb7 100644 --- a/inc/framework/omg/parser/parser_factory.h +++ b/inc/framework/omg/parser/parser_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/inc/framework/omg/parser/weights_parser.h b/inc/framework/omg/parser/weights_parser.h index e4436044..a568f927 100644 --- a/inc/framework/omg/parser/weights_parser.h +++ b/inc/framework/omg/parser/weights_parser.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/metadef b/metadef index 60df4b39..7d777404 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 60df4b39a6f639c21dd7deb220b93345451938f5 +Subproject commit 7d777404b3b7fe7daeaf00e566e431c6a05b040a diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h index 7e0f94a8..2a10859c 100644 --- a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -17,6 +17,7 @@ #ifndef AICPU_OP_TYPE_LIST_H_ #define AICPU_OP_TYPE_LIST_H_ +extern "C" { enum OpKernelType { TF_KERNEL, CPU_KERNEL @@ -36,12 +37,6 @@ struct SysOpInfo { OpKernelType kernelsType; }; -struct OpParamInfo { - uint64_t num; - uint64_t dtypeList; - uint64_t formatList; -}; - struct SysOpCheckInfo { uint64_t opListNum; uint64_t offSetLen; @@ -57,4 +52,6 @@ struct SysOpCheckResp { uint64_t opParamInfoList; }; #pragma pack(pop) +} + #endif // AICPU_OP_TYPE_LIST_H_ 
diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index ffbf552b..aa43c82a 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -86,6 +86,47 @@ enum OriginalGraphShapeType { SHAPE_RESERVED /**< reserved */ }; +enum HcclEventType { + HCCL_EVENT_SEND_COMPLETION = 0, + HCCL_EVENT_RECV_REQUEST, + HCCL_EVENT_RECV_COMPLETION, + HCCL_EVENT_CONGESTION_RELIEF, + HCCL_EVENT_RESERVED /**< reserved */ +}; + +const u32 TAG_MAX_LEN = 127; // maximum tag length +using TagAttr = struct TagAttrDef { + char name[TAG_MAX_LEN + 1]; // tag identifier + // For receive data of this tag, whether the caller will actively call the receive interface: 0 = no, 1 = yes (reserved, not supported yet). + // For activeRecv = 0, the caller is actively notified when the receive side receives data or a send request. + uint32_t activeRecv; + uint32_t sendCredit; // number of inflight sends allowed for this tag + uint32_t eventId; +}; + +using HcclEventMsg = struct HcclEventMsgDef { + HcclComm comm; + u32 peerRank; + u32 tag; + // 0:HCCL_SEND_COMPLETION; 1:HCCL_RECV_COMPLETION; 2:HCCL_RECV_REQUEST; 3:HCCL_CONGESTION_RELIEF + u32 hcclEventType; + union { + struct { + u32 reserver; + } sendCompletionItem; + struct { + u32 reserver; + } recvRequestItem; + struct { + u32 reserver; + } recvCompletionItem; + struct CongestionReliefItem { + u32 reserver; + } congestionReliefItem; + } desc; +}; + + /** * @brief stream handle.
*/ diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h index f8d5ccf3..c48aaa63 100644 --- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h +++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index 3d196e41..46fb6e21 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -58,31 +58,31 @@ typedef long MM_LONG; typedef VOID *(*userProcFunc)(VOID *pulArg); typedef struct { - userProcFunc procFunc; // Callback function pointer - VOID *pulArg; // Callback function parameters + userProcFunc procFunc; // Callback function pointer + VOID *pulArg; // Callback function parameters } mmUserBlock_t; typedef struct { - const char *dli_fname; - void *dli_fbase; - const char *dli_sname; - void *dli_saddr; - size_t dli_size; /* ELF only */ - int dli_bind; /* ELF only */ - int dli_type; + const CHAR *dli_fname; + VOID *dli_fbase; + const CHAR *dli_sname; + VOID *dli_saddr; + size_t dli_size; /* ELF only */ + INT32 dli_bind; /* ELF only */ + INT32 dli_type; } mmDlInfo; typedef struct { - int wSecond; // Seconds. [0-60] (1 leap second) - int wMinute; // Minutes. [0-59] - int wHour; // Hours. [0-23] - int wDay; // Day. [1-31] - int wMonth; // Month. [1-12] - int wYear; // Year - int wDayOfWeek; // Day of week. [0-6] - int tm_yday; // Days in year.[0-365] - int tm_isdst; // DST. [-1/0/1] - long int wMilliseconds; // milliseconds + INT32 wSecond; // Seconds. [0-60] (1 leap second) + INT32 wMinute; // Minutes. [0-59] + INT32 wHour; // Hours. [0-23] + INT32 wDay; // Day. [1-31] + INT32 wMonth; // Month. [1-12] + INT32 wYear; // Year + INT32 wDayOfWeek; // Day of week. [0-6] + INT32 tm_yday; // Days in year.[0-365] + INT32 tm_isdst; // DST. 
[-1/0/1] + LONG wMilliseconds; // milliseconds } mmSystemTime_t; typedef sem_t mmSem_t; @@ -98,72 +98,72 @@ typedef ssize_t mmSsize_t; typedef size_t mmSize; // size typedef struct { - UINT32 createFlag; - INT32 oaFlag; + UINT32 createFlag; + INT32 oaFlag; } mmCreateFlag; typedef struct { - VOID *sendBuf; - INT32 sendLen; + VOID *sendBuf; + INT32 sendLen; } mmIovSegment; typedef struct in_addr mmInAddr; typedef struct { - VOID *inbuf; - INT32 inbufLen; - VOID *outbuf; - INT32 outbufLen; - mmOverLap *oa; + VOID *inbuf; + INT32 inbufLen; + VOID *outbuf; + INT32 outbufLen; + mmOverLap *oa; } mmIoctlBuf; typedef int mmAtomicType; typedef int mmAtomicType64; typedef enum { - pollTypeRead = 1, // pipe read - pollTypeRecv, // socket recv - pollTypeIoctl, // ioctl + pollTypeRead = 1, // pipe read + pollTypeRecv, // socket recv + pollTypeIoctl, // ioctl } mmPollType; typedef struct { - mmPollHandle handle; // The file descriptor or handle of poll is required - mmPollType pollType; // Operation type requiring poll + mmPollHandle handle; // The file descriptor or handle of poll is required + mmPollType pollType; // Operation type requiring poll // read or recv or ioctl - INT32 ioctlCode; // IOCTL operation code, dedicated to IOCTL - mmComPletionKey completionKey; // The default value is blank, which is used in windows + INT32 ioctlCode; // IOCTL operation code, dedicated to IOCTL + mmComPletionKey completionKey; // The default value is blank, which is used in windows // The data used to receive the difference between which handle is readable } mmPollfd; typedef struct { - VOID *priv; // User defined private content - mmPollHandle bufHandle; // Value of handle corresponding to buf - mmPollType bufType; // Data types polled to - VOID *buf; // Data used in poll - UINT32 bufLen; // Data length used in poll - UINT32 bufRes; // Actual return length + VOID *priv; // User defined private content + mmPollHandle bufHandle; // Value of handle corresponding to buf + mmPollType bufType; 
// Data types polled to + VOID *buf; // Data used in poll + UINT32 bufLen; // Data length used in poll + UINT32 bufRes; // Actual return length } mmPollData, *pmmPollData; typedef VOID (*mmPollBack)(pmmPollData); typedef struct { - INT32 tz_minuteswest; // How many minutes is it different from Greenwich - INT32 tz_dsttime; // type of DST correction + INT32 tz_minuteswest; // How many minutes is it different from Greenwich + INT32 tz_dsttime; // type of DST correction } mmTimezone; typedef struct { - LONG tv_sec; - LONG tv_usec; + LONG tv_sec; + LONG tv_usec; } mmTimeval; typedef struct { - MM_LONG tv_sec; - MM_LONG tv_nsec; + MM_LONG tv_sec; + MM_LONG tv_nsec; } mmTimespec; typedef struct { - ULONGLONG totalSize; - ULONGLONG freeSize; - ULONGLONG availSize; + ULONGLONG totalSize; + ULONGLONG freeSize; + ULONGLONG availSize; } mmDiskSize; #define mmTLS __thread @@ -174,40 +174,40 @@ typedef mode_t mmMode_t; typedef struct option mmStructOption; typedef struct { - char addr[MMPA_MACINFO_DEFAULT_SIZE]; // ex:aa-bb-cc-dd-ee-ff\0 + CHAR addr[MMPA_MACINFO_DEFAULT_SIZE]; // ex:aa-bb-cc-dd-ee-ff\0 } mmMacInfo; typedef struct { - char **argv; - INT32 argvCount; - char **envp; - INT32 envpCount; + CHAR **argv; + INT32 argvCount; + CHAR **envp; + INT32 envpCount; } mmArgvEnv; typedef struct { - char arch[MMPA_CPUDESC_DEFAULT_SIZE]; - char manufacturer[MMPA_CPUDESC_DEFAULT_SIZE]; // vendor - char version[MMPA_CPUDESC_DEFAULT_SIZE]; // modelname - INT32 frequency; // cpu frequency - INT32 maxFrequency; // max speed - INT32 ncores; // cpu cores - INT32 nthreads; // cpu thread count - INT32 ncounts; // logical cpu nums + CHAR arch[MMPA_CPUDESC_DEFAULT_SIZE]; + CHAR manufacturer[MMPA_CPUDESC_DEFAULT_SIZE]; // vendor + CHAR version[MMPA_CPUDESC_DEFAULT_SIZE]; // modelname + INT32 frequency; // cpu frequency + INT32 maxFrequency; // max speed + INT32 ncores; // cpu cores + INT32 nthreads; // cpu thread count + INT32 ncounts; // logical cpu nums } mmCpuDesc; typedef mode_t MODE; 
typedef struct { - INT32 detachFlag; // Determine whether to set separation property 0, not to separate 1 - INT32 priorityFlag; // Determine whether to set priority 0 and not set 1 - INT32 priority; // Priority value range to be set 1-99 - INT32 policyFlag; // Set scheduling policy or not 0 do not set 1 setting - INT32 policy; // Scheduling policy value value - // MMPA_THREAD_SCHED_RR - // MMPA_THREAD_SCHED_OTHER - // MMPA_THREAD_SCHED_FIFO - INT32 stackFlag; // Set stack size or not: 0 does not set 1 setting - UINT32 stackSize; // The stack size unit bytes to be set cannot be less than MMPA_THREAD_STACK_MIN + INT32 detachFlag; // Determine whether to set separation property 0, not to separate 1 + INT32 priorityFlag; // Determine whether to set priority 0 and not set 1 + INT32 priority; // Priority value range to be set 1-99 + INT32 policyFlag; // Set scheduling policy or not 0 do not set 1 setting + INT32 policy; // Scheduling policy value value + // MMPA_THREAD_SCHED_RR + // MMPA_THREAD_SCHED_OTHER + // MMPA_THREAD_SCHED_FIFO + INT32 stackFlag; // Set stack size or not: 0 does not set 1 setting + UINT32 stackSize; // The stack size unit bytes to be set cannot be less than MMPA_THREAD_STACK_MIN } mmThreadAttr; #ifdef __ANDROID__ @@ -326,8 +326,8 @@ MMPA_FUNC_VISIBILITY INT32 mmCondDestroy(mmCond *cond); MMPA_FUNC_VISIBILITY INT32 mmGetPid(); MMPA_FUNC_VISIBILITY INT32 mmGetTid(); MMPA_FUNC_VISIBILITY INT32 mmGetPidHandle(mmProcess *processHandle); -MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTime); -MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTime); +MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTimePtr); +MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTimePtr); MMPA_FUNC_VISIBILITY INT32 mmSemInit(mmSem_t *sem, UINT32 value); MMPA_FUNC_VISIBILITY INT32 mmSemWait(mmSem_t *sem); @@ -393,7 +393,7 @@ MMPA_FUNC_VISIBILITY mmSsize_t mmWritev(mmProcess fd, mmIovSegment *iov, INT32 i MMPA_FUNC_VISIBILITY 
VOID mmMb(); MMPA_FUNC_VISIBILITY INT32 mmInetAton(const CHAR *addrStr, mmInAddr *addr); -MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag); +MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 accessFlag, mmCreateFlag fileFlag); MMPA_FUNC_VISIBILITY mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len); MMPA_FUNC_VISIBILITY mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len); MMPA_FUNC_VISIBILITY INT32 mmCloseFile(mmProcess fileId); @@ -407,13 +407,13 @@ MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueSub64(mmAtomicType64 *ptr, mmAtomicTy MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); // The following 3 interfaces are to be deleted -MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); -MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], INT32 waitMode); MMPA_FUNC_VISIBILITY VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); -MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); +MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipeHandle[], UINT32 pipeCount); // Poll related interface MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); @@ 
-467,10 +467,10 @@ MMPA_FUNC_VISIBILITY INT32 mmGetOptOpt(); MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); -MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); +MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, CHAR *const *argv, const CHAR *opts); MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, - char *const *argv, - const char *opts, + CHAR *const *argv, + const CHAR *opts, const mmStructOption *longOpts, INT32 *longIndex); @@ -496,7 +496,7 @@ MMPA_FUNC_VISIBILITY INT32 mmSetEnv(const CHAR *name, const CHAR *value, INT32 o MMPA_FUNC_VISIBILITY CHAR *mmStrTokR(CHAR *str, const CHAR *delim, CHAR **saveptr); MMPA_FUNC_VISIBILITY CHAR *mmDirName(CHAR *path); MMPA_FUNC_VISIBILITY CHAR *mmBaseName(CHAR *path); -MMPA_FUNC_VISIBILITY INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize); +MMPA_FUNC_VISIBILITY INT32 mmGetDiskFreeSpace(const CHAR *path, mmDiskSize *diskSize); /* * Function: set the thread name created by mmcreatetask @@ -540,7 +540,7 @@ MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, - const char *stdoutRedirectFile, + const CHAR *stdoutRedirectFile, mmProcess *id); MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index b17234b8..699fe815 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -368,8 +368,8 @@ MMPA_FUNC_VISIBILITY INT32 mmCondDestroy(mmCond *cond); MMPA_FUNC_VISIBILITY INT32 mmGetPid(VOID); MMPA_FUNC_VISIBILITY INT32 mmGetTid(VOID); MMPA_FUNC_VISIBILITY INT32 mmGetPidHandle(mmProcess 
*processHandle); -MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTime); -MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTime); +MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTimePtr); +MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTimePtr); MMPA_FUNC_VISIBILITY INT32 mmSemInit(mmSem_t *sem, UINT32 value); MMPA_FUNC_VISIBILITY INT32 mmSemWait(mmSem_t *sem); MMPA_FUNC_VISIBILITY INT32 mmSemPost(mmSem_t *sem); @@ -431,7 +431,7 @@ MMPA_FUNC_VISIBILITY mmSsize_t mmWritev(mmSockHandle fd, mmIovSegment *iov, INT3 MMPA_FUNC_VISIBILITY VOID mmMb(); MMPA_FUNC_VISIBILITY INT32 mmInetAton(const CHAR *addrStr, mmInAddr *addr); -MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag); +MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 accessFlag, mmCreateFlag fileFlag); MMPA_FUNC_VISIBILITY mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len); MMPA_FUNC_VISIBILITY mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len); MMPA_FUNC_VISIBILITY INT32 mmCloseFile(mmProcess fileId); @@ -444,13 +444,13 @@ MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueInc64(mmAtomicType64 *ptr, mmAtomicTy MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueSub64(mmAtomicType64 *ptr, mmAtomicType64 value); MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); -MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); -MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], INT32 waitMode); MMPA_FUNC_VISIBILITY VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); -MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 
pipeCount, INT32 waitMode); -MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); +MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipeHandle[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipeHandle[], UINT32 pipeCount); MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h index 86805f72..6db4d783 100644 --- a/third_party/fwkacllib/inc/ops/aipp.h +++ b/third_party/fwkacllib/inc/ops/aipp.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h index b15f2292..b67ead37 100644 --- a/third_party/fwkacllib/inc/ops/all_ops.h +++ b/third_party/fwkacllib/inc/ops/all_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index 4b45f4cf..18028c19 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -701,6 +701,31 @@ REG_OP(Unsqueeze) .ATTR(axes, ListInt, {}) .OP_END_FACTORY_REG(Unsqueeze) +/** +*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without changing the data. \n + +*@par Inputs: +*@li x: Original tensor. + +*@par Attributes: +*@li axes: List of ints indicating the dimensions to be inserted. \n + +*@par Outputs: +*y: Reshape tensor with same data as input. \n + +*@par Third-party framework compatibility +*Compatible with the Onnx operator Unsqueeze. + +*@par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Unsqueeze instead. +*/ + +REG_OP(UnsqueezeV2) + .INPUT(x, TensorType::ALL()) + .OUTPUT(y, TensorType::ALL()) + .ATTR(axis, ListInt, {}) + .OP_END_FACTORY_REG(UnsqueezeV2) + /** *@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. \n @@ -753,6 +778,30 @@ REG_OP(Squeeze) .ATTR(axis, ListInt, {}) .OP_END_FACTORY_REG(Squeeze) +/** +*@brief Removes dimensions of size 1 from the shape of a tensor. \n + +*@par Inputs: +*x: A tensor. \n + +*@par Attributes: +*axis: An optional list of int32 or int64. If not specified, squeezes all dimensions of size 1. If specified, only squeezes the dimensions listed. It is an error to squeeze a dimension that is not 1. \n + +*@par Outputs: +*y: A tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Squeeze. + +*@par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Squeeze instead. 
+*/ +REG_OP(SqueezeV2) + .INPUT(x, TensorType::ALL()) + .OUTPUT(y, TensorType::ALL()) + .ATTR(axis, ListInt, {}) + .OP_END_FACTORY_REG(SqueezeV2) + /** *@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n @@ -1226,6 +1275,35 @@ REG_OP(NonZero) .ATTR(dtype, Type, DT_INT64) .OP_END_FACTORY_REG(NonZero) +/** +*@Returns a tensor containing the indices of all non-zero elements of input. \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int32, int64. + +*@par Attributes: +* transpose: the output tensor will be transposed if true. \n + +*@par Outputs: +* value: A Tensor. Has the same type as "x" . \n +* index: A Tensor. The type is INT32, means index for input. \n +* count: A Scalar. The type is INT32, means count for non_zero ele in input. \n + +*@par Third-party framework compatibility +*Compatible with the PyTorch operator NonZeroWithValue. +*/ + +REG_OP(NonZeroWithValue) + .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) + .OUTPUT(value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) + .OUTPUT(index, TensorType({DT_INT32})) + .OUTPUT(count, TensorType({DT_INT32})) + .ATTR(transpose, Bool, false) + .ATTR(dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(NonZeroWithValue) + /** * @brief Expand the input tensor to a compatible shape. \n @@ -1258,24 +1336,53 @@ REG_OP(ExpandD) * Three inputs, including: * @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n * @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n -* @li ivf_offset: A 1-D tensor of type int32 with the value of ivf offset. 
\n +* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n * @par Attributes: * total_limit: A int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n * @par Outputs: * @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n -* @li buckets_offset: A 1-D tensor of type int32 with the value of ivf_offset corresponding to bucket_list. \n +* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n */ REG_OP(CalcBucketsLimitAndOffset) .INPUT(bucket_list, TensorType({DT_INT32})) .INPUT(ivf_counts, TensorType({DT_INT32})) - .INPUT(ivf_offset, TensorType({DT_INT32})) + .INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64})) .OUTPUT(buckets_limit, TensorType({DT_INT32})) - .OUTPUT(buckets_offset, TensorType({DT_INT32})) + .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) .REQUIRED_ATTR(total_limit, Int) .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) +/** +*@brief Get dim number in tensordesc. \n + +*@par Inputs: +*x: A Tensor. \n + +*@par Outputs: +*y: A 1D tensor. The data type must be int32. \n +*/ +REG_OP(GetShape) + .DYNAMIC_INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(GetShape) + +/** +*@brief Update the tensor_desc of the output. \n + +* @par attributes: +* @li shape: A listInt contains the data to update. 
\n + +*@par outputs: +* y: a tensor_desc, type is int.\n +*/ +REG_OP(UpdateTensorDesc) + .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, + DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) + .REQUIRED_ATTR(shape, ListInt) + .OP_END_FACTORY_REG(UpdateTensorDesc) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h index f05135d1..d9883253 100644 --- a/third_party/fwkacllib/inc/ops/audio_ops.h +++ b/third_party/fwkacllib/inc/ops/audio_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h index d0800a08..9583eff9 100644 --- a/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h +++ b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h index ca4fe1db..4b78951d 100644 --- a/third_party/fwkacllib/inc/ops/batch_ops.h +++ b/third_party/fwkacllib/inc/ops/batch_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h index dac78118..d032476d 100644 --- a/third_party/fwkacllib/inc/ops/bitwise_ops.h +++ b/third_party/fwkacllib/inc/ops/bitwise_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h index 08e54824..550e8b7d 100644 --- a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h +++ b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h index 890c52ae..e20607bf 100644 --- a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h +++ b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/case_condition_ops.h b/third_party/fwkacllib/inc/ops/case_condition_ops.h index 85dba609..85064845 100644 --- a/third_party/fwkacllib/inc/ops/case_condition_ops.h +++ b/third_party/fwkacllib/inc/ops/case_condition_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/cluster.h b/third_party/fwkacllib/inc/ops/cluster.h new file mode 100644 index 00000000..f3242a13 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/cluster.h @@ -0,0 +1,58 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file cluster.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { +/** +* @brief Perform k-means clustering on a data matrix. \n + +* @par Inputs: +* Three required inputs and one optional input, including: \n +* @li x: A 2D tensor of data type float32. \n +* @li y: A 2D tensor of data type float32. \n +* @li sum_square_x: An optional 2D tensor of data type float32. \n +* @li sum_square_y: A 2D tensor of data type float32. 
\n + +* @par Attributes: +* use_actual_distance: Indicates whether to calculate the complete distance. \n + +* @par Outputs: +* @li segment_sum: A tensor of data type float32. \n +* @li segment_count: A tensor of data type float32. \n +* @li k_mean_total_sum: A tensor of data type float32. \n +*/ +REG_OP(KMeansCentroids) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT})) + .INPUT(sum_square_y, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(sum_square_x, TensorType({DT_FLOAT})) + .OUTPUT(segment_sum, TensorType({DT_FLOAT})) + .OUTPUT(segment_count, TensorType({DT_FLOAT})) + .OUTPUT(kmean_total_sum, TensorType({DT_FLOAT})) + .ATTR(use_actual_distance, Bool, false) + .OP_END_FACTORY_REG(KMeansCentroids) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ diff --git a/third_party/fwkacllib/inc/ops/condtake_ops.h b/third_party/fwkacllib/inc/ops/condtake_ops.h index 029cffbf..5e91eb07 100644 --- a/third_party/fwkacllib/inc/ops/condtake_ops.h +++ b/third_party/fwkacllib/inc/ops/condtake_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h index cd993599..53a213f7 100644 --- a/third_party/fwkacllib/inc/ops/control_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h b/third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h index f52c90b0..79a64c2c 100644 --- a/third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h +++ b/third_party/fwkacllib/inc/ops/coordinates_1d_to_2d_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/correlation.h b/third_party/fwkacllib/inc/ops/correlation.h index caebba50..c7262cbb 100644 --- a/third_party/fwkacllib/inc/ops/correlation.h +++ b/third_party/fwkacllib/inc/ops/correlation.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h index 6e908091..7729432e 100644 --- a/third_party/fwkacllib/inc/ops/ctc_ops.h +++ b/third_party/fwkacllib/inc/ops/ctc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index 32454d27..492a58ae 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2336,12 +2336,14 @@ REG_OP(CacheAllIndexToLocal) /** *@brief LRUCacheV2, aicore LRUCache. + *@par Inputs: *index_list: exchange index list *data: host data *cache: gm cache *tag: cache's tag *is_last_call: if is last call write all cache to data + *@par Outputs: *data: output data *cache: gm cache @@ -2349,8 +2351,11 @@ REG_OP(CacheAllIndexToLocal) *index_offset_list: index_offset_list *not_in_cache_index_list: output not in cache's index_list *not_in_cache_number: scalar + *@par Attributes: *pre_route_count: types of all outputs + +*@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LRUCacheV2) diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index bcf50058..4cb3d961 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -132,7 +132,7 @@ REG_OP(MinimumGrad) *dst_type: An required attribute of type int32, specifying the dst data type. \n *@par Outputs: -*y:A Tensor. Has the same type as x. +*y:A Tensor with same shape as x, and data type is specified by dst_type. 
*/ REG_OP(Cast) .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, @@ -3391,57 +3391,57 @@ REG_OP(TensorRedirect) .OP_END_FACTORY_REG(TensorRedirect) /** -* @brief Performs the element-wise division of tensor x2 by tensor x3, -* multiply the result by the scalar value and add it to tensor x1 +* @brief Performs the element-wise division of tensor x1 by tensor x2, +* multiply the result by the scalar value and add it to tensor input_data. * @par Inputs: * Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: -* float16, float32. -* @li x1: A mutable input Tensor of the same type as x1. -* @li x2: A mutable input Tensor of the same type as x1. +* float16, float32, double, int64. +* @li x1: A mutable input Tensor of the same type as input_data. +* @li x2: A mutable input Tensor of the same type as input_data. * @li value: A mutable input Tensor. Must be one of the following types: -* float16, float32, int32. \n +* float16, float32, double, int64, int32. \n + * @par Outputs: -* y: A mutable Tensor. Has the same type as "x1". \n +* y: A mutable Tensor. Has the same type as input_data. \n * @par Third-party framework compatibility -* Compatible with the Pytorch operator Addcdiv. +* Compatible with the Pytorch operator Addcdiv(version-1.5.0). 
*/ REG_OP(Addcdiv) - .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) + .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32, DT_DOUBLE, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) .OP_END_FACTORY_REG(Addcdiv) /** -* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, +* @brief Performs the element-wise multiplication of tensor x1 by tensor x2, * multiply the result by the scalar value and add it to tensor input_data - * @par Inputs: * Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: -* float16, float32, int8, int32, uint8. -* @li x1: A mutable input Tensor of the same type as x1. -* @li x2: A mutable input Tensor of the same type as x1. -* @li value: A tensor which includes only one element of the same type as x1. \n +* float16, float32, double, int64, int8, int32, uint8. +* @li x1: A mutable input Tensor of the same type as input_data. +* @li x2: A mutable input Tensor of the same type as input_data. +* @li value: A tensor which includes only one element of the same type as input_data. \n * @par Outputs: -* y: A mutable output Tensor. Has the same type as "x1". \n +* y: A mutable output Tensor. Has the same type as input_data. \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Addcmul. 
*/ REG_OP(Addcmul) - .INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) .OP_END_FACTORY_REG(Addcmul) /** @@ -3508,8 +3508,8 @@ REG_OP(StrideAdd) * Compatible with the Pytorch equal operator. 
\n */ REG_OP(TensorEqual) - .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) - .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8})) .OUTPUT(output_z, TensorType({DT_BOOL})) .OP_END_FACTORY_REG(TensorEqual) diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index 7cfe39c4..e1fbe6b3 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/get_data_ops.h b/third_party/fwkacllib/inc/ops/get_data_ops.h index e5518ef8..33dc4f14 100644 --- a/third_party/fwkacllib/inc/ops/get_data_ops.h +++ b/third_party/fwkacllib/inc/ops/get_data_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index 497f6a68..6db276a9 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/hvd_ops.h b/third_party/fwkacllib/inc/ops/hvd_ops.h index 00299ef7..a49ec5ed 100644 --- a/third_party/fwkacllib/inc/ops/hvd_ops.h +++ b/third_party/fwkacllib/inc/ops/hvd_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 2327e76e..319681e4 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1848,14 +1848,51 @@ REG_OP(DenseImageWarpGrad) *Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(GridSampler2D) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .ATTR(interpolation_mode, String, "bilinear") .ATTR(padding_mode, String, "zeros") .ATTR(align_corners, Bool, false) .OP_END_FACTORY_REG(GridSampler2D) +/** +*@brief Computes the gradients of GridSampler2D. + +*@par Inputs: +*@li grad: A 4-D Tensor with shape `[batch, channels, height, width]`. +*@li x: A 4-D Tensor with shape `[batch, channels, height, width]`. +*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`. + +*@par Attributes: +*@li interpolation_mode: An optional string specifying the interpolation method. + Defaults to "bilinear". 
+*@li padding_mode: An optional string specifying the pad method. + Defaults to "zeros". +*@li align_corners: An optional bool. If "true", the centers of the corner + pixels of the input and output tensors are aligned. Defaults to false. + +*@par Outputs: +*dx: Returns 4-D Tensor with the same dtype and shape as `x`. +*dgrid: Returns 4-D Tensor with the same dtype and shape as `grid`. + +*@par Third-party framework compatibility +*Compatible with pytorch GridSampler2DGrad operator. + +*@par Restrictions: +*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(GridSampler2DGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(dgrid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(interpolation_mode, String, "bilinear") + .ATTR(padding_mode, String, "zeros") + .ATTR(align_corners, Bool, false) + .OP_END_FACTORY_REG(GridSampler2DGrad) + /** *@brief This operation unnormalize input Grid, which is usually gennerated by affine_grid. diff --git a/third_party/fwkacllib/inc/ops/index_to_addr_ops.h b/third_party/fwkacllib/inc/ops/index_to_addr_ops.h index 3af17a45..c6bbaaa8 100644 --- a/third_party/fwkacllib/inc/ops/index_to_addr_ops.h +++ b/third_party/fwkacllib/inc/ops/index_to_addr_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h index bcc3f1c3..9dde14a5 100644 --- a/third_party/fwkacllib/inc/ops/internal_ops.h +++ b/third_party/fwkacllib/inc/ops/internal_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index f6cc8694..0f362d31 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -410,10 +410,10 @@ form square matrices. 
\n */ REG_OP(Svd) - .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT })) + .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) .ATTR(compute_uv, Bool, true) .ATTR(full_matrices, Bool, false) .OP_END_FACTORY_REG(Svd) diff --git a/third_party/fwkacllib/inc/ops/list_ops.h b/third_party/fwkacllib/inc/ops/list_ops.h index 0aa94e73..53024878 100644 --- a/third_party/fwkacllib/inc/ops/list_ops.h +++ b/third_party/fwkacllib/inc/ops/list_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h index dd565657..a20370fd 100644 --- a/third_party/fwkacllib/inc/ops/logging_ops.h +++ b/third_party/fwkacllib/inc/ops/logging_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h index b1fc254f..3fdc01fe 100644 --- a/third_party/fwkacllib/inc/ops/lookup_ops.h +++ b/third_party/fwkacllib/inc/ops/lookup_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 6eb418d8..48867203 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -480,7 +480,7 @@ REG_OP(HistogramFixedWidth) .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) .INPUT(nbins, TensorType({DT_INT32})) .OUTPUT(y, TensorType({DT_INT32})) - .ATTR(dtype, String, "int32") + .ATTR(dtype, Int, 3) .OP_END_FACTORY_REG(HistogramFixedWidth) /** @@ -511,7 +511,7 @@ REG_OP(HistogramFixedWidthD) .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_INT32})) .REQUIRED_ATTR(nbins, Int) - .ATTR(dtype, String, "int32") + .ATTR(dtype, Int, 3) .OP_END_FACTORY_REG(HistogramFixedWidthD) /** @@ -938,6 +938,57 @@ REG_OP(LpNorm) .ATTR(epsilon, Float, 1e-12) .OP_END_FACTORY_REG(LpNorm) +/** +* @brief Computes LpNormReduce. + +* @par Inputs: +* x: An ND tensor of type float16, float32. \n +* +* @par Attributes: +* @li p: Int, "inf" or "-inf", default value is 2. +* @li axes: ListInt, {} means all axes will be computed. +* @li keepdim: Bool, default is false. +* @li epsilon: Float, default is 1e-12. 
\n + +* @par Outputs: +* y: An ND tensor of type float16, float32. The shape of y is depending +* on axes and keepdim. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpNormReduce. +*/ +REG_OP(LpNormReduce) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Int, 2) + .ATTR(axes, ListInt, {}) + .ATTR(keepdim, Bool, false) + .ATTR(epsilon, Float, 1e-12) + .OP_END_FACTORY_REG(LpNormReduce) + +/** +* @brief Computes LpNormUpdate. + +* @par Inputs: +* x: An ND tensor of type float16, float32. \n +* +* @par Attributes: +* @li p: Int, "inf" or "-inf", default value is 2. +* @li epsilon: Float, default is 1e-12. \n + +* @par Outputs: +* y: An ND tensor of type float16, float32. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpNormUpdate. +*/ +REG_OP(LpNormUpdate) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Int, 2) + .ATTR(epsilon, Float, 1e-12) + .OP_END_FACTORY_REG(LpNormUpdate) + /** * @brief get complex. diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index e82251bb..289a33a6 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,10 +49,10 @@ namespace ge { * Compatible with the TensorFlow operator BatchMatmul. 
*/ REG_OP(MatMul) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .ATTR(transpose_x1, Bool, false) .ATTR(transpose_x2, Bool, false) .OP_END_FACTORY_REG(MatMul) @@ -88,10 +88,10 @@ REG_OP(MatMul) * Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(MatMulV2) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8 DT_INT4})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) .ATTR(transpose_x1, Bool, false) .ATTR(transpose_x2, Bool, false) @@ -531,6 +531,36 @@ REG_OP(ScatterAdd) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterAdd) +/** +*@brief Adds sparse "updates" to a variable reference . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor . +*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@li indices: An ND Tensor of type int32 or int64 + +*@li updates: An ND Tensor . +*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@par Attributes: +* axis: An required int. 
The axis along which to index. \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the pytorch operator ScatterAdd. +*/ +REG_OP(ScatterAddWithAxis) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(ScatterAddWithAxis) + /** *@brief Divides a variable reference by sparse updates . \n @@ -1066,6 +1096,40 @@ REG_OP(MatrixSetDiagV2) .OUTPUT(output, TensorType::BasicType()) .OP_END_FACTORY_REG(MatrixSetDiagV2) +/** +*@brief Returns a batched matrix tensor with new batched diagonal values . \n + +*@par Inputs: +* Three inputs, including: +*@li input: "Rank `r+1`, where `r >= 1`. \n + +*@li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n + +*@li k: +*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n +*diagonal, and negative value means subdiagonals. `k` can be a single integer \n +*(for a single diagonal) or a pair of integers specifying the low and high ends \n +*of a matrix band. `k[0]` must not be larger than `k[1]`. \n + +*@par Attributes: +*@li align: An optional string. Defaults to RIGHT_LEFT. It is a string specifying \n +*how superdiagonals and subdiagonals should be aligned, respectively. \n +*other optional: LEFT_RIGHT, LEFT_LEFT, and RIGHT_RIGHT.\n + +*@par Outputs: +*output: Rank `r+1`, with `output.shape = input.shape` . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixSetDiagV3. 
+*/ +REG_OP(MatrixSetDiagV3) + .INPUT(input, TensorType::BasicType()) + .INPUT(diagonal, TensorType::BasicType()) + .INPUT(k, TensorType({DT_INT32})) + .OUTPUT(output, TensorType::BasicType()) + .ATTR(align, String, "RIGHT_LEFT") + .OP_END_FACTORY_REG(MatrixSetDiagV3) + /** *@brief Returns a batched diagonal tensor with given batched diagonal values . \n diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 9629976e..66d67551 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -114,7 +114,8 @@ Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. -Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n +Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . +*@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow, only has one useless element. 
\n *@attention Constraints: *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, @@ -136,11 +137,35 @@ REG_OP(BatchNorm) .OUTPUT(batch_variance, TensorType({DT_FLOAT})) .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_3, TensorType({DT_FLOAT})) .ATTR(epsilon, Float, 0.0001) .ATTR(data_format, String, "NHWC") .ATTR(is_training, Bool, true) .OP_END_FACTORY_REG(BatchNorm) +/** +*@brief part of SyncBatchNormBackward . \n + +*@par Inputs: +* Four inputs, including: +*@li sum_dy: A Tensor. Must be one of the following types: float16, float32 . +*@li sum_dy_dx_pad: A Tensor. Must be one of the following types: float16, float32 . +*@li mean: A Tensor. Must be one of the following types: float16, float32 . +*@li invert_std: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Outputs: +*@li sum_dy_xmu: A Tensor. Has the same type and format as input "sum_dy" +*@li y: A Tensor. Has the same type and format as input "sum_dy" . \n +*/ +REG_OP(SyncBatchNormBackwardReduce) + .INPUT(sum_dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(sum_dy_dx_pad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(invert_std, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(sum_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(SyncBatchNormBackwardReduce) + /** *@brief Performs batch normalization . \n @@ -260,7 +285,8 @@ REG_OP(BatchNormExt2) *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. *@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. *@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. -*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. 
It is an output of BatchNorm . \n +*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . +*@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n *@par Attributes: *@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". @@ -287,6 +313,7 @@ REG_OP(BatchNormGrad) .INPUT(scale, TensorType({DT_FLOAT})) .INPUT(reserve_space_1, TensorType({DT_FLOAT})) .INPUT(reserve_space_2, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(reserve_space_3, TensorType({DT_FLOAT})) .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index ed7cb9b5..025f669c 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -269,10 +269,10 @@ REG_OP(DepthwiseConv2DBackpropInputD) *@par Inputs: *Two required inputs and two optional inputs, including: \n -* @li x: A 4D tensor of type float16 or int8, with shape [N, C, H, W] or [N, H, W, C] -* @li filter: A 4D tensor of type float16 or int8, with shape [H, W, C, K] +* @li x: A 4D tensor of type float16 or int8 or int4, with shape [N, C, H, W] or [N, H, W, C] +* @li filter: A 4D tensor of type float16 or int8 or int4, with shape [H, W, C, K] * @li bias: An optional tensor of type float16 or int32 -* @li offset_w: An optional float16 or int8, used for quantized inference +* @li offset_w: An optional float16 or int8 or int4, used for quantized inference * @par Attributes: * @li strides: A required list or tuple. The stride of the sliding window for @@ -312,10 +312,10 @@ REG_OP(DepthwiseConv2DBackpropInputD) * @li Compatible with the Caffe operator DepthwiseConv2D. */ REG_OP(DepthwiseConv2D) - .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) - .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) - .OPTIONAL_INPUT(offset_w, TensorType({DT_FLOAT16, DT_INT8})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) .REQUIRED_ATTR(strides, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -369,16 +369,14 @@ REG_OP(BiasAddGrad) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | out_bckprop | filter | y\n - ------------|-------------|---------|--------\n - | Data Type | float16 | float16 | float16\n - | |-------------|---------|--------\n - | | float32 | float32 | float32\n - | |-------------|---------|--------\n - | | float64 | float64 | float64\n - ------------|-------------|---------|--------\n - | Format | NCHW | NCHW | NCHW\n - | | NHWC | HWCN | NHWC\n + *\n + | Tensor | 
out_bckprop | filter | y |\n + |-----------|-------------|---------|--------|\n + | Data Type | float16 | float16 | float16|\n + | | float32 | float32 | float32|\n + | | float64 | float64 | float64|\n + | Format | NCHW | NCHW | NCHW |\n + | | NHWC | HWCN | NHWC |\n *\n * For float32 and float64 type, the actual calculation on the chip is based on * float16. @@ -400,30 +398,25 @@ REG_OP(BiasAddGrad) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | input_size | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Filter | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | out_backprop | H*strideH| [1, 200000]\n - | | W*strideW| [1, 4096]\n - -------------------|----------|--------------\n - | y(fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | input_size | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Filter | H | [1, 255] |\n + | | W | [1, 255] |\n + | out_backprop | H*strideH| [1, 200000] |\n + | | W*strideW| [1, 4096] |\n + | y(fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when @@ -495,9 +488,9 @@ REG_OP(Conv2DBackpropInput) * Warning: THIS FUNCTION IS DEPRECATED. 
Please use Conv2DBackpropInput instead. */ REG_OP(Conv2DBackpropInputD) - .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) - .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8, DT_BF16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32, DT_BF16})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -523,13 +516,12 @@ REG_OP(Conv2DBackpropInputD) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | x | filter | bias | y\n - ------------|---------|---------|---------|--------\n - | Data Type | float16 | float16 | float16 | float16\n - | |---------|---------|---------|--------\n - | | int8 | int8 | int32 | int32\n - ------------|---------|---------|---------|--------\n - | Format | NCHW | NCHW | ND | NCHW\n + *\n + | Tensor | x | filter | bias | y |\n + |-----------|---------|---------|---------|--------|\n + | Data Type | float16 | float16 | float16 | float16|\n + | | int8 | int8 | int32 | int32 |\n + | Format | NCHW | NCHW | ND | NCHW |\n *\n * For int8, a dequant or requant operator must be followed. 
*\n @@ -553,29 +545,24 @@ REG_OP(Conv2DBackpropInputD) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | x (out_backprop) | H*strideH| [1, 200000]\n - | | W*strideW| [1, 4096]\n - -------------------|----------|--------------\n - | Filter | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | y (fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | Offset_x | | [-128, 127]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | x (out_backprop) | H*strideH| [1, 200000] |\n + | | W*strideW| [1, 4096] |\n + | Filter | H | [1, 255] |\n + | | W | [1, 255] |\n + | y (fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n + | Offset_x | | [-128, 127] |\n *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 @@ -631,16 +618,14 @@ REG_OP(Deconvolution) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | x | out_backprop | y\n - ------------|---------|--------------|---------\n - | Data Type | float16 | float16 | float16\n - | |---------|--------------|---------\n - | | float32 | float32 | float32\n - | |---------|--------------|---------\n - | | float64 | float64 | float64\n - 
|-----------|---------|--------------|---------\n - | Format | NCHW | NCHW | NCHW\n - | | NHWC | NHWC | HWCN\n + *\n + | Tensor | x | out_backprop | y |\n + |-----------|---------|--------------|---------|\n + | Data Type | float16 | float16 | float16 |\n + | | float32 | float32 | float32 |\n + | | float64 | float64 | float64 |\n + | Format | NCHW | NCHW | NCHW |\n + | | NHWC | NHWC | HWCN |\n *\n * For float32 and float64 type of x and outbackprop, the actual calculation on the chip * is based on float16. @@ -662,30 +647,25 @@ REG_OP(Deconvolution) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | x(fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Filter Size | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | out_backprop | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | y | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | x(fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Filter Size | H | [1, 255] |\n + | | W | [1, 255] |\n + | out_backprop | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | y | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n *\n *@par Outputs: * y: A Tensor. 
Has the same type as x, has the same format as filter_size. @@ -853,11 +833,11 @@ REG_OP(Conv2DBackpropFilterD) *@li Compatible with the Caffe operator 2D "Convolution". */ REG_OP(Conv2D) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16})) .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_BF16})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -1441,14 +1421,13 @@ REG_OP(Conv3DTransposeD) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | x | filter | bias | y\n - ------------|---------|---------|---------|--------\n - | Data Type | float16 | float16 | float16 | float16\n - | |---------|---------|---------|--------\n - | | int8 | int8 | int32 | int32\n - ------------|---------|---------|---------|--------\n - | Format | NCHW | NCHW | ND | NCHW\n - | | NHWC | HWCN | | NHWC\n + *\n + | Tensor | x | filter | bias | y |\n + |-----------|---------|---------|---------|--------|\n + | Data Type | float16 | float16 | float16 | float16|\n + | | int8 | int8 | int32 | int32 |\n + | Format | NCHW | NCHW | ND | NCHW |\n + | | NHWC | HWCN | | NHWC |\n *\n * For int8, a dequant or requant operator must be followed. 
*\n @@ -1476,32 +1455,26 @@ REG_OP(Conv3DTransposeD) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | input_size | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | x (out_backprop) | H*strideH| [1, 200000]\n - | | W*strideW| [1, 4096]\n - -------------------|----------|--------------\n - | filter | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | y (fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | Offset_x | | [-128, 127]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | input_size | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | x (out_backprop) | H*strideH| [1, 200000] |\n + | | W*strideW| [1, 4096] |\n + | filter | H | [1, 255] |\n + | | W | [1, 255] |\n + | y (fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n + | Offset_x | | [-128, 127] |\n *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 0011c72e..e960234e 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ 
-1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -135,7 +135,8 @@ REG_OP(CheckValid) * the value "4" refers to "x0", "x1", "y0", and "y1" . \n *@par Attributes: -*mode: Computation mode, a character string with the value range of [iou, iof] . \n +*@li mode: Computation mode, a character string with the value range of [iou, iof] +*@li eps: An optional float, prevent division by 0, default value is 1.0 . \n *@par Outputs: *overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying @@ -150,6 +151,7 @@ REG_OP(Iou) .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(mode, String, "iou") + .ATTR(eps, Float, 1.0) .OP_END_FACTORY_REG(Iou) /** @@ -205,7 +207,8 @@ the value "5" indicates the indexes of images where the ROIs are located, "x0", *@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. *@li sample_num: An optional attribute of type int, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", the sampling frequency is -equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n +equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . +*@li roi_end_mode: An optional attribute of type int, specifying the align mode .\n *@par Outputs: *xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". 
@@ -220,6 +223,7 @@ REG_OP(ROIAlignGrad) .REQUIRED_ATTR(pooled_height, Int) .REQUIRED_ATTR(spatial_scale, Float) .ATTR(sample_num, Int, 2) + .ATTR(roi_end_mode, Int, 1) .OP_END_FACTORY_REG(ROIAlignGrad) /** @@ -579,6 +583,172 @@ REG_OP(Yolo) .ATTR(softmaxtree, Bool, false) .OP_END_FACTORY_REG(Yolo) +/** +*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n + +*@par Inputs: +*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W), +where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged +as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n + +*@par Attributes: +*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3. +*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h). +*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024]. +*@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3".Defaults to "V3" +*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false". +*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false". +*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n + +*@par Outputs: +*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. 
+*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. +*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n + +*@attention Constraints: +*@li This operator applies to YOLO v2,v3 and v5 networks. +*@li The succeeding layer of the Yolo operator must be operator Yolov5DetectionOutput. +*@par Third-party framework compatibility +* It is a custom operator. +*/ +REG_OP(YoloPreDetection) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(yolo_version, String, "V5") + .ATTR(softmax, Bool, false) + .ATTR(background, Bool, false) + .ATTR(softmaxtree, Bool, false) + .OP_END_FACTORY_REG(YoloPreDetection) + +/** +*@brief Performs YOLO V5 detection . \n + +*@par Inputs: +*Ten inputs, including: +*@li Operator Yolov5DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n +There are three Yolo operators at Yolov5DetectionOutput's preceding layer on Yolo v5. For details, see the description of operator Yolo. +*@li img_info: A float16 or float32, describing the image information including the required image height and width \n +* and the actual image height and width. + +*@par Attributes: +*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. 
+*@li coords: Specifies the number of coordinate parameters. Must be 4. +*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. +*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. + +*@li post_nms_topn: An optional int32. This attribute is reserved. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. + +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n + +*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". + +*@par Outputs: +*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), +* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. +*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. +* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 + +*@attention Constraints:\n +*@li This operator applies only to the YOLO v5 network. +*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators. + +*@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. 
+*/ +REG_OP(YoloV5DetectionOutput) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(biases, ListFloat) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(relative, Bool, true) + .ATTR(obj_threshold, Float, 0.5) + .ATTR(post_nms_topn, Int, 512) + .ATTR(score_threshold, Float, 0.5) + .ATTR(iou_threshold, Float, 0.45) + .ATTR(pre_nms_topn, Int, 512) + .ATTR(N, Int, 10) + .ATTR(resize_origin_img_to_net, Bool, false) + .ATTR(out_box_dim, Int, 3) + .ATTR(alpha, Float, 2.0) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(box_out_num, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(YoloV5DetectionOutput) + +/** +*@brief Performs YOLO V5 detection. + +*@par Inputs: +*16 Input, including: +*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v5) are used as the inputs of operator Yolov5DetectionOutput. +* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +*@li imginfo: A float16, describing the image information including the required image height and width +* and the actual image height and width. +*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. +* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] +* is formed for the three Yolo outputs, respectively .It's a dynamic input. \n + +*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n +*@par Attributes: +*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. +*@li coords: Specifies the number of coordinate parameters. Must be 4. 
+*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. +*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. +*@li post_nms_topn: An optional int32. This attribute is reserved. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. +*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". +* +*@par Outputs: +*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), +* describing the information of each output box. +* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. +*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. +* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 +* +*@attention Constraints: +*@li This operator applies only to the YOLO v5 network. +*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators. +*@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. 
+*/ +REG_OP(YoloV5DetectionOutputD) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(biases, ListFloat) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(relative, Bool, true) + .ATTR(obj_threshold, Float, 0.5) + .ATTR(post_nms_topn, Int, 512) + .ATTR(score_threshold, Float, 0.5) + .ATTR(iou_threshold, Float, 0.45) + .ATTR(pre_nms_topn, Int, 512) + .ATTR(N, Int, 10) + .ATTR(resize_origin_img_to_net, Bool, false) + .ATTR(out_box_dim, Int, 3) + .ATTR(alpha, Float, 2.0) + .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(box_out_num, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(YoloV5DetectionOutputD) + /** *@brief Performs YOLO V2 detection . \n diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 9ce7abfd..d66c8948 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h index 5b1a4dd0..a08b610b 100644 --- a/third_party/fwkacllib/inc/ops/nn_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index ee599a76..978c480c 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 9dd502cd..39234057 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2645,6 +2645,19 @@ REG_OP(SparseApplyAdadeltaD) REG_OP(AtomicAddrClean) .ATTR(automic_add_mem_size, ListInt, {}) .OP_END_FACTORY_REG(AtomicAddrClean) + +/** +*@brief Clean memory of workspace list . \n + +*@par Attributes: +* @li workspace_size: sizes of workspaces . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(DynamicAtomicAddrClean) + .ATTR(automic_add_mem_size, ListInt, {}) + .OP_END_FACTORY_REG(DynamicAtomicAddrClean) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h index b27b1fa0..7834591c 100644 --- a/third_party/fwkacllib/inc/ops/no_op.h +++ b/third_party/fwkacllib/inc/ops/no_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index 067357de..a4f2fe80 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,6 +58,25 @@ REG_OP(HardSwish) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(HardSwish) +/** +*@brief Computes the gradient for the hard_swish of "x" . \n + +* @par Inputs: +*Two inputs, including: +* @li grad: A Tensor. Must be one of the following types: float16, float32 +* @li x: A Tensor of the same type as "grad" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "grad". +* @par Third-party framework compatibility +* Compatible with the Torch operator HardSwishGrad. +*/ +REG_OP(HardSwishGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(HardSwishGrad) + /** *@brief Computes the for the Swish of "x" . 
\n @@ -80,6 +99,29 @@ REG_OP(Swish) .ATTR(scale, Float, 1.0) .OP_END_FACTORY_REG(Swish) +/** +*@brief Computes the gradient for the Swish of "x" . \n + +*@par Inputs: +*Three inputs, including: +* @li grad: A Tensor. Must be one of the following types: float16, float32 +* @li x: A Tensor of the same type as "grad". +* @li y: A Tensor of the same type as "grad" . \n +* @par Attributes: +* scale: A optional scalar. The data type is float . \n +*@par Outputs: +*grad_x: A Tensor. Has the same type as "grad". +*@par Third-party framework compatibility +*Compatible with the Torch operator SwishGrad +*/ +REG_OP(SwishGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(scale, Float, 1.0) + .OP_END_FACTORY_REG(SwishGrad) + /** *@brief Computes the gradient for the gelu of "x" . \n diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h index f36d2935..8d7ef9f9 100644 --- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h +++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ocr_ops.h b/third_party/fwkacllib/inc/ops/ocr_ops.h new file mode 100644 index 00000000..639c34de --- /dev/null +++ b/third_party/fwkacllib/inc/ops/ocr_ops.h @@ -0,0 +1,268 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file ocr_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_OCR_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_OCR_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief batch input x according to attr batch_size and enqueue. +*@par Inputs: +*@li x: A Tensor need to batch of type float16/float32/float64/int8/int32/int64/uint8/uint32/uint64. \n +*@li queue_id:A Tensor of type uint32, queue id. + +*@par Outputs: +*enqueue_count: A Tensor of type int32, enqueue tensor number. + +*@par Attributes: +*@li batch_size: An optional int. Batch size. +*@li queue_name: An optional string. Queue name. +*@li queue_depth: An optional int. Queue depth. +*@li pad_mode: An optional string from: '"REPLICATE", "ZERO"'. Defaults to +"REPLICATE". Pad mode. +*/ +REG_OP(BatchEnqueue) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ + DT_INT8, DT_INT32, DT_INT64, DT_UINT8, DT_UINT32, DT_UINT64})) + .OPTIONAL_INPUT(queue_id, TensorType({DT_UINT32})) + .OUTPUT(enqueue_count, TensorType({DT_INT32})) + .ATTR(batch_size, Int, 8) + .ATTR(queue_name, String, "") + .ATTR(queue_depth, Int, 100) + .ATTR(pad_mode, String, "REPLICATE") + .OP_END_FACTORY_REG(BatchEnqueue) + +/** +*@brief pre-handle and batch OCR recognition input images according to attr batch_size. +*@par Inputs: +*@li imgs_data: A Tensor of type uint8. Multi img data value. \n +*@li imgs_offset:A Tensor of type int32. Offset of every img data in input imgs_data. \n +*@li imgs_size:A Tensor of type int32. Shape of every img data. \n +*@li langs:A Tensor of type int32. Lang of every img data.
 \n +*@li langs_score:A Tensor of type float16/float32. Lang score of every img data. \n + +*@par Outputs: +*@li imgs: A Tensor of type uint8. Multi imgs data after recognition pre handle. +*@li imgs_relation: A Tensor of type int32. Output imgs orders in input imgs. +*@li imgs_lang: A Tensor of type int32. Output batch imgs langs. + +*@par Attributes: +*@li batch_size: An optional int. Batch size. +*@li data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to +"NHWC". Data format. +*@li pad_mode: An optional string from: '"REPLICATE", "ZERO"'. Defaults to +"REPLICATE". Pad mode. +*/ +REG_OP(OCRRecognitionPreHandle) + .INPUT(imgs_data, TensorType({DT_UINT8})) + .INPUT(imgs_offset, TensorType({DT_INT32})) + .INPUT(imgs_size, TensorType({DT_INT32})) + .INPUT(langs, TensorType({DT_INT32})) + .INPUT(langs_score, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(imgs, TensorType({DT_UINT8})) + .OUTPUT(imgs_relation, TensorType({DT_INT32})) + .OUTPUT(imgs_lang, TensorType({DT_INT32})) + .ATTR(batch_size, Int, 8) + .ATTR(data_format, String, "NHWC") + .ATTR(pad_mode, String, "REPLICATE") + .OP_END_FACTORY_REG(OCRRecognitionPreHandle) + +/** +*@brief ocr detection pre handle. +*@par Inputs: +*img: A Tensor of type uint8. img data value. \n + +*@par Outputs: +*@li resized_img: A Tensor of type uint8. Img after detection pre handle. +*@li h_scale: A Tensor of type float. H scale. +*@li w_scale: A Tensor of type float. W scale. + +*@par Attributes: +*data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to +"NHWC". Data format. +*/ +REG_OP(OCRDetectionPreHandle) + .INPUT(img, TensorType({DT_UINT8})) + .OUTPUT(resized_img, TensorType({DT_UINT8})) + .OUTPUT(h_scale, TensorType({DT_FLOAT})) + .OUTPUT(w_scale, TensorType({DT_FLOAT})) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(OCRDetectionPreHandle) + +/** +*@brief ocr identify prehandle. +*@par Inputs: +*@li imgs_data: A Tensor of type uint8. Multi img data value. \n +*@li imgs_offset:A Tensor of type int32. 
Offset of every img data in input imgs_data. \n +*@li imgs_size:A Tensor of type int32. Shape of every img data. \n + +*@par Outputs: +*resized_imgs: A Tensor of type uint8. Multi imgs after identify pre handle. + +*@par Attributes: +*@li size: An optional int. Size. +*@li data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to +"NHWC". Data format. +*/ +REG_OP(OCRIdentifyPreHandle) + .INPUT(imgs_data, TensorType({DT_UINT8})) + .INPUT(imgs_offset, TensorType({DT_INT32})) + .INPUT(imgs_size, TensorType({DT_INT32})) + .OUTPUT(resized_imgs, TensorType({DT_UINT8})) + .ATTR(size, ListInt, {}) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(OCRIdentifyPreHandle) + +/** +*@brief batch dilate polygons according to expand_scale. +*@par Inputs: +*@li polys_data: A Tensor of type int32. point data of every polygon. \n +*@li polys_offset:A Tensor of type int32. Offset of every polygon . \n +*@li polys_size:A Tensor of type int32. Size of every polygon. \n +*@li score:A Tensor of type float. Score of every point in image. \n +*@li min_border:A Tensor of type int32. Minimum width of each polygon. \n +*@li min_area_thr:A Tensor of type int32. Minimum area of each polygon. \n +*@li score_thr:A Tensor of type float. Minimum confidence score of each polygon. \n +*@li expands_cale:A Tensor of type float. Polygon expansion multiple. \n + +*@par Outputs: +*@li dilated_polys_data: A Tensor of type int32. Point data of every dilated polygon. \n +*@li dilated_polys_offset: A Tensor of type int32. Offset of every dilated polygon . \n +*@li dilated_polys_size: A Tensor of type int32. Size of every dilated polygon. 
 \n +*/ +REG_OP(BatchDilatePolys) + .INPUT(polys_data, TensorType({DT_INT32})) + .INPUT(polys_offset, TensorType({DT_INT32})) + .INPUT(polys_size, TensorType({DT_INT32})) + .INPUT(score, TensorType({DT_FLOAT})) + .INPUT(min_border, TensorType({DT_INT32})) + .INPUT(min_area_thr, TensorType({DT_INT32})) + .INPUT(score_thr, TensorType({DT_FLOAT})) + .INPUT(expands_cale, TensorType({DT_FLOAT})) + .OUTPUT(dilated_polys_data, TensorType({DT_INT32})) + .OUTPUT(dilated_polys_offset, TensorType({DT_INT32})) + .OUTPUT(dilated_polys_size, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(BatchDilatePolys) + +/** +*@brief find contours according to img. +*@par Inputs: +*@li img: A Tensor of type uint8. Img data value. \n + +*@par Outputs: +*@li polys_data: A Tensor of type int32. Point data of every contours. \n +*@li polys_offset:A Tensor of type int32. Offset of every contours . \n +*@li polys_size:A Tensor of type int32. Size of every contours. \n +*/ +REG_OP(OCRFindContours) + .INPUT(img, TensorType({DT_UINT8})) + .OUTPUT(polys_data, TensorType({DT_INT32})) + .OUTPUT(polys_offset, TensorType({DT_INT32})) + .OUTPUT(polys_size, TensorType({DT_INT32})) + .ATTR(value_mode, Int, 0) + .OP_END_FACTORY_REG(OCRFindContours) + +/** +*@brief dequeue data according to queue_id and queue_name. +*@par Inputs: +*@li queue_id:A Tensor of type uint32, queue id. \n + +*@par Outputs: +*data: A Tensor of type RealNumberType, dequeue tensor. \n + +*@par Attributes: +*@li output_type: A required type. dequeue data type. +*@li output_shape: A required listint. dequeue data shape. +*@li queue_name: An optional string. Queue name. \n +*/ +REG_OP(Dequeue) + .OPTIONAL_INPUT(queue_id, TensorType({DT_UINT32})) + .OUTPUT(data, TensorType::RealNumberType()) + .REQUIRED_ATTR(output_type, Type) + .REQUIRED_ATTR(output_shape, ListInt) + .ATTR(queue_name, String, "") + .OP_END_FACTORY_REG(Dequeue); + +/** +*@brief ocr detection post handle. +*@par Inputs: +*@li img: A Tensor of type uint8. original image data. 
+*@li polys_data: A Tensor of type int32. point data of every poly. +*@li polys_offset:A Tensor of type int32. Offset of every poly. +*@li polys_size:A Tensor of type int32. Size of every poly. \n + +*@par Outputs: +*@li imgs_data: A Tensor of type uint8. imgs_data of original image. +*@li imgs_offset: A Tensor of type int32. Offset of every imgs data. +*@li imgs_size: A Tensor of type int32. Shape of every imgs data. +*@li rect_points: A Tensor of type int32. Rect points of every imgs. \n + +*@par Attributes: +*@li data_format: An optional string from: '"NHWC", "NCHW"'. Defaults to +"NHWC". Data format. +*/ +REG_OP(OCRDetectionPostHandle) + .INPUT(img, TensorType({DT_UINT8})) + .INPUT(polys_data, TensorType({DT_INT32})) + .INPUT(polys_offset, TensorType({DT_INT32})) + .INPUT(polys_size, TensorType({DT_INT32})) + .OUTPUT(imgs_data, TensorType({DT_UINT8})) + .OUTPUT(imgs_offset, TensorType({DT_INT32})) + .OUTPUT(imgs_size, TensorType({DT_INT32})) + .OUTPUT(rect_points, TensorType({DT_INT32})) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(OCRDetectionPostHandle); + +/** +*@brief resize and clip polys. +*@par Inputs: +*@li polys_data: A Tensor of type int32. point data of every poly. +*@li polys_offset:A Tensor of type int32. Offset of every poly . +*@li polys_size:A Tensor of type int32. Size of every poly. +*@li img_h:A Tensor of type int32. Height of original image. +*@li img_w:A Tensor of type int32. Width of original image. +*@li h_scale:A Tensor of type float. Expand scale of height. +*@li w_scale:A Tensor of type float. Expand scale of width. \n + +*@par Outputs: +*@li clipped_polys_data: A Tensor of type int32. point data of every clipped poly. \n +*@li clipped_polys_offset: A Tensor of type int32. Offset of every clipped poly . \n +*@li clipped_polys_size: A Tensor of type int32. Size of every clipped poly. 
\n +*/ +REG_OP(ResizeAndClipPolys) + .INPUT(polys_data, TensorType({DT_INT32})) + .INPUT(polys_offset, TensorType({DT_INT32})) + .INPUT(polys_size, TensorType({DT_INT32})) + .INPUT(img_h, TensorType({DT_INT32})) + .INPUT(img_w, TensorType({DT_INT32})) + .INPUT(h_scale, TensorType({DT_FLOAT})) + .INPUT(w_scale, TensorType({DT_FLOAT})) + .OUTPUT(clipped_polys_data, TensorType({DT_INT32})) + .OUTPUT(clipped_polys_offset, TensorType({DT_INT32})) + .OUTPUT(clipped_polys_size, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(ResizeAndClipPolys); + + +} // namespace ge + + +#endif // OPS_BUILT_IN_OP_PROTO_INC_OCR_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/outfeed_ops.h b/third_party/fwkacllib/inc/ops/outfeed_ops.h index 53b9d701..e0b783bc 100644 --- a/third_party/fwkacllib/inc/ops/outfeed_ops.h +++ b/third_party/fwkacllib/inc/ops/outfeed_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 9d0e7a62..a9a3b0f0 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -274,6 +274,38 @@ REG_OP(PadV3) .ATTR(mode, String, "constant") .ATTR(paddings_contiguous, Bool, true) .OP_END_FACTORY_REG(PadV3) + + /** +*@brief Cal the grad of Pads. + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. 
Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64. +* @li paddings: A Tensor of type int32 or int64. + +*@par Attributes: +* @li mode: An optional string, Defaults to "reflect", indicates paddings mode, +* support "reflect", "edge" +* @li paddings_contiguous: An optional bool value, Defaults to true. +* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] +* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator PadGrad. +*/ + +REG_OP(PadV3Grad) + .INPUT(x, TensorType::BasicType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(mode, String, "reflect") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3Grad) /** *@brief Pads a tensor. diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index e578997c..03024f96 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index 0ed46e80..e4b1075b 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -85,7 +85,7 @@ REG_OP(Quantize) *@brief Quantizes the input . \n *@par Inputs: -*x: An NC1HWC0 tensor of type float16 or float32, specifying the input . \n +*x: An tensor of type float16 or float32, specifying the input . \n *@par Attributes: *@li scale: A required float32, specifying the scaling ratio. @@ -96,7 +96,7 @@ REG_OP(Quantize) *@li dst_type: A optional int32, specifying the output data type. Defaults to "DT_INT8" . \n *@par Outputs: -*y: The quantized output tensor of type int8 or int4 and with format NC1HWC0 . \n +*y: The quantized output tensor of type int8 or int4. \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. @@ -115,8 +115,8 @@ REG_OP(AscendQuant) *@brief Dequantizes the input . \n *@par Inputs: -*@li x: An NC1HWC0 tensor of type int32, specifying the input. -*@li deq_scale: An NC1HWC0 tensor of type float16 or uint64, specifying the scaling ratio . \n +*@li x: An tensor of type int32, specifying the input. +*@li deq_scale: An tensor of type float16 or uint64, specifying the scaling ratio . \n *@par Attributes: *@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". @@ -124,7 +124,7 @@ REG_OP(AscendQuant) *@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n *@par Outputs: -*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0 . 
\n +*y: The dequantized output tensor of type float16 or float32. \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. @@ -142,7 +142,7 @@ REG_OP(AscendDequant) *@brief Anti quantizes the input . \n *@par Inputs: -*x: An NC1HWC0 tensor of type int8, specifying the input . \n +*x: An tensor of type int8, specifying the input . \n *@par Attributes: *@li scale: A required float32 scale. @@ -151,7 +151,7 @@ REG_OP(AscendDequant) *@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False" . \n *@par Outputs: -*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0 . \n +*y: The dequantized output tensor of type float16 or float32. \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. @@ -169,15 +169,15 @@ REG_OP(AscendAntiQuant) *@brief Dequantizes the input of int16 . \n *@par Inputs: -*@li x0: An NC1HWC0 tensor of type int32, specifying the input. -*@li deq_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. -*@li x1: An NC1HWC0 tensor of type int16, specifying the input . \n +*@li x0: An tensor of type int32, specifying the input. +*@li deq_scale: An tensor of type uint64, specifying the scaling ratio. +*@li x1: An tensor of type int16, specifying the input . \n *@par Attributes: *relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n *@par Outputs: -*y: The dequantized output tensor of type int16 and with format NC1HWC0 . \n +*y: The dequantized output tensor of type int16. \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. @@ -194,14 +194,14 @@ REG_OP(AscendDequantS16) *@brief Requantizes the input . \n *@par Inputs: -*@li x: An NC1HWC0 tensor of type int32, specifying the input. 
-*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio . \n +*@li x: An tensor of type int32, specifying the input. +*@li req_scale: An tensor of type uint64, specifying the scaling ratio . \n *@par Attributes: *relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n *@par Outputs: -*y: The dequantized output tensor of type int8 and with format NC1HWC0 . \n +*y: The dequantized output tensor of type int8. \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. @@ -217,17 +217,17 @@ REG_OP(AscendRequant) *@brief Requantizes the input of int16 . \n *@par Inputs: -*@li x0: An NC1HWC0 tensor of type int16, specifying the input. -*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. -*@li x1: An NC1HWC0 tensor of type int16 . \n +*@li x0: An tensor of type int16, specifying the input. +*@li req_scale: An tensor of type uint64, specifying the scaling ratio. +*@li x1: An tensor of type int16 . \n *@par Attributes: *@li dual_output: A optional bool, specifying whether to perform dual ouput, either "True" or "False". Defaults to "False". *@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n *@par Outputs: -*@li y0: The dequantized output tensor of type int8 and with format NC1HWC0. -*@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n +*@li y0: The dequantized output tensor of type int8. +*@li y1: The dequantized output tensor of type int16. \n *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. 
diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h index 5af2dd74..9d116760 100644 --- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index ceaa64e4..2e253ed4 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h index 4376437f..b2caa0be 100644 --- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index 66f9b65f..28fbb7f2 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -685,6 +685,24 @@ REG_OP(Uniform) .ATTR(from, Float, 0.0) .ATTR(to, Float, 1.0) .OP_END_FACTORY_REG(Uniform) -} // namespace ge +/** +*@brief Outputs integers consisting of 0 and 1, used for lstm etc. \n +*@par Inputs +* @li time_step: A tensor with data type int64. 0-D. +* @li batch_size: A tensor with data type int64. 0-D. + +*@par Outputs: +*y: A Tensor. Has the type float16 or float, 2-D, [time_step,batch_size]. \n + +*@attention Constraints: +* Compatible with the Caffe operator ContinuationIndicator. +*/ + +REG_OP(ContinuationIndicator) + .REQUIRED_ATTR(time_step, Int) + .REQUIRED_ATTR(batch_size, Int) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(ContinuationIndicator) +} // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 1578ba59..e8c14b1a 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -185,7 +185,7 @@ REG_OP(BN3DTrainingReduceGrad) *@li This operator is a BatchNorm fusion operator for updating the moving averages for training. *This operator is used in conjunction with BNTrainingUpdate. 
-*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square +*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the square * root instruction. */ REG_OP(BNTrainingUpdate) @@ -238,7 +238,7 @@ REG_OP(BNTrainingUpdate) *@li This operator is a BatchNorm fusion operator for updating the moving averages for training. *This operator is used in conjunction with BN3DTrainingUpdate. -*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square +*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the square * root instruction. */ REG_OP(BN3DTrainingUpdate) @@ -277,7 +277,7 @@ REG_OP(BN3DTrainingUpdate) *y: A 5D Tensor of type float16 or float32 for the normalized "x" . \n *@attention Constraints: -*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root +*For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root * instruction. */ REG_OP(BNInfer) @@ -313,7 +313,7 @@ assignmoving average . \n *@attention Constraints: *This operator is used in conjunction with BNTrainingReduce. -For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root instruction. */ REG_OP(BNTrainingUpdateV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -351,7 +351,7 @@ assign moving average . \n *@attention Constraints: *@li This operator is used in conjunction with BNTrainingReduce. -*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root instruction. */ REG_OP(BNTrainingUpdateV3) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -1189,7 +1189,7 @@ for the updated variance. *@attention Constraints: *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with GNTrainingUpdate. 
-*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root instruction. */ REG_OP(GNTrainingUpdate) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -1275,7 +1275,7 @@ REG_OP(ReduceStd) * @par Attributes: -* Three Attributes, including: +* Five Attributes, including: * @li dim: An optional listint, Defaults to "None". \n * @li unbiased: An optional bool. Defaults to "True". * If "True", Use Bessel Correction. @@ -1283,9 +1283,14 @@ REG_OP(ReduceStd) * @li keepdim: An optional bool. Defaults to "False". * If "True", Keep the original tensor dimension. * If "False", Do not keep the original tensor dimension. \n +* @li invert: An optional bool, Defaults to "False". +* If "True", the output is inverse of variance. +* If "False", the output is variance. +* @li epsilon: An optional float, Defaults to 0.001. +* Prevent division by 0. * @par Outputs: -* @li y: A Tensor. It's the std of X. Has the same type as "x". +* @li y: A Tensor. It's the variance of X or reciprocal of variance of X. Has the same type as "x". * @par Third-party framework compatibility * Compatible with the Pytorch operator ReduceStdWithMean. 
@@ -1297,6 +1302,8 @@ REG_OP(ReduceStdWithMean) .ATTR(dim, ListInt, {}) .ATTR(unbiased, Bool, true) .ATTR(keepdim, Bool, false) + .ATTR(invert, Bool, false) + .ATTR(epsilon, Float, 0.001) .OP_END_FACTORY_REG(ReduceStdWithMean) } //namespace ge diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h index 156f2f34..a5d7f9c3 100644 --- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h +++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index cc0bff00..691f1e9f 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -822,7 +822,7 @@ REG_OP(DynamicGRU) *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li bias_input:Must be one of the following types: float16, float32. The format must be ND. *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND. *@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. 
*@par Attributes: @@ -852,7 +852,7 @@ REG_OP(DynamicGRUV2) .INPUT(weight_hidden, TensorType({DT_FLOAT16})) .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -880,7 +880,7 @@ REG_OP(DynamicGRUV2) *@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND. *@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: @@ -913,7 +913,7 @@ REG_OP(DynamicGRUV2Hidden) .INPUT(x_weight_input, TensorType({DT_FLOAT32})) .INPUT(weight_hidden, TensorType({DT_FLOAT16})) .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1049,6 +1049,50 @@ REG_OP(GRUV2HiddenGradCell) .ATTR(gate_order, String, "zrh") .OP_END_FACTORY_REG(GRUV2HiddenGradCell) +/** +*@brief: DynamicGRUCellGrad calculation. +*@par Inputs: +*ten inputs: \n +*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li h:A 4D Tensor. 
Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.+ +*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND. + +*@par Attributes: +*gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. + +*@par Outputs: +*three outputs: \n +*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(DynamicGRUCellGrad) + .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(t_state, TensorType({DT_INT32, DT_INT32})) + .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(gate_order, String, "zrh") + .OP_END_FACTORY_REG(DynamicGRUCellGrad) + /** * @brief Calculates the reversed outputs of the function "embedding". \n @@ -1137,8 +1181,8 @@ REG_OP(CommonLSTM) * * @par Inputs: * @li seq_length: A 1D Tensor. Must be one of the following types: int32. Record the current length of each batch. [batch_size]. - * @li b: A 1D Tensor. Must be one of the following types: fp16/fp32. Record the hidden_size. [4 * hidden_size]. * @li x: A 3D Tensor. Must be one of the following types: fp16/fp32. Record the num_step/batch_size/input_size. [num_step, batch_size, input_size]. + * @li hidden_size: An optional attribute of type int32. pass the hidden_size. \n * * @par Outputs: * seq_mask: A 3D Tensor. Must be one of the following types: fp16/fp32. with the shape of [num_step, batch_size, hidden_size]. 
And has the same type as "b" \n @@ -1148,8 +1192,8 @@ REG_OP(CommonLSTM) */ REG_OP(RnnGenMaskV2) .INPUT(seq_length, TensorType({DT_INT32})) - .INPUT(b, TensorType({{DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(hidden_size, Int) .OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(RnnGenMaskV2) diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h index 850b3e5a..90707602 100644 --- a/third_party/fwkacllib/inc/ops/rpn_ops.h +++ b/third_party/fwkacllib/inc/ops/rpn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h index 5ce6c2e0..0ce473b7 100644 --- a/third_party/fwkacllib/inc/ops/save_ops.h +++ b/third_party/fwkacllib/inc/ops/save_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index 601b360b..e8f3e6b6 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 7f7c4fc8..8812a14f 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -904,8 +904,10 @@ REG_OP(SliceDV2) * @li indices: A Tensor of type int32, specifying the indices of sorted data . \n * @attention Constraints: -* @li k =< 5120 +* @li k =< 4096 * @li Size of the last dimension =< 1458176 +* @li k =< 2048 under lhisi version +* @li Size of the last dimension =< 1040000 under lhisi version * @li sorted = true * @li It's unstable sorted indices on the platform of Ascend310 @@ -1306,8 +1308,7 @@ REG_OP(CumprodD) *@par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, -* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. +*@li x: A Tensor. Must be one of the following types: float32, int32, uint8, int8, float16. *@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". * *@par Attributes: @@ -1333,8 +1334,7 @@ REG_OP(Cumsum) * *@par Inputs: * One input: -*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, -* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. +*x: A Tensor. Must be one of the following types: float32, int32, uint8, int8, float16. * *@par Attributes: *@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". 
@@ -2408,6 +2408,40 @@ REG_OP(TopKPQDistanceMerge) .OUTPUT(topk_index, TensorType({DT_INT32})) .REQUIRED_ATTR(k, Int) .OP_END_FACTORY_REG(TopKPQDistanceMerge) + +/** +*@brief Extracts a strided slice of a tensor. Roughly speaking, this op + extracts a slice of size (end-begin)/stride from the given input tensor. + Starting at the location specified by begin the slice continues by + adding stride to the index until all dimensions are not less than end. + +*@par Inputs: +*Four inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64. +* @li begin: A Tensor of type int32 or int64, for the index of the first value to select . \n + +* @li end: A Tensor of type int32 or int64, for the index of the last value to select . \n + +* @li strides: A Tensor of type int32 or int64, for the increment . \n + +* @li axes: A Tensor of type int32 or int64, for the increment . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(StridedSliceV3) + .INPUT(x, TensorType::BasicType()) + .INPUT(begin, TensorType::IndexNumberType()) + .INPUT(end, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(StridedSliceV3) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h index 04e04f1b..1d02fa15 100644 --- a/third_party/fwkacllib/inc/ops/set_ops.h +++ b/third_party/fwkacllib/inc/ops/set_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/slice_write_ops.h b/third_party/fwkacllib/inc/ops/slice_write_ops.h index 0c161b2d..994f197c 100644 --- a/third_party/fwkacllib/inc/ops/slice_write_ops.h +++ b/third_party/fwkacllib/inc/ops/slice_write_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index 8eb7b521..d9fb4d0a 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index ab9e1dec..d17cbfdd 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index 98d4d111..cba8e648 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -298,8 +298,8 @@ REG_OP(ConcatD) * Compatible with the TensorFlow operator Concat. */ REG_OP(Concat) - .DYNAMIC_INPUT(x, TensorType::BasicType()) .INPUT(concat_dim, TensorType::IndexNumberType()) + .DYNAMIC_INPUT(x, TensorType::BasicType()) .OUTPUT(y, TensorType::BasicType()) .ATTR(N, Int, 1) .OP_END_FACTORY_REG(Concat) diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h index d1ec00b5..49029317 100644 --- a/third_party/fwkacllib/inc/ops/state_ops.h +++ b/third_party/fwkacllib/inc/ops/state_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h index f4eb763c..a3d18922 100644 --- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h index ff9daaa3..dad3c379 100644 --- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index a78d63a1..a2699315 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/third_party/fwkacllib/inc/ops/swap_co_ops.h b/third_party/fwkacllib/inc/ops/swap_co_ops.h index 6e8eaac3..a1bf4f8b 100644 --- a/third_party/fwkacllib/inc/ops/swap_co_ops.h +++ b/third_party/fwkacllib/inc/ops/swap_co_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h index 9bef1d7b..9c61f2c9 100644 --- a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h +++ b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 3560db11..775dd4a9 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -368,8 +368,9 @@ REG_OP(SpaceToDepth) * complex128, uint32, uint64 *@par Attributes: -*Two attributes, including: +*Three attributes, including: * @li block_size: An int >= 2, specifying the size of the spatial block. +* @li mode: An optional string, specifying the mode. Defaults to "DCR". * @li data_format: An optional string, specifying the data format. Defaults to "NHWC" . 
\n *@par Outputs: @@ -382,6 +383,7 @@ REG_OP(DepthToSpace) .INPUT(x, TensorType::BasicType()) .OUTPUT(y, TensorType::BasicType()) .REQUIRED_ATTR(block_size, Int) + .ATTR(mode, String, "DCR") .ATTR(data_format, String, "NHWC") .OP_END_FACTORY_REG(DepthToSpace) @@ -845,7 +847,11 @@ with the same setting for this option. Default: False \n selected indices from the boxes tensor, where M <= max_output_size. \n *@attention Constraints: -*Input theta must be float16 or float, output_size must be int32 type . \n +*Input theta must be float16 or float, output_size must be int32 type . +The current implementation of AffineGrid operator AiCore adopts +BatchMatMul's FP16 fusion operator scheme, and the accuracy will +decrease when the theta range exceeds [-10,10].If the model requires +high accuracy of AffineGrid, it is recommended to use AICPU. \n *@par Third-party framework compatibility *Compatible with Pytorch affine_grid operator. diff --git a/third_party/fwkacllib/inc/ops/vector_search.h b/third_party/fwkacllib/inc/ops/vector_search.h index e3099511..8c1d0a2e 100644 --- a/third_party/fwkacllib/inc/ops/vector_search.h +++ b/third_party/fwkacllib/inc/ops/vector_search.h @@ -1,5 +1,5 @@ /** - * Copyright 2021 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,11 @@ namespace ge { * @li bucket_list: A Tensor. Must be one of the following types: int32, int64. * * @par Outputs: -* @li adc_tables: A Tensor. Must be one of the following types: float16, float32. +* adc_tables: A Tensor. Must be one of the following types: float16, float32. +* +* @par Attributes: +* distance_type: The string indicates the distance type of ADC tables. Examples: `"l2sqr", "inner_product"`. +The default value is "l2sqr". 
*/ REG_OP(GenADC) .INPUT(query, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -42,7 +46,89 @@ REG_OP(GenADC) .INPUT(centroids, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) .OUTPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(distance_type, String, "l2sqr") .OP_END_FACTORY_REG(GenADC) + +/** +* @brief Finds values and indices of the "k" largest or least elements for the last dimension. \n +* +* @par Inputs: +* Dynamin inputs, including: +* @li actual_count: A Tensor of type int32, the actual number of pq_distance. +* @li pq_distance: A Tensor, Will be updated after calculation. Must be one of the following types: float32, float16. +* @li grouped_extreme_distance: A Tensor, the extremum in each group. Must be one of the following types: float32, float16. +* @li pq_index: A Tensor of type int32, index corresponding to pq_distance. +* @li pq_ivf: A Tensor of type int32 , the bucket number corresponding to pq_distance. +* +* @par Attributes: +* @li order: A string, indicates the sorting method of topk_pq_distance. \n +* @li k: Int, k maximum or minimum values. \n +* @li group_size: Int, the group size of the extremum. \n +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(TopKPQDistance) + .DYNAMIC_INPUT(actual_count, TensorType({DT_INT32})) + .DYNAMIC_INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(grouped_extreme_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(pq_ivf, TensorType({DT_INT32})) + .DYNAMIC_INPUT(pq_index, TensorType({DT_INT32})) + .OUTPUT(topk_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(topk_ivf, TensorType({DT_INT32})) + .OUTPUT(topk_index, TensorType({DT_INT32})) + .ATTR(order, String, "ASC") + .ATTR(k, Int, 0) + .ATTR(group_size, Int, 0) + .OP_END_FACTORY_REG(TopKPQDistance) + +/** +* @brief Calculate PQ distance. \n +* +* @par Inputs: +* Six inputs, including: +* @li ivf: A Tensor, dtype is uint8. 
+* @li bucket_list: A Tensor, dtype is int32. +* @li bucket_base_distance: A Tensor, dtype is float16. +* @li bucket_limits: A Tensor, dtype is int32. +* @li bucket_offsets: A Tensor, dtype is int32. +* @li adc_tables: A Tensor. dtype is float16. \n +* +* @par Outputs: +* Five outputs, including: +* @li actual_count: A Tensor, dtype is int32, the first element means the length of processed ivf. +* @li pq_distance: A Tensor, dtype is float16. +* @li grouped_extreme_distance: A Tensor, dtype is float16. +* @li pq_ivf: A Tensor, dtype is int32. +* @li pq_index: A Tensor, dtype is int32. \n +* +* @par Attributes: +* Five attributes, including: +* @li group_size: A Scalar, indicates the group size when compute grouped_extreme_distance. +* @li total_limit: A Scalar, indicates the total length of the outputs. +* @li extreme_mode: A Scalar, indicates the type of extremum, 0 means minimum, and 1 means maximum. +* @li split_count: A Scalar. +* @li split_index: A Scalar. \n +* +*/ +REG_OP(ScanPQCodes) + .INPUT(ivf, TensorType({DT_UINT8})) + .INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) + .INPUT(bucket_base_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bucket_limits, TensorType({DT_INT32})) + .INPUT(bucket_offsets, TensorType({DT_INT64})) + .INPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(actual_count, TensorType({DT_INT32})) + .OUTPUT(pq_distance, TensorType({DT_FLOAT16})) + .OUTPUT(grouped_extreme_distance, TensorType({DT_FLOAT16})) + .OUTPUT(pq_ivf, TensorType({DT_INT32})) + .OUTPUT(pq_index, TensorType({DT_INT32})) + .REQUIRED_ATTR(total_limit, Int) + .ATTR(group_size, Int, 64) + .ATTR(extreme_mode, Int, 0) + .ATTR(split_count, Int, 1) + .ATTR(split_index, Int, 0) + .OP_END_FACTORY_REG(ScanPQCodes) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h index 8ef69d8b..e19cbd7c 100644 --- 
a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h +++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index fc2cd038..9e95a8b9 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef __CCE_RUNTIME_BASE_H__ #define __CCE_RUNTIME_BASE_H__ @@ -36,13 +36,25 @@ extern "C" { typedef int32_t rtError_t; static const int32_t RT_ERROR_NONE = 0; // success +#ifndef char_t +typedef char char_t; +#endif + +#ifndef float32_t +typedef float float32_t; +#endif + +#ifndef float64_t +typedef double float64_t; +#endif + /** * @ingroup dvrt_base * @brief device mode. */ typedef enum tagRtDeviceMode { RT_DEVICE_MODE_SINGLE_DIE = 0, - RT_DEVICE_MODE_MULTI_DIE = 1, + RT_DEVICE_MODE_MULTI_DIE, RT_DEVICE_MODE_RESERVED } rtDeviceMode; @@ -166,19 +178,19 @@ typedef enum { * @ingroup profiling_base * @brief runtime handle. 
*/ -RTS_API rtError_t rtSetProfDirEx(const char *profDir, const char *address, const char *jobCtx); +RTS_API rtError_t rtSetProfDirEx(const char_t *profDir, const char_t *address, const char_t *jobCtx); /** * @ingroup profiling_base * @brief init profiler object. */ -RTS_API rtError_t rtProfilerInit(const char *profDir, const char *address, const char *jobCtx); +RTS_API rtError_t rtProfilerInit(const char_t *profDir, const char_t *address, const char_t *jobCtx); /** * @ingroup profiling_base * @brief config rts profiler. */ -RTS_API rtError_t rtProfilerConfig(uint16_t type); +RTS_API rtError_t rtProfilerConfig(uint16_t profConfig); /** * @ingroup profiling_base @@ -249,18 +261,6 @@ RTS_API rtError_t rtProfSetProSwitch(void *data, uint32_t len); */ RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t moduleId, rtProfCtrlHandle callback); -/** - * @ingroup dvrt_base - * @brief Returns the last error from a runtime call. - */ -RTS_API rtError_t rtGetLastError(); - -/** - * @ingroup dvrt_base - * @brief Returns the last error from a runtime call. 
- */ -RTS_API rtError_t rtPeekAtLastError(); - /** * @ingroup dvrt_base * @brief register callback for error code @@ -285,7 +285,7 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); * @param [out] NA * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); +RTS_API rtError_t rtRegDeviceStateCallback(const char_t *regName, rtDeviceStateCallback callback); /** * @ingroup dvrt_base @@ -295,7 +295,7 @@ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCal * @param [out] NA * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallback callback); +RTS_API rtError_t rtRegTaskFailCallbackByModule(const char_t *moduleName, rtTaskFailCallback callback); /** * @ingroup dvrt_base @@ -373,7 +373,7 @@ RTS_API rtError_t rtLabelGoto(rtLabel_t label, rtStream_t stream); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtNameLabel(rtLabel_t label, const char *name); +RTS_API rtError_t rtNameLabel(rtLabel_t label, const char_t *name); /** * @ingroup dvrt_base diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index 76836e7b..64ab1497 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -1,21 +1,21 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ -#ifndef __CCE_RUNTIME_CONFIG_H__ -#define __CCE_RUNTIME_CONFIG_H__ +#ifndef CCE_RUNTIME_CONFIG_H +#define CCE_RUNTIME_CONFIG_H #include "base.h" @@ -23,28 +23,28 @@ extern "C" { #endif -#define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver)) -#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) -#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff) -#define PLAT_GET_VER(type) (type & 0xff) +#define PLAT_COMBINE(arch, chip, ver) (((arch) << 16U) | ((chip) << 8U) | (ver)) +#define PLAT_GET_ARCH(type) (((type) >> 16U) & 0xffffU) +#define PLAT_GET_CHIP(type) (((type) >> 8U) & 0xffU) +#define PLAT_GET_VER(type) ((type) & 0xffU) typedef enum tagRtArchType { ARCH_BEGIN = 0, ARCH_V100 = ARCH_BEGIN, - ARCH_V200, - ARCH_END, + ARCH_V200 = 1, + ARCH_END = 2, } rtArchType_t; typedef enum tagRtChipType { CHIP_BEGIN = 0, CHIP_MINI = CHIP_BEGIN, - CHIP_CLOUD, - CHIP_MDC, - CHIP_LHISI, - CHIP_DC, - CHIP_CLOUD_V2, - CHIP_NO_DEVICE, - CHIP_END, + CHIP_CLOUD = 1, + CHIP_MDC = 2, + CHIP_LHISI = 3, + CHIP_DC = 4, + CHIP_CLOUD_V2 = 5, + CHIP_NO_DEVICE = 6, + CHIP_END = 7, } rtChipType_t; typedef enum tagRtAicpuScheType { @@ -59,29 +59,32 @@ typedef enum tagRtDeviceCapabilityType { RT_SCHEDULE_HARDWARE, // HWTS Schedule RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation + RT_MODE_NO_FFTS, // no ffts + RT_MODE_FFTS, // 1981 get ffts work mode, ffts + RT_MODE_FFTS_PLUS, // 1981 get ffts work mode, ffts plus } rtDeviceCapabilityType; typedef enum tagRtVersion { VER_BEGIN = 0, VER_NA = VER_BEGIN, 
- VER_ES, - VER_CS, - VER_SD3403, - VER_END, + VER_ES = 1, + VER_CS = 2, + VER_SD3403 = 3, + VER_END = 4, } rtVersion_t; /* match rtChipType_t */ typedef enum tagRtPlatformType { PLATFORM_BEGIN = 0, PLATFORM_MINI_V1 = PLATFORM_BEGIN, - PLATFORM_CLOUD_V1, - PLATFORM_MINI_V2, - PLATFORM_LHISI_ES, - PLATFORM_LHISI_CS, - PLATFORM_DC, - PLATFORM_CLOUD_V2, - PLATFORM_LHISI_SD3403, - PLATFORM_END, + PLATFORM_CLOUD_V1 = 1, + PLATFORM_MINI_V2 = 2, + PLATFORM_LHISI_ES = 3, + PLATFORM_LHISI_CS = 4, + PLATFORM_DC = 5, + PLATFORM_CLOUD_V2 = 6, + PLATFORM_LHISI_SD3403 = 7, + PLATFORM_END = 8, } rtPlatformType_t; typedef enum tagRtCubeFracMKNFp16 { @@ -240,4 +243,4 @@ RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); } #endif -#endif // __CCE_RUNTIME_STREAM_H__ +#endif // CCE_RUNTIME_CONFIG_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index bb6bf111..bc8dda52 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -1,21 +1,21 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ -#ifndef __CCE_RUNTIME_CONTEXT_H__ -#define __CCE_RUNTIME_CONTEXT_H__ +#ifndef CCE_RUNTIME_CONTEXT_H +#define CCE_RUNTIME_CONTEXT_H #include "base.h" @@ -173,4 +173,4 @@ RTS_API rtError_t rtSetCtxINFMode(bool mode); #endif -#endif // __CCE_RUNTIME_CONTEXT_H__ +#endif // CCE_RUNTIME_CONTEXT_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 3d3da22e..f6777262 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -1,21 +1,21 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ -#ifndef __CCE_RUNTIME_DEVICE_H__ -#define __CCE_RUNTIME_DEVICE_H__ +#ifndef CCE_RUNTIME_DEVICE_H +#define CCE_RUNTIME_DEVICE_H #include "base.h" @@ -23,8 +23,9 @@ extern "C" { #endif -#define RT_CAPABILITY_SUPPORT (0x1) -#define RT_CAPABILITY_NOT_SUPPORT (0x0) +#define RT_CAPABILITY_SUPPORT (0x1U) +#define RT_CAPABILITY_NOT_SUPPORT (0x0U) +#define MEMORY_INFO_TS_4G_LIMITED (0x0) // for compatibility typedef struct tagRTDeviceInfo { uint8_t env_type; // 0: FPGA 1: EMU 2: ESL @@ -45,27 +46,28 @@ typedef struct tagRTDeviceInfo { typedef enum tagRtRunMode { RT_RUN_MODE_OFFLINE = 0, - RT_RUN_MODE_ONLINE = 1, - RT_RUN_MODE_AICPU_SCHED = 2, + RT_RUN_MODE_ONLINE, + RT_RUN_MODE_AICPU_SCHED, RT_RUN_MODE_RESERVED } rtRunMode; typedef enum tagRtAicpuDeployType { AICPU_DEPLOY_CROSS_OS = 0x0, - AICPU_DEPLOY_CROSS_PROCESS = 0x1, - AICPU_DEPLOY_CROSS_THREAD = 0x2, + AICPU_DEPLOY_CROSS_PROCESS, + AICPU_DEPLOY_CROSS_THREAD, AICPU_DEPLOY_RESERVED } rtAicpuDeployType_t; typedef enum tagRtFeatureType { FEATURE_TYPE_MEMCPY = 0, - FEATURE_TYPE_MEMORY = 1, + FEATURE_TYPE_MEMORY, FEATURE_TYPE_RSV } rtFeatureType_t; typedef enum tagRtDeviceFeatureType { FEATURE_TYPE_SCHE, FEATURE_TYPE_BLOCKING_OPERATOR, + FEATURE_TYPE_FFTS_MODE, FEATURE_TYPE_END, } rtDeviceFeatureType_t; @@ -75,7 +77,7 @@ typedef enum tagMemcpyInfo { } rtMemcpyInfo_t; typedef enum tagMemoryInfo { - MEMORY_INFO_TS_4G_LIMITED = 0, + MEMORY_INFO_TS_LIMITED = 0, MEMORY_INFO_RSV } rtMemoryInfo_t; @@ -90,6 +92,15 @@ typedef enum tagRtDeviceModuleType { RT_MODULE_TYPE_VECTOR_CORE, /**< VECTOR CORE info*/ } rtDeviceModuleType_t; +// used for rtGetDevMsg callback function +typedef void (*rtGetMsgCallback)(const char_t *msg, uint32_t len); + +typedef enum tagGetDevMsgType { + RT_GET_DEV_ERROR_MSG = 0, + RT_GET_DEV_RUNNING_STREAM_SNAPSHOT_MSG, + RT_GET_DEV_MSG_RESERVE +} rtGetDevMsgType_t; + /** * @ingroup dvrt_dev * @brief get total device number. 
@@ -353,14 +364,14 @@ RTS_API rtError_t rtGetAicpuDeploy(rtAicpuDeployType_t *deployType); * @brief set chipType * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtSetSocVersion(const char *version); +RTS_API rtError_t rtSetSocVersion(const char_t *version); /** * @ingroup dvrt_dev * @brief get chipType * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtGetSocVersion(char *version, const uint32_t maxLen); +RTS_API rtError_t rtGetSocVersion(char_t *version, const uint32_t maxLen); /** * @ingroup dvrt_dev @@ -408,8 +419,17 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); */ RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); +/** + * @ingroup dvrt_dev + * @brief get device message + * @param [in] rtGetDevMsgType_t getMsgType:msg type + * @param [in] GetMsgCallback callback:acl callback function + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDevMsg(rtGetDevMsgType_t getMsgType, rtGetMsgCallback callback); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_DEVICE_H__ +#endif // CCE_RUNTIME_DEVICE_H diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index 33e2f4c1..c610bbb6 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -1,21 +1,21 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. -*/ + */ -#ifndef __CCE_RUNTIME_DVFSPROFILE_H__ -#define __CCE_RUNTIME_DVFSPROFILE_H__ +#ifndef CCE_RUNTIME_DVFSPROFILE_H +#define CCE_RUNTIME_DVFSPROFILE_H #include "base.h" @@ -60,4 +60,4 @@ RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); } #endif -#endif // __CCE_RUNTIME_PROFILE_H__ +#endif // CCE_RUNTIME_DVFSPROFILE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 81b635c3..f6141d42 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -1,21 +1,21 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ -#ifndef __CCE_RUNTIME_EVENT_H__ -#define __CCE_RUNTIME_EVENT_H__ +#ifndef CCE_RUNTIME_EVENT_H +#define CCE_RUNTIME_EVENT_H #include "base.h" @@ -33,8 +33,8 @@ typedef enum rtEventWaitStatus { * @ingroup event_flags * @brief event op bit flags */ -#define RT_EVENT_DEFAULT (0x0E) -#define RT_EVENT_WITH_FLAG (0x0B) +#define RT_EVENT_DEFAULT (0x0EU) +#define RT_EVENT_WITH_FLAG (0x0BU) #define RT_EVENT_DDSYNC_NS 0x01U #define RT_EVENT_STREAM_MARK 0x02U @@ -133,7 +133,7 @@ RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t event, rtEventWaitStatus_t *s * @param [in] end ending event * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtEventElapsedTime(float *time, rtEvent_t start, rtEvent_t end); +RTS_API rtError_t rtEventElapsedTime(float32_t *time, rtEvent_t start, rtEvent_t end); /** * @ingroup dvrt_event @@ -153,7 +153,7 @@ RTS_API rtError_t rtEventGetTimeStamp(uint64_t *time, rtEvent_t event); * @return RT_ERROR_INVALID_VALUE for error input of event, name * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtNameEvent(rtEvent_t event, const char *name); +RTS_API rtError_t rtNameEvent(rtEvent_t event, const char_t *name); /** * @ingroup dvrt_event @@ -200,14 +200,14 @@ RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream); /** * @ingroup dvrt_event * @brief Wait for a notify with time out - * @param [in] notify_ notify to be wait - * @param [in] stream_ input stream + * @param [in] notify notify to be wait + * @param [in] stream input stream * @param [in] timeOut input timeOut * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify_, rtStream_t stream_, uint32_t timeOut); +RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify, rtStream_t stream, uint32_t timeOut); /** * @ingroup dvrt_event @@ -217,7 +217,7 @@ RTS_API rtError_t 
rtNotifyWaitWithTimeOut(rtNotify_t notify_, rtStream_t stream_ * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtNameNotify(rtNotify_t notify, const char *name); +RTS_API rtError_t rtNameNotify(rtNotify_t notify, const char_t *name); /** * @ingroup dvrt_event @@ -237,7 +237,7 @@ RTS_API rtError_t rtGetNotifyID(rtNotify_t notify, uint32_t *notifyId); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input of */ -RTS_API rtError_t rtIpcSetNotifyName(rtNotify_t notify, char *name, uint32_t len); +RTS_API rtError_t rtIpcSetNotifyName(rtNotify_t notify, char_t *name, uint32_t len); /** * @ingroup dvrt_event @@ -247,7 +247,7 @@ RTS_API rtError_t rtIpcSetNotifyName(rtNotify_t notify, char *name, uint32_t len * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtIpcOpenNotify(rtNotify_t *notify, const char *name); +RTS_API rtError_t rtIpcOpenNotify(rtNotify_t *notify, const char_t *name); /** * @ingroup dvrt_event @@ -270,10 +270,10 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); +RTS_API rtError_t rtSetIpcNotifyPid(const char_t *name, int32_t pid[], int32_t num); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_EVENT_H__ +#endif // CCE_RUNTIME_EVENT_H diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index f33b51d3..8c556e3a 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -1,21 +1,21 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the 
License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ -#ifndef __CCE_RUNTIME_KERNEL_H__ -#define __CCE_RUNTIME_KERNEL_H__ +#ifndef CCE_RUNTIME_KERNEL_H +#define CCE_RUNTIME_KERNEL_H #include "base.h" #include "stream.h" @@ -116,9 +116,9 @@ typedef struct rtKernelInfo { * @brief op name */ typedef struct rtKernelLaunchNames { - const char *soName; // defined for so name - const char *kernelName; // defined for kernel type name - const char *opName; // defined for operator name + const char_t *soName; // defined for so name + const char_t *kernelName; // defined for kernel type name + const char_t *opName; // defined for operator name } rtKernelLaunchNames_t; /** @@ -131,7 +131,10 @@ typedef struct tagRtArgsWithTiling { uint32_t argsSizeWithoutTiling; // input + output + tiling addr size uint16_t tilingAddrOffset; // tiling addr offset uint16_t tilingDataOffset; // tiling data offset - uint16_t reserved[2]; + uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list + uint16_t hostInputDataOffset; // host_mem input data offset + bool hasHostMemInput; // has host_memory input data in args or not: ture or false + uint8_t reserved[7]; } rtArgsWithTiling_t; /** @@ -141,7 +144,7 @@ typedef struct tagRtArgsWithTiling { typedef enum tagRtDumpKind { RT_DATA_DUMP_KIND_INVALID = -1, RT_DATA_DUMP_KIND_DUMP = 0, - RT_DATA_DUMP_KIND_RESERVED + RT_DATA_DUMP_KIND_RESERVED = 1, } rtDumpKind_t; /** @@ -160,72 +163,72 @@ typedef void (*rtCallback_t)(void *fnData); * @ingroup rt_kernel * @brief magic number of plain binary for aicore */ -#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50 +#define 
RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50U /** * @ingroup rt_kernel * @brief magic number of plain binary for aicpu */ -#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51 +#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51U /** * @ingroup rt_kernel * @brief magic number of plain binary for aivector */ -#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52 +#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicore */ -#define RT_DEV_BINARY_MAGIC_ELF 0x43554245 +#define RT_DEV_BINARY_MAGIC_ELF 0x43554245U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicpu */ -#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243 +#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243U /** * @ingroup rt_kernel * @brief magic number of elf binary for aivector */ -#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246 +#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicube */ -#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343 +#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343U /** * @ingroup rt_kernel_flags * @brief kernel op bit flags */ -#define RT_KERNEL_DEFAULT (0x00) -#define RT_KERNEL_CONVERT (0x01) -#define RT_KERNEL_DUMPFLAG (0x02) -#define RT_FUSION_KERNEL_DUMPFLAG (0x04) -#define RT_KERNEL_CUSTOM_AICPU (0x08) +#define RT_KERNEL_DEFAULT (0x00U) +#define RT_KERNEL_CONVERT (0x01U) +#define RT_KERNEL_DUMPFLAG (0x02U) +#define RT_FUSION_KERNEL_DUMPFLAG (0x04U) +#define RT_KERNEL_CUSTOM_AICPU (0x08U) // STARS topic scheduler sqe : topic_type -#define RT_KERNEL_DEVICE_FIRST (0x10) -#define RT_KERNEL_HOST_ONLY (0x20) -#define RT_KERNEL_HOST_FIRST (0x40) +#define RT_KERNEL_DEVICE_FIRST (0x10U) +#define RT_KERNEL_HOST_ONLY (0x20U) +#define RT_KERNEL_HOST_FIRST (0x40U) /** * @ingroup rt_kernel * @brief kernel mode **/ -#define RT_DEFAULT_KERNEL_MODE (0x00) -#define RT_NORMAL_KERNEL_MODE (0x01) -#define RT_ALL_KERNEL_MODE (0x02) +#define RT_DEFAULT_KERNEL_MODE 
(0x00U) +#define RT_NORMAL_KERNEL_MODE (0x01U) +#define RT_ALL_KERNEL_MODE (0x02U) /** * @ingroup rt_kernel * @brief kernel L1 Fusion Dump bit flags */ -#define RT_DDR_ADDR (0x0) +#define RT_DDR_ADDR (0x0U) /** * @ingroup rt_kernel @@ -273,7 +276,7 @@ RTS_API rtError_t rtDevBinaryUnRegister(void *handle); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMetadataRegister(void *handle, const char *metadata); +RTS_API rtError_t rtMetadataRegister(void *handle, const char_t *metadata); /** * @ingroup rt_kernel @@ -296,7 +299,7 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char *stubName, const void *devFunc, +RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, const void *devFunc, uint32_t funcMode); /** @@ -307,7 +310,7 @@ RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, cons * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetFunctionByName(const char *stubName, void **stubFunc); +RTS_API rtError_t rtGetFunctionByName(const char_t *stubName, void **stubFunc); /** * @ingroup rt_kernel @@ -325,7 +328,7 @@ RTS_API rtError_t rtGetAddrByFun(const void *stubFunc, void **addr); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtQueryFunctionRegistered(const char *stubName); +RTS_API rtError_t rtQueryFunctionRegistered(const char_t *stubName); /** * @ingroup rt_kernel @@ -410,7 +413,7 @@ RTS_API rtError_t rtKernelLaunchEx(void *args, uint32_t argsSize, uint32_t flags * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argsSize, uint32_t flags, 
+RTS_API rtError_t rtKernelLaunchFwk(const char_t *opName, void *args, uint32_t argsSize, uint32_t flags, rtStream_t rtStream); /** @@ -672,7 +675,7 @@ RTS_API rtError_t rtStopMDCProfiler(void *addr); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockDim, - rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream_); + rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream); /** * @ingroup rt_kernel @@ -688,11 +691,11 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *handle, const void *devFunc, uint32_t blockDim, - rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream_, const void* kernelInfo); + rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream, const void* kernelInfo); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_KERNEL_H__ +#endif // CCE_RUNTIME_KERNEL_H diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index b049e762..971f0cb0 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -1,25 +1,23 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ -#ifndef __CCE_RUNTIME_MEM_H__ -#define __CCE_RUNTIME_MEM_H__ +#ifndef CCE_RUNTIME_MEM_H +#define CCE_RUNTIME_MEM_H -/*lint -e7*/ #include -/*lint +e7*/ #include "base.h" #include "config.h" #include "stream.h" @@ -32,43 +30,43 @@ extern "C" { * @ingroup dvrt_mem * @brief memory type */ -#define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device -#define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device -#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device -#define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device -#define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device -#define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device -#define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device -#define RT_MEMORY_DDR_NC ((uint32_t)0x20) // DDR memory of non-cache -#define RT_MEMORY_TS_4G ((uint32_t)0x40) -#define RT_MEMORY_TS ((uint32_t)0x80) -#define RT_MEMORY_RESERVED ((uint32_t)0x100) +#define RT_MEMORY_DEFAULT (0x0U) // default memory on device +#define RT_MEMORY_HBM (0x2U) // HBM memory on device +#define RT_MEMORY_RDMA_HBM (0x3U) // RDMA-HBM memory on device +#define RT_MEMORY_DDR (0x4U) // DDR memory on device +#define RT_MEMORY_SPM (0x8U) // shared physical memory on device +#define RT_MEMORY_P2P_HBM (0x10U) // HBM memory on other 4P device +#define RT_MEMORY_P2P_DDR (0x11U) // DDR memory on other device +#define RT_MEMORY_DDR_NC (0x20U) // DDR memory of non-cache +#define RT_MEMORY_TS (0x40U) // Used for Ts memory +#define RT_MEMORY_TS_4G (0x40U) // Used for Ts memory(only 1951) +#define RT_MEMORY_RESERVED (0x100U) -#define RT_MEMORY_L1 ((uint32_t)0x1<<16) -#define RT_MEMORY_L2 ((uint32_t)0x1<<17) +#define RT_MEMORY_L1 (0x1U << 16U) +#define RT_MEMORY_L2 (0x1U << 17U) /** * @ingroup dvrt_mem * @brief memory info type */ -#define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1) -#define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2) -#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE 
((uint32_t)0x3) -#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4) +#define RT_MEM_INFO_TYPE_DDR_SIZE (0x1U) +#define RT_MEM_INFO_TYPE_HBM_SIZE (0x2U) +#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE (0x3U) +#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE (0x4U) /** * @ingroup dvrt_mem * @brief memory Policy */ -#define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // Malloc mem prior hage page, then default page -#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prior hage page, then default page -#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only use hage page -#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only use default page -#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem prior hage page, then default page, use for p2p -#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem only use hage page, use for p2p -#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem only use default page, use for p2p +#define RT_MEMORY_POLICY_NONE (0x0U) // Malloc mem prior huge page, then default page +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x1U << 10U) // Malloc mem prior huge page, then default page +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x1U << 11U) // Malloc mem only use huge page +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1U << 12U) // Malloc mem only use default page +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x1U << 13U) // Malloc mem prior huge page, then default page, for p2p +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x1U << 14U) // Malloc mem only use huge page, use for p2p +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x1U << 15U) // Malloc mem only use default page, use for p2p -#define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mem type bit in <0, 9> +#define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9> /** * @ingroup dvrt_mem @@ -80,10 +78,10 @@ typedef uint32_t rtMemType_t; * @ingroup 
dvrt_mem * @brief memory advise type */ -#define RT_MEMORY_ADVISE_EXE (0x02) -#define RT_MEMORY_ADVISE_THP (0x04) -#define RT_MEMORY_ADVISE_PLE (0x08) -#define RT_MEMORY_ADVISE_PIN (0x16) +#define RT_MEMORY_ADVISE_EXE (0x02U) +#define RT_MEMORY_ADVISE_THP (0x04U) +#define RT_MEMORY_ADVISE_PLE (0x08U) +#define RT_MEMORY_ADVISE_PIN (0x16U) /** * @ingroup dvrt_mem @@ -119,7 +117,7 @@ typedef enum tagRtRecudeKind { RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11, RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12, RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13, - RT_RECUDE_KIND_END + RT_RECUDE_KIND_END = 14, } rtRecudeKind_t; typedef enum tagRtDataType { @@ -134,7 +132,7 @@ typedef enum tagRtDataType { RT_DATA_TYPE_UINT8 = 8, // uint8 RT_DATA_TYPE_UINT16= 9, // uint16 RT_DATA_TYPE_UINT32= 10,// uint32 - RT_DATA_TYPE_END + RT_DATA_TYPE_END = 11, } rtDataType_t; /** @@ -191,21 +189,21 @@ typedef struct tagRtPointerAttributes { typedef struct rtMallocHostSharedMemoryIn { - const char *name; + const char_t *name; const uint64_t size; uint32_t flag; } rtMallocHostSharedMemoryIn; typedef struct rtMallocHostSharedMemoryOut { - int fd; + int32_t fd; void *ptr; void *devPtr; } rtMallocHostSharedMemoryOut; typedef struct rtFreeHostSharedMemoryIn { - const char *name; + const char_t *name; const uint64_t size; - int fd; + int32_t fd; void *ptr; void *devPtr; } rtFreeHostSharedMemoryIn; @@ -310,6 +308,18 @@ RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtMemFreeManaged(void *ptr); + +/** + * @ingroup dvrt_mem + * @brief Specifies how memory is use + * @param [in] devPtr memory pointer + * @param [in] size memory size + * @param [in] advise reserved, set to 1 + * @return RT_ERROR_NONE for ok + * @return others for error + */ +RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t size, uint32_t advise); + /** * @ingroup dvrt_mem * @brief alloc cached device memory @@ -382,6 +392,39 @@ RTS_API rtError_t rtMemcpyAsync(void 
*dst, uint64_t destMax, const void *src, ui RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind, rtDataType_t type, rtStream_t stream); +/** + * @ingroup dvrt_mem + * @brief synchronized memcpy2D + * @param [in] dst destination address pointer + * @param [in] dstPitch pitch of destination memory + * @param [in] src source address pointer + * @param [in] srcPitch pitch of source memory + * @param [in] width width of matrix transfer + * @param [in] height height of matrix transfer + * @param [in] kind memcpy type + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width, + uint64_t height, rtMemcpyKind_t kind); + +/** + * @ingroup dvrt_mem + * @brief asynchronized memcpy2D + * @param [in] dst destination address pointer + * @param [in] dstPitch length of destination address memory + * @param [in] src source address pointer + * @param [in] srcPitch length of destination address memory + * @param [in] width width of matrix transfer + * @param [in] height height of matrix transfer + * @param [in] kind memcpy type + * @param [in] stream asynchronized task stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width, + uint64_t height, rtMemcpyKind_t kind, rtStream_t stream); + /** * @ingroup dvrt_mem * @brief query memory size @@ -429,22 +472,22 @@ RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uin /** * @ingroup dvrt_mem * @brief get current device memory total and free - * @param [out] free - * @param [out] total + * @param [out] freeSize + * @param [out] totalSize * @return RT_ERROR_NONE for ok, errno for failed * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t 
rtMemGetInfo(size_t *free, size_t *total); +RTS_API rtError_t rtMemGetInfo(size_t *freeSize, size_t *totalSize); /** * @ingroup dvrt_mem * @brief get current device memory total and free * @param [in] memInfoType - * @param [out] free - * @param [out] total + * @param [out] freeSize + * @param [out] totalSize * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total); +RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, size_t *totalSize); /** * @ingroup dvrt_mem @@ -477,7 +520,7 @@ RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, cons * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char *name, uint32_t len); +RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char_t *name, uint32_t len); /** * @ingroup dvrt_mem @@ -487,7 +530,7 @@ RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char * * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtIpcDestroyMemoryName(const char *name); +RTS_API rtError_t rtIpcDestroyMemoryName(const char_t *name); /** * @ingroup dvrt_mem @@ -498,7 +541,7 @@ RTS_API rtError_t rtIpcDestroyMemoryName(const char *name); * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name); +RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char_t *name); /** * @ingroup dvrt_mem @@ -533,7 +576,7 @@ RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t strea * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num); +RTS_API rtError_t 
rtSetIpcMemPid(const char_t *name, int32_t pid[], int num); /** * @ingroup dvrt_mem @@ -551,4 +594,4 @@ RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t str } #endif -#endif // __CCE_RUNTIME_MEM_H__ +#endif // CCE_RUNTIME_MEM_H diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index 6c2f5318..519ccd40 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -1,21 +1,21 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ -#ifndef __CCE_RUNTIME_RT_H__ -#define __CCE_RUNTIME_RT_H__ +#ifndef CCE_RUNTIME_RT_H +#define CCE_RUNTIME_RT_H #include "base.h" #include "config.h" @@ -32,4 +32,4 @@ #include "rt_ffts_plus.h" #include "rt_dfx.h" -#endif // __CCE_RUNTIME_RT_H__ +#endif // CCE_RUNTIME_RT_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_dfx.h b/third_party/fwkacllib/inc/runtime/rt_dfx.h index d5927575..7e0bc280 100644 --- a/third_party/fwkacllib/inc/runtime/rt_dfx.h +++ b/third_party/fwkacllib/inc/runtime/rt_dfx.h @@ -1,6 +1,17 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. 
- * Description: dfx interface +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ #ifndef CCE_RUNTIME_RT_DFX_H @@ -26,7 +37,7 @@ extern "C" { * @return RT_ERROR_NONE for ok * @return other failed */ -RTS_API rtError_t rtSetTaskTag(const char *taskTag); +RTS_API rtError_t rtSetTaskTag(const char_t *taskTag); #if defined(__cplusplus) } diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h index f2809218..ded78d25 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -1,10 +1,21 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. - * Description: ffts interface +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -#ifndef __CCE_RUNTIME_FFTS_H -#define __CCE_RUNTIME_FFTS_H +#ifndef CCE_RUNTIME_RT_FFTS_H +#define CCE_RUNTIME_RT_FFTS_H #include "base.h" @@ -33,7 +44,7 @@ typedef enum tagFftsSubTaskType { RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6, RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7, RT_FFTS_SUB_TASK_TYPE_SDMA = 8, - RT_FFTS_SUB_TASK_TYPE_RESERVED, + RT_FFTS_SUB_TASK_TYPE_RESERVED = 9, } rtFftsSubTaskType_t; typedef struct tagManualThreadDmuInfo { @@ -64,7 +75,7 @@ typedef struct tagManualThreadAicAivInfo { // num: thread0_prefetch_dmu_descriptor_index – prefetch_once_dmu_descriptor_index uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM]; - const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; + const char_t *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim-1] rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; @@ -91,8 +102,8 @@ typedef struct tagAutoThreadAicAivInfo { uint16_t tailBlkDim; uint16_t nonTailBlkDim; - const char *nonTailTaskFuncStub; - const char *tailTaskFuncStub; + const char_t *nonTailTaskFuncStub; + const char_t *tailTaskFuncStub; // for prefetch, valid num is prefetchEnableBitmap bit count. 
// if prefetchEnableBitmap='00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid @@ -177,8 +188,11 @@ typedef struct tagFftsTaskInfo { } rtFftsTaskInfo_t; RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); +RTS_API rtError_t rtGetC2cCtrlAddr(uint64_t *addr, uint32_t *len); + +RTS_API rtError_t rtFftsTaskLaunchWithFlag(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream, uint32_t flag); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_FFTS_H +#endif // CCE_RUNTIME_RT_FFTS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h index 61eee9f3..53f3e60a 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h @@ -1,10 +1,21 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. - * Description: ffts plus interface +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -#ifndef __CCE_RUNTIME_FFTS_PLUS_H -#define __CCE_RUNTIME_FFTS_PLUS_H +#ifndef CCE_RUNTIME_RT_FFTS_PLUS_H +#define CCE_RUNTIME_RT_FFTS_PLUS_H #include "base.h" #include "rt_ffts_plus_define.h" @@ -26,9 +37,13 @@ typedef struct tagFftsPlusTaskInfo { #pragma pack(pop) RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt); + RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream); +RTS_API rtError_t rtFftsPlusTaskLaunchWithFlag(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream, + uint32_t flag); + #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif -#endif // __CCE_RUNTIME_FFTS_H +#endif // CCE_RUNTIME_RT_FFTS_PLUS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h index 9887b943..8956e009 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h @@ -1,10 +1,21 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. - * Description: the definition of ffts plus +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -#ifndef __CCE_RUNTIME_FFTS_PLUS_DEFINE_H -#define __CCE_RUNTIME_FFTS_PLUS_DEFINE_H +#ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H +#define CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H #include "base.h" @@ -30,7 +41,7 @@ typedef enum tagFftsPlusHwType { RT_HW_CTX_TYPE_WRITEBACK_DATA = 11, RT_HW_CTX_TYPE_AICPU = 12, RT_HW_CTX_TYPE_LOAD = 13, - RT_HW_CTX_TYPE_MAX, + RT_HW_CTX_TYPE_MAX = 14, } rtFftsPlusHwType_t; // hardware context type @@ -40,7 +51,8 @@ typedef enum tagFftsPlusSoftType { RT_SOFT_CTX_TYPE_AT_START = 3, RT_SOFT_CTX_TYPE_AT_END = 4, RT_SOFT_CTX_TYPE_LABEL = 5, - RT_SOFT_CTX_TYPE_MAX, + RT_SOFT_CTX_PERSISTENT_CACHE = 6, + RT_SOFT_CTX_TYPE_MAX = 7, } rtFftsPlusSoftType_t; typedef enum tagFftsPlusContextType { @@ -61,6 +73,7 @@ typedef enum tagFftsPlusContextType { RT_CTX_TYPE_AT_START = 0x0300, RT_CTX_TYPE_AT_END = 0x0400, RT_CTX_TYPE_LABEL = 0x0500, + RT_CTX_TYPE_PERSISTENT_CACHE = 0x0600, }rtFftsPlusContextType_t; // condition type @@ -71,7 +84,7 @@ typedef enum tagFftsPlusCondType { RT_COND_TYPE_GREATER_OR_EQUAL = 3, RT_COND_TYPE_LESS = 4, RT_COND_TYPE_LESS_OR_EQUAL = 5, - RT_COND_TYPE_MAX, + RT_COND_TYPE_MAX = 6, } rtFftsPlusCondType_t; // the definition of ffts plus context @@ -505,7 +518,7 @@ typedef struct tagFftsPlusAtStartCtx { uint16_t threadIdInit; uint16_t threadWindowSize; // 80-127 - uint16_t res9[12]; + uint32_t res9[12]; } rtFftsPlusAtStartCtx_t; // at end context @@ -707,9 +720,35 @@ typedef struct tagFftsPlusCondSwitchCtx { uint32_t cmpValue2; } rtFftsPlusCondSwitchCtx_t; +// ffts plus persistent cache context +typedef struct tagFftsPlusPersistentCacheCtx { + // 0- 3bytes + uint16_t contextType; + uint8_t successorNum; + uint8_t res1 : 7; + uint8_t aten : 1; + // 4-7 + uint8_t res2[2]; + uint8_t predCntInit; + uint8_t predCnt; + // 8-11 + uint8_t res3[4]; + // 12-63 + uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; + // 64-67 + uint8_t persistentEnable : 1; + uint8_t res4 : 7; + uint8_t res5; + uint16_t persistentSize; + // 68-71 + uint32_t 
persistentId; + // 72-127 + uint32_t res6[14]; +} rtFftsPlusPersistentCacheCtx_t; + #pragma pack(pop) #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif -#endif // __CCE_RUNTIME_FFTS_PLUS_DEFINE_H +#endif // CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index d0ffe9c8..9486639d 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -1,21 +1,21 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ -#ifndef __CCE_RUNTIME_MODEL_H__ -#define __CCE_RUNTIME_MODEL_H__ +#ifndef CCE_RUNTIME_RT_MODEL_H +#define CCE_RUNTIME_RT_MODEL_H #include "base.h" @@ -42,7 +42,7 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_NOTIFY_WAIT, RT_MODEL_TASK_REDUCE_ASYNC, RT_MODEL_TASK_RDMA_SEND, - RT_MODEL_TASK_EVENT_RESET = 18, + RT_MODEL_TASK_EVENT_RESET, RT_MODEL_TASK_MODEL_END_GRAPH, RT_MODEL_TASK_STREAM_SWITCH_N, RT_MODEL_TASK_RDMA_DB_SEND, @@ -66,16 +66,16 @@ typedef enum tagModelQueueFlag { RT_MODEL_OUTPUT_QUEUE = 1 } rtModelQueueFlag_t; -#define EXECUTOR_NONE ((uint32_t)0x0) -#define EXECUTOR_TS ((uint32_t)0x01) -#define EXECUTOR_AICPU ((uint32_t)0x02) +#define EXECUTOR_NONE (0x0U) +#define EXECUTOR_TS (0x01U) +#define EXECUTOR_AICPU (0x02U) /* * @ingroup rt_model * @brief debug flag for kernel exception dump */ -#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1 << 0) -#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1 << 1) +#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1U << 0U) +#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1U << 1U) /** * @ingroup @@ -125,7 +125,7 @@ typedef struct tagKernelTaskInfo { uint16_t argsCount; uint16_t argsSize; uint16_t reserved; - char *stubFunc; + char_t *stubFunc; uint8_t *smDesc; uint8_t *args; uint16_t *argsOffset; @@ -392,12 +392,12 @@ RTS_API rtError_t rtModelExecute(rtModel_t model, rtStream_t stream, uint32_t fl * @ingroup rt_model * @brief get model the last persist task id * @param [in] model model to execute - * @param [out] taskid last task id of the model - * @param [out] streamid last steam id of the model + * @param [out] taskId last task id of the model + * @param [out] streamId last steam id of the model * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskid, uint32_t *streamid); +RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskId, uint32_t *streamId); /** * @ingroup rt_model @@ -495,4 +495,4 @@ RTS_API rtError_t 
rtDebugUnRegister(rtModel_t model); } #endif -#endif // __CCE_RUNTIME_MODEL_H__ +#endif // CCE_RUNTIME_RT_MODEL_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h index 016c352a..857006b5 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars.h @@ -1,10 +1,21 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. - * Description: +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -#ifndef __CCE_RUNTIME_STARS_H -#define __CCE_RUNTIME_STARS_H +#ifndef CCE_RUNTIME_RT_STARS_H +#define CCE_RUNTIME_RT_STARS_H #include "base.h" @@ -32,7 +43,7 @@ RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStre * @param [in] queName cdq name * @return RT_ERROR_NONE for ok, ACL_ERROR_RT_NO_CDQ_RESOURCE for no cdq resources */ -RTS_API rtError_t rtCdqCreate(uint32_t batchNum, uint32_t batchSize, const char *queName); +RTS_API rtError_t rtCdqCreate(uint32_t batchNum, uint32_t batchSize, const char_t *queName); /** * @ingroup rt_stars @@ -40,7 +51,7 @@ RTS_API rtError_t rtCdqCreate(uint32_t batchNum, uint32_t batchSize, const char * @param [in] queName cdq name * @return RT_ERROR_NONE for ok, others failed */ -RTS_API rtError_t rtCdqDestroy(const char *queName); +RTS_API rtError_t rtCdqDestroy(const char_t *queName); /** * @ingroup rt_stars @@ -50,7 +61,7 @@ RTS_API rtError_t rtCdqDestroy(const char *queName); * @param [out] batchId batch index * @return RT_ERROR_NONE for ok, ACL_ERROR_RT_WAIT_TIMEOUT for timeout */ -RTS_API rtError_t rtCdqAllocBatch(const char *queName, int32_t timeout, uint32_t *batchId); +RTS_API rtError_t rtCdqAllocBatch(const char_t *queName, int32_t timeout, uint32_t *batchId); /** * @ingroup rt_stars @@ -63,7 +74,7 @@ RTS_API rtError_t rtCdqAllocBatch(const char *queName, int32_t timeout, uint32_t * @param [in] stream launch task on the stream * @return RT_ERROR_NONE for ok, others failed */ -RTS_API rtError_t rtCdqEnQueue(const char *queName, uint32_t cdqeIndex, void *data, uint32_t dataSize, +RTS_API rtError_t rtCdqEnQueue(const char_t *queName, uint32_t cdqeIndex, void *data, uint32_t dataSize, rtStream_t stream); /** @@ -77,11 +88,11 @@ RTS_API rtError_t rtCdqEnQueue(const char *queName, uint32_t cdqeIndex, void *da * @param [in] stream launch task on the stream * @return RT_ERROR_NONE for ok, others failed */ -RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void 
*ptrAddr, +RTS_API rtError_t rtCdqEnQueuePtrMode(const char_t *queName, uint32_t cdqeIndex, const void *ptrAddr, rtStream_t stream); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_STARS_H +#endif // CCE_RUNTIME_RT_STARS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_stars_define.h b/third_party/fwkacllib/inc/runtime/rt_stars_define.h index d77a8a8e..861af80b 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars_define.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars_define.h @@ -1,10 +1,21 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. - * Description: the definition of stars +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -#ifndef __CCE_RUNTIME_STARS_DEFINE__H -#define __CCE_RUNTIME_STARS_DEFINE__H +#ifndef CCE_RUNTIME_RT_STARS_DEFINE_H +#define CCE_RUNTIME_RT_STARS_DEFINE_H #include "base.h" @@ -88,4 +99,4 @@ typedef struct tagFftsPlusSqe { #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif -#endif // __CCE_RUNTIME_STARS_DEFINE__H \ No newline at end of file +#endif // CCE_RUNTIME_RT_STARS_DEFINE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 3a078e99..a7ca9ebb 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -1,21 +1,21 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ -#ifndef __CCE_RUNTIME_STREAM_H__ -#define __CCE_RUNTIME_STREAM_H__ +#ifndef CCE_RUNTIME_STREAM_H +#define CCE_RUNTIME_STREAM_H #include "base.h" #include "event.h" @@ -28,27 +28,27 @@ extern "C" { * @ingroup stream_flags * @brief stream op bit flags */ -#define RT_STREAM_DEFAULT (0x00) -#define RT_STREAM_PERSISTENT (0x01) -#define RT_STREAM_FORCE_COPY (0x02) -#define RT_STREAM_HUGE (0x04) -#define RT_STREAM_AICPU (0x08) -#define RT_STREAM_FORBIDDEN_DEFAULT (0x10) -#define RT_STREAM_HEAD (0x20) -#define RT_STREAM_PRIMARY_DEFAULT (0x40) -#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80) +#define RT_STREAM_DEFAULT (0x00U) +#define RT_STREAM_PERSISTENT (0x01U) +#define RT_STREAM_FORCE_COPY (0x02U) +#define RT_STREAM_HUGE (0x04U) +#define RT_STREAM_AICPU (0x08U) +#define RT_STREAM_FORBIDDEN_DEFAULT (0x10U) +#define RT_STREAM_HEAD (0x20U) +#define RT_STREAM_PRIMARY_DEFAULT (0x40U) +#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80U) /** * @ingroup stream_type * @brief stream type */ -#define RT_NORMAL_STREAM (0x00) -#define RT_HUGE_STREAM (0x01) +#define RT_NORMAL_STREAM (0x00U) +#define RT_HUGE_STREAM (0x01U) /** * priority level default value when create a stream */ -#define RT_STREAM_PRIORITY_DEFAULT (0) +#define RT_STREAM_PRIORITY_DEFAULT (0U) /** * @ingroup dvrt_stream @@ -137,7 +137,7 @@ RTS_API rtError_t rtGetMaxStreamAndTask(uint32_t streamType, uint32_t *maxStrCou * @return RT_ERROR_NONE for complete * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtNameStream(rtStream_t stream, const char *name); +RTS_API rtError_t rtNameStream(rtStream_t stream, const char_t *name); /** * @ingroup dvrt_stream @@ -215,4 +215,4 @@ RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream); } #endif -#endif // __CCE_RUNTIME_STREAM_H__ +#endif // CCE_RUNTIME_STREAM_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/toolchain/plog.h b/third_party/fwkacllib/inc/toolchain/plog.h index 6134c3e6..8dd8d403 100644 --- 
a/third_party/fwkacllib/inc/toolchain/plog.h +++ b/third_party/fwkacllib/inc/toolchain/plog.h @@ -44,14 +44,14 @@ extern "C" { * @brief DlogReportInitialize: init log in service process before all device setting. * @return: 0: SUCCEED, others: FAILED */ -DLL_EXPORT int DlogReportInitialize(); +DLL_EXPORT int DlogReportInitialize(void); /** * @ingroup plog * @brief DlogReportFinalize: release log resource in service process after all device reset. * @return: 0: SUCCEED, others: FAILED */ -DLL_EXPORT int DlogReportFinalize(); +DLL_EXPORT int DlogReportFinalize(void); #ifdef __cplusplus } diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index d65aac83..0bc63385 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -22,18 +22,7 @@ #define PROF_TASK_TIME 0x00000002 #define PROF_AICORE_METRICS 0x00000004 #define PROF_AICPU_TRACE 0x00000008 -#define PROF_MODEL_EXECUTE 0x00000010 -#define PROF_RUNTIME_API 0x00000020 -#define PROF_RUNTIME_TRACE 0x00000040 -#define PROF_SCHEDULE_TIMELINE 0x00000080 -#define PROF_SCHEDULE_TRACE 0x00000100 -#define PROF_AIVECTORCORE_METRICS 0x00000200 -#define PROF_SUBTASK_TIME 0x00000400 - -#define PROF_TRAINING_TRACE 0x00000800 -#define PROF_HCCL_TRACE 0x00001000 - -#define PROF_TASK_TRACE 0x00001852 +#define PROF_L2CACHE 0x00000010 // system profilinig switch #define PROF_CPU 0x00010000 @@ -44,6 +33,19 @@ #define PROF_SYS_AICORE_SAMPLE 0x00200000 #define PROF_AIVECTORCORE_SAMPLE 0x00400000 +#define PROF_MODEL_EXECUTE 0x0000001000000 +#define PROF_RUNTIME_API 0x0000002000000 +#define PROF_RUNTIME_TRACE 0x0000004000000 +#define PROF_SCHEDULE_TIMELINE 0x0000008000000 +#define PROF_SCHEDULE_TRACE 0x0000010000000 +#define PROF_AIVECTORCORE_METRICS 0x0000020000000 +#define PROF_SUBTASK_TIME 0x0000040000000 + +#define PROF_TRAINING_TRACE 0x0000080000000 +#define PROF_HCCL_TRACE 0x0000100000000 + +#define 
PROF_TASK_TRACE 0x0000185000002 + #define PROF_MODEL_LOAD 0x8000000000000000 // DataTypeConfig MASK @@ -51,16 +53,7 @@ #define PROF_TASK_TIME_MASK 0x00000002 #define PROF_AICORE_METRICS_MASK 0x00000004 #define PROF_AICPU_TRACE_MASK 0x00000008 -#define PROF_MODEL_EXECUTE_MASK 0x00000010 -#define PROF_RUNTIME_API_MASK 0x00000020 -#define PROF_RUNTIME_TRACE_MASK 0x00000040 -#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 -#define PROF_SCHEDULE_TRACE_MASK 0x00000100 -#define PROF_AIVECTORCORE_METRICS_MASK 0x00000200 -#define PROF_SUBTASK_TIME_MASK 0x00000400 - -#define PROF_TRAINING_TRACE_MASK 0x00000800 -#define PROF_HCCL_TRACE_MASK 0x00001000 +#define PROF_L2CACHE_MASK 0x00000010 // system profilinig mask #define PROF_CPU_MASK 0x00010000 @@ -71,20 +64,27 @@ #define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 #define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 -#define PROF_MODEL_LOAD_MASK 0x8000000000000000 +#define PROF_MODEL_EXECUTE_MASK 0x0000001000000 +#define PROF_RUNTIME_API_MASK 0x0000002000000 +#define PROF_RUNTIME_TRACE_MASK 0x0000004000000 +#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000 +#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000 +#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000 +#define PROF_SUBTASK_TIME_MASK 0x0000040000000 -#ifndef OS_TYPE -#define OS_TYPE 0 -#endif // OS_TYPE +#define PROF_TRAINING_TRACE_MASK 0x0000080000000 +#define PROF_HCCL_TRACE_MASK 0x0000100000000 + +#define PROF_MODEL_LOAD_MASK 0x8000000000000000 -#if (OS_TYPE != LINUX) +#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) #define MSVP_PROF_API __declspec(dllexport) #else #define MSVP_PROF_API __attribute__((visibility("default"))) #endif #include -#include +#include namespace Msprofiler { namespace Api { @@ -106,7 +106,7 @@ extern "C" { MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); -typedef uint32_t Status; +typedef int32_t Status; typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1; /// /// @ingroup 
AscendCL diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h index e6ac64bf..cb531417 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_callback.h +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -1,5 +1,5 @@ /** - * Copyright 2020-2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,9 +12,6 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * - * @file prof_callback.h - * @brief declaraion of profiling callbacks */ #ifndef MSPROFILER_PROF_CALLBACK_H_ @@ -24,7 +21,7 @@ extern "C" { #endif // __cplusplus -#if (OS_TYPE != LINUX) +#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) #define MSVP_PROF_API __declspec(dllexport) #else #define MSVP_PROF_API __attribute__((visibility("default"))) diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index d5ed7569..f0747833 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -16,11 +16,8 @@ #ifndef MSPROF_ENGINE_PROF_REPORTER_H_ #define MSPROF_ENGINE_PROF_REPORTER_H_ -#ifndef OS_TYPE -#define OS_TYPE 0 -#endif // OS_TYPE -#if (OS_TYPE != LINUX) +#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) #define MSVP_PROF_API __declspec(dllexport) #else #define MSVP_PROF_API __attribute__((visibility("default")))