diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h index a0a3f786..faf5e649 100644 --- a/inc/external/acl/acl_op_compiler.h +++ b/inc/external/acl/acl_op_compiler.h @@ -36,7 +36,8 @@ typedef enum { ACL_DEBUG_DIR, ACL_OP_COMPILER_CACHE_MODE, ACL_OP_COMPILER_CACHE_DIR, - ACL_OP_PERFORMANCE_MODE + ACL_OP_PERFORMANCE_MODE, + ACL_OP_JIT_COMPILE } aclCompileOpt; typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag; diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h index 4a9a5be9..a6f0c832 100644 --- a/inc/external/acl/acl_prof.h +++ b/inc/external/acl/acl_prof.h @@ -128,8 +128,8 @@ MSVP_PROF_API aclError aclprofStart(const aclprofConfig *profilerConfig); * @see aclprofDestroyConfig */ MSVP_PROF_API aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, - aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, - uint64_t dataTypeConfig); + aclprofAicoreMetrics aicoreMetrics, + const aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); /** * @ingroup AscendCL diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index cc607b74..56c56438 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -113,6 +113,7 @@ static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow +static const int32_t ACL_ERROR_RT_STREAM_SYNC_TIMEOUT = 507046; // stream sync time out static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index b77bebfc..42c6f980 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -30,6 +30,8 @@ namespace ge { // Option key: graph run mode const char_t *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; const char_t *const OPTION_DEVICE_TYPE = "ge.deviceType"; +// Option key: topo sorting mode +const char *const OPTION_TOPO_SORTING_MODE = "ge.topoSortingMode"; // Option key: ome init const char_t *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; @@ -129,6 +131,7 @@ const char_t *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; const char_t *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; const char_t *const CUSTOMIZE_DTYPES = "ge.customizeDtypes"; const char_t *const COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; +const char_t *const BUILD_GRAPH_ALREADY_INITIALIZED = "build_graph_already_initialized"; } // namespace configure_option // Configure stream num by Session constructor options param, // its value should be int32_t type, default value is "1" @@ -293,6 +296,9 @@ const std::string FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; // Configure compression optimize file path const std::string COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; +// Configure for amct +const std::string BUILD_GRAPH_ALREADY_INITIALIZED = "build_graph_already_initialized"; + // Configure customize dtypes path const std::string CUSTOMIZE_DTYPES = "ge.customizeDtypes"; @@ -394,7 +400,8 @@ const char_t *const 
GRAPH_PARALLEL_OPTION_PATH = "ge.graphParallelOptionPath"; const char_t *const EVALUATE_GRAPH_RESOURCE_MODE = "ge.evaluateGraphResourceMode"; // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; - +// Topo sorting mode +enum class TopoSortingMode { BFS = 0, DFS = 1 }; // Input/Output tensor info struct InputTensorInfo { uint32_t data_type; // data type @@ -478,6 +485,8 @@ static const char_t *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str(); static const char_t *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); static const char_t *const CUSTOMIZE_DTYPES = "ge.customizeDtypes"; static const char_t *const COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; +static const char_t *const BUILD_GRAPH_ALREADY_INITIALIZED = "build_graph_already_initialized"; +static const char_t *const INPUT_DATA_NAMES = "input_data_names"; // for interface: aclgrphBuildModel #ifdef __GNUC__ @@ -514,8 +523,8 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, // for interface: aclgrphParse const std::set ir_parser_suppported_options = { - INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, - OUT_NODES, ENABLE_SCOPE_FUSION_PASSES}; + INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, OUT_NODES, + ENABLE_SCOPE_FUSION_PASSES, INPUT_DATA_NAMES}; // for interface: aclgrphBuildInitialize const std::set global_options = {CORE_TYPE, @@ -540,7 +549,8 @@ const std::set global_options = {CORE_TYPE, OP_COMPILER_CACHE_DIR, OP_COMPILER_CACHE_MODE, MODIFY_MIXLIST, - COMPRESSION_OPTIMIZE_CONF}; + COMPRESSION_OPTIMIZE_CONF, + BUILD_GRAPH_ALREADY_INITIALIZED}; #endif } // namespace ir_option } // namespace ge diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index cc607b74..56c56438 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -113,6 +113,7 @@ static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow +static const int32_t ACL_ERROR_RT_STREAM_SYNC_TIMEOUT = 507046; // stream sync time out static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error diff --git a/inc/framework/common/aicpu_op.h b/inc/framework/common/aicpu_op.h deleted file mode 100644 index 773d42fd..00000000 --- a/inc/framework/common/aicpu_op.h +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_FRAMEWORK_COMMON_AICPU_OP_H_ -#define INC_FRAMEWORK_COMMON_AICPU_OP_H_ - -#include "cce/customize.h" - -#endif // INC_FRAMEWORK_COMMON_AICPU_OP_H_ diff --git a/inc/framework/common/file_constant_util.h b/inc/framework/common/file_constant_util.h deleted file mode 100644 index a53c5a24..00000000 --- a/inc/framework/common/file_constant_util.h +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H -#define INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H - -#include -#include -#include -#include "ge/ge_api_error_codes.h" -#include "nlohmann/json.hpp" -#include "graph/op_desc.h" -#include "graph/ge_tensor.h" - -namespace ge { -struct FileConstantInfo { - std::string value_bin_file_id; - std::string value_bin_file_path; -}; - -struct OptionInfo { - std::vector info; -}; - -void from_json(const nlohmann::json &j, FileConstantInfo &info); - -void from_json(const nlohmann::json &j, OptionInfo &option_info); - -Status GetFilePathFromOption(std::map &file_id_and_path_map); - -Status CopyOneWeightFromFile(const void *const curr_dev_ptr, const std::string &value, const size_t file_constant_size, - size_t &left_size); - -Status GetFilePath(const OpDescPtr &op_desc, const std::map &file_id_and_path_map, - std::string &file_path); -} // namespace ge - -#endif // INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H diff --git a/inc/framework/common/fmk_error_codes.h b/inc/framework/common/fmk_error_codes.h index dd1bd678..0ef4f412 100644 --- a/inc/framework/common/fmk_error_codes.h +++ b/inc/framework/common/fmk_error_codes.h @@ -19,18 +19,14 @@ #if defined(_MSC_VER) #ifdef FUNC_VISIBILITY -#define GE_FUNC_VISIBILITY _declspec(dllexport) #define GE_OBJECT_VISIBILITY #else -#define GE_FUNC_VISIBILITY #define GE_OBJECT_VISIBILITY #endif #else #ifdef FUNC_VISIBILITY -#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) #define GE_OBJECT_VISIBILITY #else -#define GE_FUNC_VISIBILITY #define GE_OBJECT_VISIBILITY __attribute__((visibility("hidden"))) #endif #endif @@ -40,6 +36,7 @@ #include "framework/common/fmk_types.h" #include "register/register_error_codes.h" +#include "external/ge/ge_error_codes.h" // Each module uses the following four macros to define error codes: #define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, name, value) diff --git a/inc/framework/common/gflags_util.h b/inc/framework/common/gflags_util.h deleted file mode 100644 index 5d374261..00000000 --- a/inc/framework/common/gflags_util.h +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ -#define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ - -#if defined(_MSC_VER) -#ifdef FUNC_VISIBILITY -#define GE_FUNC_VISIBILITY _declspec(dllexport) -#else -#define GE_FUNC_VISIBILITY -#endif -#else -#ifdef FUNC_VISIBILITY -#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) -#else -#define GE_FUNC_VISIBILITY -#endif -#endif - -#include -#include - -namespace ge { -class GE_FUNC_VISIBILITY GflagsUtils { - public: - static bool IsSetCommandTrue(const char *name) { - std::string out; - return gflags::GetCommandLineOption(name, &out) && out == "true"; - } - - /// - /// @brief Determines whether the parameter is empty - /// @param name name parameter name - /// @return true if empty otherwise false - /// - static bool IsSetCommandNotEmpty(const char *name) { - std::string out; - return gflags::GetCommandLineOption(name, &out) && !out.empty(); - } - - /// - /// @brief Determines whether the parameter is not default - /// @param flag_name name parameter name - /// @return true if not default otherwise false - /// - static bool IsCommandLineNotDefault(const char *flag_name) { - google::CommandLineFlagInfo info; - return GetCommandLineFlagInfo(flag_name, &info) && !info.is_default; - } - - /// - /// @brief Modify gflags to print help information - /// @param flags_h Pass in the self-defined help parameter, it is recommended to be FLAGS_h - /// @return void - /// - static void ChangeHelpFlags(bool flags_h) { - if (flags_h || IsSetCommandTrue("help") || IsSetCommandTrue("helpfull") || IsSetCommandNotEmpty("helpon") || - IsSetCommandNotEmpty("helpmatch") || IsSetCommandTrue("helppackage") || IsSetCommandTrue("helpxml")) { - gflags::SetCommandLineOption("help", "false"); - gflags::SetCommandLineOption("helpfull", "false"); - gflags::SetCommandLineOption("helpon", ""); - gflags::SetCommandLineOption("helpmatch", ""); - gflags::SetCommandLineOption("helppackage", "false"); - gflags::SetCommandLineOption("helpxml", "false"); - gflags::SetCommandLineOption("helpshort", "true"); - } - } -}; -} // namespace ge - -#endif // INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index e4141a4b..5fc8f1f8 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -34,6 +34,8 @@ class GE_FUNC_VISIBILITY ModelHelper { Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, ge::ModelBufferData &model) const; + Status GenerateGeModel(const OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index, + const bool is_dyn_root) const; Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape) const; Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file) const; @@ -67,8 +69,6 @@ class GE_FUNC_VISIBILITY ModelHelper { bool IsPartitionedGraph(const GeModelPtr &cur_model) const; - Status 
GenerateGeModel(const OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index, - const bool is_dyn_root) const; Status GenerateGeRootModel(const OmFileLoadHelper &om_load_helper); Status LoadModelData(const OmFileLoadHelper &om_load_helper, const GeModelPtr &cur_model, diff --git a/inc/framework/common/op/attr_value_util.h b/inc/framework/common/op/attr_value_util.h deleted file mode 100644 index 5a41de05..00000000 --- a/inc/framework/common/op/attr_value_util.h +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ -#define INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ - -#if defined(_MSC_VER) -#ifdef FUNC_VISIBILITY -#define GE_FUNC_VISIBILITY _declspec(dllexport) -#else -#define GE_FUNC_VISIBILITY -#endif -#else -#ifdef FUNC_VISIBILITY -#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) -#else -#define GE_FUNC_VISIBILITY -#endif -#endif - -#include -#include -#include -#include "graph/debug/ge_attr_define.h" -#include "proto/om.pb.h" - -namespace ge { -GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, domi::AttrDef *const out); -} -#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ \ No newline at end of file diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h index c369d04f..40c0f5cc 100644 --- a/inc/framework/common/string_util.h +++ b/inc/framework/common/string_util.h @@ -17,20 +17,6 @@ #ifndef INC_FRAMEWORK_COMMON_STRING_UTIL_H_ #define INC_FRAMEWORK_COMMON_STRING_UTIL_H_ -#if defined(_MSC_VER) -#ifdef FUNC_VISIBILITY -#define GE_FUNC_VISIBILITY _declspec(dllexport) -#else -#define GE_FUNC_VISIBILITY -#endif -#else -#ifdef FUNC_VISIBILITY -#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) -#else -#define GE_FUNC_VISIBILITY -#endif -#endif - #include #include @@ -40,6 +26,7 @@ #include #include #include "graph/types.h" +#include "external/ge/ge_error_codes.h" namespace ge { class GE_FUNC_VISIBILITY StringUtils { diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index f8529fa2..c3d04ee2 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -45,7 +45,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFIL FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_HOST_BASE_ADDR; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_HOST_SVM_BASE_ADDR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_HOST_MEMORY_SIZE; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_HOST_SVM_SIZE; 
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB. diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 1c2a52e6..8951697f 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -323,7 +323,7 @@ class GE_FUNC_VISIBILITY GeExecutor { static Status ReleaseSingleOpResource(void *const stream); - static Status ClearCustomAicpuSo(); + static Status ClearCustomAicpuSo(const uint32_t &device_id); static Status GetDeviceIdByModelId(const uint32_t model_id, uint32_t &device_id); diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h index d771bddc..d7afd933 100644 --- a/inc/framework/memory/memory_api.h +++ b/inc/framework/memory/memory_api.h @@ -25,6 +25,7 @@ enum MemStorageType { HBM = 0, RDMA_HBM, HOST_DDR, + HOST_SVM, }; struct HostVarInfo { @@ -38,27 +39,23 @@ struct TensorInfo { DataType data_type; }; -/// /// \param size [in] rdma pool memory size to be allocated. /// \param mem_type [in] memory type for rdma pool. /// \return Status result of function GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); -/// /// \param var_info [in] host variable addr infos. /// \param mem_type [in] memory type for rdma pool. /// \return Status result of function GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); -/// /// \param tensor_info [in] description for tensor stored shared memory. /// \param dev_addr [out] malloced shared memory addr. /// \param memory_size [out] malloced shared memory size. /// \return Status result of function GE_FUNC_VISIBILITY Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); -/// /// \param var_name [in] var_name name of host variable. /// \param base_addr [out] base_addr vase addr of host variable. /// \param var_size [out] var_size memory_size of host variable. diff --git a/inc/framework/omg/model_tool.h b/inc/framework/omg/model_tool.h deleted file mode 100644 index 24554e65..00000000 --- a/inc/framework/omg/model_tool.h +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_FRAMEWORK_OMG_MODEL_TOOL_H_ -#define INC_FRAMEWORK_OMG_MODEL_TOOL_H_ - -#include -#include - -#include "framework/common/debug/ge_log.h" -#include "proto/ge_ir.pb.h" - -namespace ge { -class GE_FUNC_VISIBILITY ModelTool { - public: - static Status GetModelInfoFromOm(const char *model_file, ge::proto::ModelDef &model_def, uint32_t &modeldef_size); - - static Status GetModelInfoFromPbtxt(const char *model_file, ge::proto::ModelDef &model_def); -}; -} // namespace ge - -#endif // INC_FRAMEWORK_OMG_MODEL_TOOL_H_ diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 1addd326..5931d60c 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -99,6 +99,7 @@ struct OmgContext { std::string atc_cmdline; bool user_attr_index_valid = false; bool is_online_model = false; + bool is_subgraph_multi_batch = false; }; } // namespace ge diff --git a/inc/framework/runtime/model_v2_executor.h b/inc/framework/runtime/model_v2_executor.h index dad4f4e4..45655795 100644 --- a/inc/framework/runtime/model_v2_executor.h +++ b/inc/framework/runtime/model_v2_executor.h @@ -28,10 +28,10 @@ #include "subscriber/executor_subscribers_scheduler.h" namespace gert { +enum class ExecutorState { kInit, kLoaded }; enum SubExeGraphType { kInitExeGraph, kMainExeGraph, kDeInitExeGraph, kSubExeGraphTypeEnd }; -static constexpr char *kSubExeGraphTypeStrs[kSubExeGraphTypeEnd] = { - const_cast("Init"), const_cast("Main"), const_cast("DeInit")}; inline const char *GetSubExeGraphTypeStr(SubExeGraphType type) { + constexpr const char *kSubExeGraphTypeStrs[kSubExeGraphTypeEnd] = {"Init", "Main", "DeInit"}; return kSubExeGraphTypeStrs[type]; } @@ -74,6 +74,7 @@ class VISIBILITY_EXPORT ModelV2Executor { ModelDesc *model_desc_ = nullptr; rtStream_t default_stream_ = nullptr; ExecutorSubscribersScheduler subscribers_; + ExecutorState state_ = ExecutorState::kInit; }; } // namespace gert diff --git a/inc/framework/runtime/subscriber/built_in_subscriber_definitions.h b/inc/framework/runtime/subscriber/built_in_subscriber_definitions.h index 4ee2aff2..0dcfbf69 100644 --- a/inc/framework/runtime/subscriber/built_in_subscriber_definitions.h +++ b/inc/framework/runtime/subscriber/built_in_subscriber_definitions.h @@ -32,8 +32,7 @@ enum class BuiltInSubscriberType { kProfiling, kDumper, kNum }; enum class ProfilingType { kHost, // 打开Host侧调度的profiling kDevice, - kGeHost, // 打开GE Host侧调度的profiling - kSingleOpReg, // 单算子需要打开此开关开始register node name和kernel type + kGeHost, // 打开GE Host侧调度的profiling kNum, kAll = kNum }; diff --git a/inc/framework/runtime/subscriber/executor_subscribers_scheduler.h b/inc/framework/runtime/subscriber/executor_subscribers_scheduler.h index 1a9936f8..f697d578 100644 --- a/inc/framework/runtime/subscriber/executor_subscribers_scheduler.h +++ b/inc/framework/runtime/subscriber/executor_subscribers_scheduler.h @@ -23,9 +23,6 @@ #include "global_profiling.h" #include "framework/common/ge_visibility.h" namespace gert { -namespace { -constexpr size_t kInitSubscriberSize = 1UL; -} class ModelV2Executor; class VISIBILITY_EXPORT ExecutorSubscribersScheduler { public: @@ -78,7 +75,7 @@ class VISIBILITY_EXPORT ExecutorSubscribersScheduler { if (ins == nullptr) { return nullptr; } - + constexpr size_t kInitSubscriberSize = 1UL; // profiler exists when ess init if (subscribers_.size() == kInitSubscriberSize) { enabled_ = true; diff --git a/inc/framework/runtime/subscriber/global_profiling.h b/inc/framework/runtime/subscriber/global_profiling.h 
index 6b84214e..afdbcef1 100644 --- a/inc/framework/runtime/subscriber/global_profiling.h +++ b/inc/framework/runtime/subscriber/global_profiling.h @@ -54,6 +54,11 @@ class GlobalProfiler { class VISIBILITY_EXPORT GlobalProfilingWrapper { public: + GlobalProfilingWrapper(const GlobalProfilingWrapper &) = delete; + GlobalProfilingWrapper(GlobalProfilingWrapper &&) = delete; + GlobalProfilingWrapper &operator=(const GlobalProfilingWrapper &) = delete; + GlobalProfilingWrapper &operator=(GlobalProfilingWrapper &&) = delete; + static GlobalProfilingWrapper *GetInstance() { static GlobalProfilingWrapper global_prof_wrapper; return &global_prof_wrapper; diff --git a/metadef b/metadef index 62c14e1c..03482feb 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 62c14e1cde161dccf6967f151ece9509f778c416 +Subproject commit 03482feb52fd7cc8544231f32891c86db3bc91a2 diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h index 8c0c1847..3332cb34 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef AICPU_ENGINE_STRUCT_H__ -#define AICPU_ENGINE_STRUCT_H__ +#ifndef AICPU_ENGINE_STRUCT_H +#define AICPU_ENGINE_STRUCT_H #include "fwk_adpt_struct.h" @@ -53,4 +53,4 @@ struct SessionInfo { #ifdef __cplusplus } #endif -#endif // AICPU_ENGINE_STRUCT_H__ +#endif // AICPU_ENGINE_STRUCT_H diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index ec92a036..c5873a1b 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef FWK_ADPT_STRUCT_H__ -#define FWK_ADPT_STRUCT_H__ +#ifndef FWK_ADPT_STRUCT_H +#define FWK_ADPT_STRUCT_H #include @@ -112,7 +112,7 @@ struct StrFWKKernel { }; #pragma pack(pop) -typedef StrFWKKernel FWKOperateParam; +using FWKOperateParam = StrFWKKernel; // Extent info ShapeAndType const uint32_t kMaxShapeDims = 8U; @@ -154,4 +154,4 @@ struct AsyncWait { } // end namespace FWKAdapter } // namespace aicpu -#endif // FWK_ADPT_STRUCT_H__ +#endif // FWK_ADPT_STRUCT_H diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h index a5a2642c..4ee0eac5 100644 --- a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h +++ b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h @@ -107,6 +107,7 @@ static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // a static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow +static const int32_t ACL_ERROR_RT_STREAM_SYNC_TIMEOUT = 507046; // stream sync time out static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error diff --git a/third_party/fwkacllib/inc/ops/OWNERS b/third_party/fwkacllib/inc/ops/OWNERS index f95df23c..60662397 100755 --- a/third_party/fwkacllib/inc/ops/OWNERS +++ b/third_party/fwkacllib/inc/ops/OWNERS @@ -21,7 +21,6 @@ reviewers: - luanma_bl - LDLD0524 - wywismygod2020 -- lipeiyang3699 - koala-zhang - zhu-jingjing - zhaozhihui5 @@ -41,18 +40,14 @@ reviewers: - djh602 - wangjiangben_hw - li1jie -- clinglai - liujun2014 - soupkey - wu-shengji - cimeng -- ccl_ligang -- xiaozhedeng - granpad7 - tc1qaz - Ronnie_zheng - xiexianhu -- zhouyujoe - zhaoping12 - tanshengshun - fanqirui @@ -62,4 +57,4 @@ reviewers: - gegenhua - qiaohairong options: - no_parent_owners: true \ No newline at end of file + no_parent_owners: true diff --git a/third_party/fwkacllib/inc/ops/cluster.h b/third_party/fwkacllib/inc/ops/cluster.h index d2ee7f09..81cee365 100644 --- a/third_party/fwkacllib/inc/ops/cluster.h +++ b/third_party/fwkacllib/inc/ops/cluster.h @@ -1,5 +1,5 @@ /** - * Copyright 2021 Huawei Technologies Co., Ltd + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2022. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 4e8fb312..782c8796 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -125,7 +125,7 @@ REG_OP(MinimumGrad) *@par Inputs: *One input: *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, - int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. + int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32, uint1. For float32 type, the actual calculation on the chip is based on float16. 
\n *@par Attributes: @@ -137,7 +137,7 @@ REG_OP(MinimumGrad) REG_OP(Cast) .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, - DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32, DT_BF16})) + DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32, DT_BF16, DT_UINT1})) .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32, DT_BF16})) diff --git a/third_party/fwkacllib/inc/ops/experiment_ops.h b/third_party/fwkacllib/inc/ops/experiment_ops.h index 769b5a0b..82965c39 100644 --- a/third_party/fwkacllib/inc/ops/experiment_ops.h +++ b/third_party/fwkacllib/inc/ops/experiment_ops.h @@ -77,6 +77,49 @@ REG_OP(ApplyAdamW) .ATTR(maximize, Bool, false) .OP_END_FACTORY_REG(ApplyAdamW) +/** +* @brief Calculate SQ distance. \n +* +* @par Inputs: +* @li ivf: A Tensor, dtype is uint8. +* @li query: A Tensor, dtype is float16 or float32. +* @li bucket_list: A Tensor, dtype is int32 or int64. +* @li bucket_limits: A Tensor, dtype is int32 or int64. +* @li bucket_offsets: A Tensor, dtype is int32 or int64. +* @li vmin: A Tensor, dtype is float16 or float32. +* @li vdiff: A Tensor, dtype is float16 or float32. \n +* +* @par Outputs: +* @li actual_count: A Tensor, dtype is int32 or int64, the actual number of sq_distance. +* @li sq_distance: A Tensor, dtype is float16 or float32. +* @li grouped_extreme_distance: A Tensor, dtype is float16 or float32, the extremum in each group of sq_distance. +* @li sq_ivf: A Tensor, dtype is int32 or int64. +* @li sq_index: A Tensor, dtype is int32 or int64. \n +* +* @par Attributes: +* @li total_limit: A Int, indicates the max length of the output sq_distance. +* @li group_size: A Int, indicates the group size of the extremum. +* @li extreme_mode: A Int, indicates the type of extremum, 0 means minimum, and 1 means maximum. \n +* +*/ +REG_OP(ScanSQCodes) + .INPUT(ivf, TensorType({DT_UINT8})) + .INPUT(query, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) + .INPUT(bucket_limits, TensorType({DT_INT32, DT_INT64})) + .INPUT(bucket_offsets, TensorType({DT_INT32, DT_INT64})) + .INPUT(vmin, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(vdiff, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(actual_count, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(sq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grouped_extreme_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(sq_ivf, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(sq_index, TensorType({DT_INT32, DT_INT64})) + .REQUIRED_ATTR(total_limit, Int) + .ATTR(group_size, Int, 64) + .ATTR(extreme_mode, Int, 0) + .OP_END_FACTORY_REG(ScanSQCodes) + /** * @brief Multiplies matrix "a" by matrix "b", producing "a * b". \n * @par Inputs: @@ -153,6 +196,103 @@ REG_OP(RotatedNMS) .OUTPUT(keep_indices, TensorType({DT_INT32, DT_INT64})) .REQUIRED_ATTR(iou_threshold, Float) .OP_END_FACTORY_REG(RotatedNMS) + +/** +* @brief Performs average pooling on the input. Used in the combination of conv + avgpoolupdate to replace avgpool +* @par Inputs: +* x1: Output of upstream Conv2d. A tensor of type float16, float32. +* x2: Input feature map of upstream Conv2d. A tensor of type int8, float16, float32. 
+ +* @par Attributes: +* @li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, +* where N = C = 1, and H and W are positive integers within the range [1, 255]. +* @li strides: A required list of 4 ints, specifying the stride of the sliding window. +* The strides of the N and C dimensions are 1. +* The strides of the H and W dimensions are positive integers within the range [1, 63]. +* @li padding_mode: A required string, specifying the padding algorithm, +* either "VALID", "SAME" or "CALCULATED". +* "SAME" means that the outputs will have the same spatial dimensions as the inputs. +* "VALID" means no padding. +* @li pads: Pad value when padding_mode is "CALCULATED". +* @li data_format: An optional string, specifying the data format of "ksize" and "strides", +* either "NCHW" or "NHWC" (default). +* @li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +* @li exclusive: Whether to ignore the padding area when calculating the average. + +* @par Outputs: +* y: The average pooled output tensor. Has the same type and format as input "x1". + +* @attention Constraints: +* @li Only single input and single output are supported. +* @li "ksize_H" and "ksize_W" are positive integers within the range [1, 255], and ksize_H * ksize_W < 256. +* @li Due to instruction restrictions, +* the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. +* @par Third-party framework compatibility +* Compatible with the TensorFlow/PyTorch/ONNX operator AvgPoolV2. +*/ +REG_OP(AvgPoolUpdate) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x2, TensorType({DA_INT4, DT_INT8, DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0, 0, 0, 0}) + .ATTR(data_format, String, "NHWC") + .ATTR(ceil_mode, Bool, false) + .ATTR(exclusive, Bool, true) + .OP_END_FACTORY_REG(AvgPoolUpdate) + +/** +* @brief Batches input data by time. +* @par Inputs: +* x: A list of input tensors. It's a dynamic input. + +* @par Attributes: +* @li window: time window, in the range [-1, int64_max]. If -1, batches by the input data flag; +* otherwise, batches by the input timestamp and data flag. +* @li batch_dim: in the range [-1, input_shape_range). If -1, input shape [x, ..., x] gives output shape [-1, x, ..., x]; +* otherwise the output shape is [x, ..., -1(batch_dim), ..., x]. +* @li drop_remainder: a bool flag that takes effect when window > -1; +* if true, data whose batch window is smaller than "window" is dropped. + +* @par Outputs: +* y: A list of output tensors. It's a dynamic output, the same size as "x". + +* @attention Constraints: +* @li Only supported in helper udf. +*/ +REG_OP(TimeBatch) + .DYNAMIC_INPUT(x, TensorType::RealNumberType()) + .DYNAMIC_OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(window, Int) + .ATTR(batch_dim, Int, -1) + .ATTR(drop_remainder, Bool, false) + .OP_END_FACTORY_REG(TimeBatch) + +/** +* @brief Auto Batch process. \n + +* @par Inputs: +* @li x: A list of input tensor objects. It's a dynamic input. \n + +* @par Outputs: +* @li y: A list of output tensor objects. It's a dynamic output. \n + +* @par Attributes: +* @li batch_size: auto batch size. +* @li timeout: auto batch wait timeout (unit: ms). +* @li padding: whether to pad when the batch is insufficient. +* @li slide_stride: sliding window step.
+*/ +REG_OP(AutoBatch) + .DYNAMIC_INPUT(x, TensorType::RealNumberType()) + .DYNAMIC_OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(batch_size, Int) + .ATTR(timeout, Int, 0) + .ATTR(padding, Bool, false) + .ATTR(slide_stride, Int, 0) + .OP_END_FACTORY_REG(AutoBatch) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_EXPERIMENT_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 3db3cb84..0446aff8 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -2039,24 +2039,24 @@ REG_OP(IMGWarpOffsets) .OP_END_FACTORY_REG(IMGWarpOffsets) /** -*@brief This operation samples 3d input x by using interpolation based on flow field grid, - which is usually gennerated by affine_grid. +* @brief This operation samples 3d input x by using interpolation based on flow field grid, + which is usually gennerated by affine_grid. -*@par Inputs: -*@li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`. -*@li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 2]`. +* @par Inputs: +* @li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`. +* @li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 2]`. -*@par Attributes: -*@li interpolation_mode: An optional string specifying the interpolation method. -*@li padding_mode: An optional string specifying the pad method. -*@li align_corners: An optional bool. If "true", the centers of the corner - pixels of the input and output tensors are aligned. Defaults to "false" . +* @par Attributes: +* @li interpolation_mode: An optional string specifying the interpolation method. +* @li padding_mode: An optional string specifying the pad method. +* @li align_corners: An optional bool. If "true", the centers of the corner + pixels of the input and output tensors are aligned. Defaults to "false" . -*@par Outputs: -*y: Returns 5-D Tensor with the same dtype as `x`. +* @par Outputs: +* y: Returns 5-D Tensor with the same dtype as `x`. -*@par Third-party framework compatibility -*Compatible with pytorch GridSampler3D operator. +* @par Third-party framework compatibility +* Compatible with pytorch GridSampler3D operator. */ REG_OP(GridSampler3D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) diff --git a/third_party/fwkacllib/inc/ops/map_ops.h b/third_party/fwkacllib/inc/ops/map_ops.h index 6ac15bf6..91dd665b 100644 --- a/third_party/fwkacllib/inc/ops/map_ops.h +++ b/third_party/fwkacllib/inc/ops/map_ops.h @@ -88,7 +88,7 @@ REG_OP(TensorMapInsert) * @par Inputs: * @li input_handle: The input map. Must be type: DT_VARIANT. -* @li key: A Tensor,the key to be looked up. Must be one of +* @li key: A Tensor, the key to be looked up. Must be one of the following types: int32,int64,string . \n * @par Attributes: diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 3d162d3a..f4c5d2b4 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -674,7 +674,7 @@ REG_OP(Conj) *@par Inputs: *The input x and weight must have the same type. Inputs include: *@li x: A Tensor dtype of float32. -*@li target: A Tensor dtype of int32. +*@li target: A Tensor dtype of int32 or int64. *@li weight: A Tensor dtype of float32 . 
\n *@par Attributes: @@ -690,7 +690,7 @@ REG_OP(Conj) */ REG_OP(NLLLoss) .INPUT(x, TensorType({DT_FLOAT})) - .INPUT(target, TensorType({DT_INT32})) + .INPUT(target, TensorType({DT_INT32, DT_INT64})) .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT})) .OUTPUT(total_weight, TensorType({DT_FLOAT})) @@ -704,7 +704,7 @@ REG_OP(NLLLoss) *@par Inputs: *@li x:A Tensor dtype of float32. *@li y_grad:A Tensor dtype of float32. -*@li target:A Tensor dtype of int32. +*@li target:A Tensor dtype of int32, int64. *@li weight:A Tensor dtype of float32. *@li total_weight:A Tensor dtype of float32 . \n @@ -721,7 +721,7 @@ REG_OP(NLLLoss) REG_OP(NLLLossGrad) .INPUT(x, TensorType({DT_FLOAT})) .INPUT(y_grad, TensorType({DT_FLOAT})) - .INPUT(target, TensorType({DT_INT32})) + .INPUT(target, TensorType({DT_INT32, DT_INT64})) .INPUT(weight, TensorType({DT_FLOAT})) .INPUT(total_weight, TensorType({DT_FLOAT})) .OUTPUT(x_grad, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 272d4021..57724273 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -210,10 +210,10 @@ REG_OP(SwinTransformerLnQKV) * float32, int32. Has format [ND, NHWC]. \n * @par Attributes: -* @li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to -* [K, M]. -* @li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to -* [K, M]. \n +* @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to +* [M, K] before multiplication. +* @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to +* [K, N] before multiplication. \n * @par Outputs: * y: The result matrix Tensor. 2D. Must be one of the following types: float16, @@ -246,9 +246,9 @@ REG_OP(MatMul) * @par Attributes: * @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to -* [M, K]. +* [M, K] before multiplication. * @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to -* [K, N]. +* [K, N] before multiplication. * @li offset_x: An optional integer for quantized MatMulV2. * The negative offset added to the input x1 for int8 type. Ensure offset_x * within the effective range of int8 [-128, 127]. Defaults to "0". \n @@ -289,9 +289,9 @@ REG_OP(MatMulV2) * @par Attributes: * @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to -* [M, K]. +* [M, K] before multiplication. * @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to -* [K, N]. +* [K, N] before multiplication. * @li offset_x: An optional integer for quantized MatMulV2Compress. * The negative offset added to the input x1 for int8 type. Ensure offset_x * within the effective range of int8 [-128, 127]. Defaults to "0". \n @@ -333,29 +333,20 @@ REG_OP(MatMulV2Compress) * @li c: A matrix Tensor. Must be one of the following types:float32, float16, * int8, int32. Has format ND. * @li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the -* following types: float16, int32, float32, int8. Has format ND. +* following types: float32, float16, int8, int32. Has format ND. * @li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following -* types: float16, int32, float32, int8. 
Has format ND.\n -* The format of a, b, c has restriction:\n -* When type of a is int8 and type of c is int32, the format of a, b, c should -* all be ND.\n -* When type of a is int8 and type of c is float32, the format of a, b, c -* should all be ND.\n -* When type of a is float16 and type of c is float16, the format of a, b, c -* should all be ND.\n -* When type of a is float16 and type of c is float32, the format of a, b, c -* should all be ND. \n +* types: float32, float16, int8, int32. Has format ND. \n * @par Attributes: * Two attributes, including: * @li transpose_a: Optional. A bool. If True, changes the shape of "a" from -* [M, K] to [K, M]. +* [K, M] to [M, K] before multiplication. * @li transpose_b: Optional. A bool. If True, changes the shape of "b" from -* [K, N] to [N, K]. \n +* [N, K] to [K, N] before multiplication. \n * @par Outputs: -* y: The result matrix Tensor. Must be one of the following types: float16, -* float32, int32, int8. Has format [ND], the format should be equal to a. +* y: The result matrix Tensor. Must be one of the following types: float32, +* float16, int8, int32. Has format [ND], the format should be equal to a. */ REG_OP(GEMM) @@ -379,10 +370,10 @@ REG_OP(GEMM) * float32, int32. 2D or higher. Has format [ND, NHWC]. \n * @par Attributes: -* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] -* to [B, K, M]. -* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] -* to [B, K, M]. \n +* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, K, M] +* to [B, M, K] before multiplication. +* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, N, K] +* to [B, K, N] before multiplication. \n * @par Outputs: * y: The result matrix Tensor. 2D or higher. Must be one of the following @@ -418,10 +409,10 @@ REG_OP(BatchMatMul) * int8, int4. Has format [ND, NHWC]. \n * @par Attributes: -* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to -* [B, K, M]. -* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to -* [B, K, M]. \n +* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, K, M] to +* [B, M, K] before multiplication. +* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, N, K] to +* [B, K, N] before multiplication. \n * @par Outputs: * y: The result matrix Tensor. 2D or higher. Must be one of the following @@ -784,7 +775,8 @@ REG_OP(TensorScatterUpdate) * @par Attributes: * @li axis: An optional attribute. Defaults to 0. -* @li reduction: An optional attribute. Defaults to string "none" and can be "add" or "mul". +* @li reduction: An optional attribute. Defaults to string "none" and can be +* "add" or "mul". \n * @par Outputs: * y: A Tensor. Has the same type and format as input "data" . \n @@ -1147,7 +1139,7 @@ REG_OP(DiagPart) * with a set of learned weights, and (optionally) adds biases. \n * @par Inputs: * Four inputs, including: -* @li x: A Tensor of type float16, int8, int4, float32. +* @li x: A Tensor of type float16, int8, int4. * @li w: A weight matrix of type float16, int8, int4, float32. * @li b: An optional Tensor of type float16, int32, float32. * @li offset_w: An optional Tensor of type int8, int4. 
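For the transpose semantics documented above, a shape check may help: with transpose_x1 = false and transpose_x2 = true, "x1" of shape [M, K] and "x2" of shape [N, K] produce "y" of shape [M, N], i.e. y = x1 * x2^T. The minimal graph-construction sketch below is illustrative only and not part of this patch; it assumes the generated ge::op::MatMul wrapper with set_input_/set_attr_ accessors, the sample include path "all_ops.h", and a hypothetical helper name BuildMatMulGraph.

    #include <vector>
    #include "graph/graph.h"
    #include "all_ops.h"  // generated operator wrappers; actual path may differ per package

    ge::Graph BuildMatMulGraph() {
      auto x1 = ge::op::Data("x1");  // expected shape [M, K]
      auto x2 = ge::op::Data("x2");  // expected shape [N, K]
      auto mm = ge::op::MatMul("matmul")
                    .set_input_x1(x1)
                    .set_input_x2(x2)
                    .set_attr_transpose_x1(false)
                    .set_attr_transpose_x2(true);  // x2 is read as [K, N], so y = x1 * x2^T -> [M, N]
      ge::Graph graph("matmul_graph");
      std::vector<ge::Operator> inputs{x1, x2};
      std::vector<ge::Operator> outputs{mm};
      graph.SetInputs(inputs).SetOutputs(outputs);
      return graph;
    }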
@@ -1175,11 +1167,11 @@ REG_OP(DiagPart) * Yes */ REG_OP(FullyConnection) - .INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) - .INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) - .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT, DT_BF16})) + .INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT, DT_BF16})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_BF16})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_BF16})) .REQUIRED_ATTR(num_output, Int) .ATTR(transpose, Bool, false) .ATTR(axis, Int, 1) @@ -1643,7 +1635,7 @@ REG_OP(Tril) * @par Inputs: * @li x: A list of Tensors. Must be one of the following types: int32, * float16, float32. Tensors to be concatenated. All must have size 1 in -* the first dimension and same shape.It's a dynamic input. \n +* the first dimension and same shape. It's a dynamic input. \n * @par Attributes: * @li equation: The subscripts for the Einstein summation. \n @@ -1658,7 +1650,7 @@ REG_OP(Tril) * Input N must be Int. \n * @par Third-party framework compatibility -* Compatible with Pytorch einsum operator. +* Compatible with Tensorflow 2.x einsum operator. */ REG_OP(Einsum) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 96213764..2bb8c2ec 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -99,8 +99,8 @@ Specifies the variance used for inference. Must be "None" if the operation is used for training . \n *@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. -Defaults to "0.0001". +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. +* Defaults to "0.0001". *@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". *@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index c6244a81..aca8e217 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -31,7 +31,7 @@ namespace ge { * Three inputs include: * @li input: 4D origin shape of input tensor [N, C, H, W] or [N, H, W, C], * support float16. -* @li filter_size: A 4D tensor of type int32, int64, with shape [H, W, C, K] +* @li filter_size: A 4D tensor of type int32. * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. * Must be one of the following types: float16. \n @@ -52,9 +52,9 @@ namespace ge { * @par Outputs: * filter_grad: Gradient of the deep convolution relative to the filter with -* shape [H, W, C, K]. Must be one of the following types: float16. \n +* shape [H, W, C, K]. Must be one of the following types: float32. 
\n -* @attention Constraints:\n +* @attention Constraints: * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but * the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n * The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape @@ -90,7 +90,7 @@ REG_OP(DepthwiseConv2DBackpropFilter) * Two inputs include: \n * @li input: 4D tensor with shape [N, C, H, W] or [N, H, W, C], of type float16 * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C], -* of type float16 +* of type float16. * @par Attributes: * @li filter_size: A required list or tuple. Shape of filter. @@ -133,8 +133,8 @@ REG_OP(DepthwiseConv2DBackpropFilter) * instead. */ REG_OP(DepthwiseConv2DBackpropFilterD) - .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) - .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) + .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) .OUTPUT(filter_grad, TensorType({DT_FLOAT32})) .REQUIRED_ATTR(filter_size, ListInt) .REQUIRED_ATTR(strides, ListInt) @@ -147,9 +147,9 @@ REG_OP(DepthwiseConv2DBackpropFilterD) * @brief Computes the gradients of depthwise convolution with respect to the * input. \n * @par Inputs: -* Three inputs include: \n +* Three inputs include: * @li input_size: 4D shape of input tensor [N, C, H, W] or [N, H, W, C], -* support int32, int64. +* support int32. * @li filter: 4D filter tensor with shape of [H, W, C, K], support float16. * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. * Must be one of the following types: float16 . \n @@ -172,7 +172,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD) * @par Outputs: * input_grad: Gradient of the deep convolution relative to the input with shape * [N, C, H, W] or [N, H, W, C] Must be one of the following types: -* float16, float32. \n +* float16. \n * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but @@ -184,7 +184,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD) * data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n * Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * -* Wf * C0 * C0 * 2) / (2 * Wo *C0).\n +* Wf * C0 * C0 * 2) / (2 * Wo *C0). \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropInput. @@ -194,7 +194,7 @@ REG_OP(DepthwiseConv2DBackpropInput) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(filter, TensorType({DT_FLOAT16})) .INPUT(out_backprop, TensorType({DT_FLOAT16})) - .OUTPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32})) + .OUTPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(strides, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .REQUIRED_ATTR(pads, ListInt) @@ -355,9 +355,8 @@ REG_OP(BiasAddGrad) * data tensor. An integer vector representing the shape of input, where * input is a 4-D tensor [batch, height, width, channels] * or [batch, channels, height, width]. - * @li filter: A Tensor. Must be one of the following types: float16, float32, - * float64. 4-D with shape - * [filter_height, filter_width, in_channels, out_channels] + * @li filter: A Tensor. Must be one of the following types: float16. + * 4-D with shape [filter_height, filter_width, in_channels, out_channels] * or [out_channels, filter_height, filter_width, in_channels] * or [out_channels, in_channel, filter_height, filter_width]. * @li out_backprop: A Tensor. 
Must have the same type as filter. @@ -372,14 +371,9 @@ REG_OP(BiasAddGrad) | Tensor | out_bckprop | filter | y |\n |-----------|-------------|---------|--------|\n | Data Type | float16 | float16 | float16|\n - | | float32 | float32 | float32|\n - | | float64 | float64 | float64|\n | Format | NCHW | NCHW | NCHW |\n | | NHWC | HWCN | NHWC |\n *\n - * For float32 and float64 type, the actual calculation on the chip is based - * on float16. - *\n * *@par Attributes: * Five attributes: @@ -400,13 +394,13 @@ REG_OP(BiasAddGrad) *\n | Name | Field | Scope |\n |------------------|----------|--------------|\n - | input_size | H | [1, 200000] |\n + | input_size | H | [1, 4096] |\n | | W | [1, 4096] |\n | Filter | H | [1, 255] |\n | | W | [1, 255] |\n - | out_backprop | H*strideH| [1, 200000] |\n + | out_backprop | H*strideH| [1, 4096] |\n | | W*strideW| [1, 4096] |\n - | y(fmap) | H | [1, 200000] |\n + | y(fmap) | H | [1, 4096] |\n | | W | [1, 4096] |\n | Stride | H | [1, 63] |\n | | W | [1, 63] |\n @@ -455,7 +449,7 @@ REG_OP(Conv2DBackpropInput) *@brief Computes the gradients of convolution with respect to the input. * @par Inputs: * Two inputs: - * @li filter: A Tensor. Types is float16. + * @li filter: A Tensor. Types is float16 or int8. * 4-D with shape [filter_height, filter_width, in_channels, out_channels] * or [out_channels, filter_height, filter_width, in_channels] * or [out_channels, in_channel, filter_height, filter_width]. @@ -479,8 +473,8 @@ REG_OP(Conv2DBackpropInput) * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to * "NHWC". Specify the data format of the input and output data. *@par Outputs: - * y: A Tensor. Has the same type as filter,4-D tensor [batch, height, width, - * channels] or [batch, channels, height, width]. + * y: A Tensor. with the type of: float16, float32, int32, 4-D tensor + * [batch, height, width, channels] or [batch, channels, height, width]. * @par Third-party framework compatibility * Compatible with Tensorflow's conv2d_backprop_input *@par Restrictions: @@ -547,11 +541,11 @@ REG_OP(Conv2DBackpropInputD) *\n | Name | Field | Scope |\n |------------------|----------|--------------|\n - | x (out_backprop) | H*strideH| [1, 200000] |\n + | x (out_backprop) | H*strideH| [1, 4096] |\n | | W*strideW| [1, 4096] |\n | Filter | H | [1, 255] |\n | | W | [1, 255] |\n - | y (fmap) | H | [1, 200000] |\n + | y (fmap) | H | [1, 4096] |\n | | W | [1, 4096] |\n | Stride | H | [1, 63] |\n | | W | [1, 63] |\n @@ -602,8 +596,8 @@ REG_OP(Deconvolution) *@brief Computes the gradients of convolution with respect to the filter *@par Inputs: * Three inputs: - * @li x: A Tensor. Must be one of the following types: float16, float32, - * float64. 4-D with shape [batch, in_height, in_width, in_channels] or + * @li x: A Tensor. Must be one of the following types: float16. + * 4-D with shape [batch, in_height, in_width, in_channels] or * [batch, in_channels, in_height, in_width]. * @li filter_size: A const Tensor of type int32. Currently does not support * data tensor. 
An integer vector representing the tensor shape of filter, @@ -621,9 +615,7 @@ REG_OP(Deconvolution) *\n | Tensor | x | out_backprop | y |\n |-----------|---------|--------------|---------|\n - | Data Type | float16 | float16 | float16 |\n - | | float32 | float32 | float32 |\n - | | float64 | float64 | float64 |\n + | Data Type | float16 | float16 | float32 |\n | Format | NCHW | NCHW | NCHW |\n | | NHWC | NHWC | HWCN |\n *\n @@ -650,13 +642,13 @@ REG_OP(Deconvolution) *\n | Name | Field | Scope |\n |------------------|----------|--------------|\n - | x(fmap) | H | [1, 200000] |\n + | x(fmap) | H | [1, 4096] |\n | | W | [1, 4096] |\n | Filter Size | H | [1, 255] |\n | | W | [1, 255] |\n - | out_backprop | H | [1, 200000] |\n + | out_backprop | H | [1, 4096] |\n | | W | [1, 4096] |\n - | y | H | [1, 200000] |\n + | y | H | [1, 4096] |\n | | W | [1, 4096] |\n | Stride | H | [1, 63] |\n | | W | [1, 63] |\n @@ -1015,13 +1007,12 @@ REG_OP(DeformableConv2D) /** *@brief Computes a 3D convolution given 5D "x" and "filter" tensors. *@par Inputs: - * @li x: A 5D tensor. Must be one of the following types: float16, - * (Currently does not support int8). The format of x is NCDHW or NDHWC. + * @li x: A 5D tensor. Must be one of the following types: float16, int8. + * The format of x is NCDHW or NDHWC. * @li filter: A 5D tensor of the same type as "x". - * (Currently does not support int8). * The format is NCDHW, NDHWC or DHWCN. * @li bias: Optional. An 1D tensor of the same type as "x". - * @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n + * @li offset_w: Optional. An 1D tensor for quantized deconvolution. \n *@par Attributes: * @li strides: Required. A list of 5 integers. Specifies the stride of the @@ -1041,7 +1032,8 @@ REG_OP(DeformableConv2D) * Defaults to 0. Reserved. \n *@par Outputs: - * y: A Tensor. Has the same type and data format as "x". \n + * y: A Tensor. Has the same data format as "x". if the type of "x" is int8, + * the type of y is int32. \n *@attention Constraints: * The image size after padding is greater than the filter size. \n @@ -1051,11 +1043,11 @@ REG_OP(DeformableConv2D) * @li Compatible with the Caffe operator Convolution. */ REG_OP(Conv3D) - .INPUT(x, TensorType({DT_FLOAT16})) - .INPUT(filter, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32})) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT32})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) @@ -1068,12 +1060,11 @@ REG_OP(Conv3D) /** *@brief Computes the gradients of convolution 3d with respect to the input. *@par Inputs: - * @li input_size: A Tensor of type int32, int64. An integer vector + * @li input_size: A Tensor of type int32. An integer vector * representing the shape of input, where input is a 5-D tensor * [batch, depth, height, width, channels] or * [batch, channels, depth, height, width]. - * @li filter: A Tensor. Must be one of the following types: float16, float32. - * Currently does not support double. + * @li filter: A Tensor. Must be one of the following types: float16. * @li out_backprop: A Tensor. Must have the same type as filter. 
* 5-D with shape [batch, depth, out_height, out_width, out_channels] * or [batch, out_channels, depth, out_height, out_width]. Gradients with @@ -1095,8 +1086,7 @@ REG_OP(Conv3D) * Defaults to "NDHWC". Specify the data format of the input and output data. \n *@par Outputs: - * y: A Tensor. Has the same type as filter,and has same format as - * "input_size". \n + * y: A Tensor. Has same format as "input_size". \n *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_input @@ -1207,8 +1197,7 @@ REG_OP(LSTM) /** *@brief Computes the gradients of convolution3D with respect to the filter *@par Inputs: - * @li x: A Tensor. Must be one of the following types: float16, float32, - * double. Currently does not support double. + * @li x: A Tensor. Must be one of the following types: float16. * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] * or [batch, in_channels, in_depth, in_height, in_width]. * @li filter_size: A Tensor of type int32. An integer vector representing the @@ -1236,7 +1225,7 @@ REG_OP(LSTM) * Defaults to "NDHWC". Specify the data format of the input and output data. \n *@par Outputs: - * y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW + * y: A Tensor that has the type float32 and the format is NDHWC, NCDHW * or DHWCN. \n *@par Third-party framework compatibility @@ -1310,7 +1299,7 @@ REG_OP(Conv3DBackpropFilterD) *@brief Computes the transpose of convolution 3d with respect to the input. *@par Inputs: - * @li input_size: A Tensor of type int32, int64. An integer vector + * @li input_size: A Tensor of type int32. An integer vector * representing the shape of input. * @li x: A Tensor of type float16, currently does not support int8. The format * is NDHWC or NCDHW. @@ -1336,7 +1325,7 @@ REG_OP(Conv3DBackpropFilterD) * @li offset_x: Optional. Input offset_x value. Reserved. \n *@par Outputs: - * y: A Tensor. Has the same type and format as "x". + * y: A Tensor. Has the same format as "x", has the type float16, float32. */ REG_OP(Conv3DTranspose) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) @@ -1362,7 +1351,7 @@ REG_OP(Conv3DTranspose) * The format is NDHWC or NCDHW. * @li filter: A Tensor of type float16, currently does not support int8. * The format is NDHWC, NCDHW or DHWCN. - * @li bias: Optional. An 1D tensor of the same type as "x". Reserved. + * @li bias: Optional. An 1D tensor of the same type as "x". * @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n *@par Attributes: @@ -1383,7 +1372,7 @@ REG_OP(Conv3DTranspose) * @li offset_x: Optional. Input offset_x value. Reserved. \n *@par Outputs: - * y: A Tensor. Has the same type and format as "x". \n + * y: A Tensor. Has the same format as "x", has the type float16, float32. \n *@par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. 
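The constraint tables above relate the fmap, filter, stride and out_backprop fields after the H upper bound was tightened to 4096. As an illustration only (not part of these headers), the following minimal C sketch assumes the standard convolution output-size formula and hypothetical shape values to show how those fields are connected and how the documented ranges can be checked.

#include <stdio.h>

/* Standard convolution output size: given a fmap dimension, kernel, stride,
 * padding and dilation, this is the out_backprop dimension the tables refer to. */
static int conv_out_dim(int in, int kernel, int stride, int pad_head, int pad_tail, int dilation)
{
    int effective_kernel = dilation * (kernel - 1) + 1;
    return (in + pad_head + pad_tail - effective_kernel) / stride + 1;
}

int main(void)
{
    /* Hypothetical example: fmap H = 224, 3x3 filter, stride 2, pad 1/1, dilation 1. */
    int fmap_h = 224, filter_h = 3, stride_h = 2, pad_top = 1, pad_bottom = 1, dilation_h = 1;
    int out_backprop_h = conv_out_dim(fmap_h, filter_h, stride_h, pad_top, pad_bottom, dilation_h);

    printf("out_backprop H = %d\n", out_backprop_h);                      /* 112 */
    printf("fmap H in [1, 4096]: %d\n", fmap_h >= 1 && fmap_h <= 4096);
    printf("filter H in [1, 255]: %d\n", filter_h >= 1 && filter_h <= 255);
    printf("stride H in [1, 63]: %d\n", stride_h >= 1 && stride_h <= 63);
    printf("out_backprop H * strideH in [1, 4096]: %d\n",
           out_backprop_h * stride_h >= 1 && out_backprop_h * stride_h <= 4096);
    return 0;
}

With these example values the forward pass maps fmap H = 224 to out_backprop H = 112, so both the fmap range [1, 4096] and the H*strideH bound in the tables hold.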
@@ -1428,7 +1417,7 @@ REG_OP(Conv3DTransposeD) | Tensor | x | filter | bias | y |\n |-----------|---------|---------|---------|--------|\n | Data Type | float16 | float16 | float16 | float16|\n - | | int8 | int8 | int32 | int32 |\n + | | float16 | float16 | float32 | float32|\n | Format | NCHW | NCHW | ND | NCHW |\n | | NHWC | HWCN | | NHWC |\n *\n @@ -1461,13 +1450,13 @@ REG_OP(Conv3DTransposeD) *\n | Name | Field | Scope |\n |------------------|----------|--------------|\n - | input_size | H | [1, 200000] |\n + | input_size | H | [1, 4096] |\n | | W | [1, 4096] |\n - | x (out_backprop) | H*strideH| [1, 200000] |\n + | x (out_backprop) | H*strideH| [1, 4096] |\n | | W*strideW| [1, 4096] |\n | filter | H | [1, 255] |\n | | W | [1, 255] |\n - | y (fmap) | H | [1, 200000] |\n + | y (fmap) | H | [1, 4096] |\n | | W | [1, 4096] |\n | Stride | H | [1, 63] |\n | | W | [1, 63] |\n @@ -1503,9 +1492,9 @@ REG_OP(Conv2DTranspose) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -1522,7 +1511,7 @@ REG_OP(Conv2DTranspose) * @li x: A Tensor of type float16, int8. * @li filter: A Tensor of type float16, int8. Must have the same type as "x". * @li bias: An optional 1D tensor of the same type as "x". - * @li offset_w: An optional 1D tensor for quantized inference. Type is int8. Reserved. + * @li offset_w: An optional 1D tensor for quantized inference. Type is int8. *@par Required Attributes: * @li input_size: A Tensor of type int32 or int64. An integer vector representing the * shape of input. @@ -1550,9 +1539,9 @@ REG_OP(Conv2DTranspose) REG_OP(Conv2DTransposeD) .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 65411e2a..8db82ec9 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -193,6 +193,39 @@ REG_OP(SigmoidCrossEntropyWithLogitsV2) .ATTR(reduction, String, "mean") .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsV2) +/** +* @brief Computes the sigmoid focal loss of "pred" and "target". + +* @par Inputs: +* Three inputs, including: +* @li pred: A 2-dimensional Tensor of type float16 or float32, specifying the predicted value. +* @li target: A 1-dimensional Tensor of type int32, specifying the target value. +* @li weight: A 1-dimensional Tensor, specifying the weight value. \n + +* @par Attributes: +* @li gamma: An optional float, specifying the exponent of the modulating factor (1 - pt) +* to balance easy/hard examples. Defaults to 2.0. 
+* @li alpha: An optional float, specifying the weighting factor in range (0, 1) to balance +* the importance of positive/negative examples or less than 0 for ignore. Defaults to 0.25. +* @li reduction: An optional string from "none", "mean", and "sum", specifying the +* reduction type to be applied to the output. Defaults to "mean". \n + +* @par Outputs: +* loss: Sigmoid focal loss between the predicted value and target value. Has the same dimensions as "pred". \n + +* @par Third-party framework compatibility +* Compatible with mmcv operator SigmoidFocalLoss. +*/ +REG_OP(SigmoidFocalLoss) + .INPUT(pred, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(target, TensorType({DT_INT32})) + .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(loss, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(gamma, Float, 2.0) + .ATTR(alpha, Float, 0.25) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SigmoidFocalLoss) + /** * @brief Computes the regression box of the RPN. It is a FasterRCNN operator . @@ -1834,5 +1867,79 @@ REG_OP(AxpyWithSoftmaxAndDropOutDoMask) .REQUIRED_ATTR(input_keep_prob, Float) .ATTR(axis, ListInt, {-1}) .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropOutDoMask) + +/** +* @brief MMCV Function: sigmoid_focal_loss_grad. \n + +* @par Inputs: +* Three inputs and one optional input, including: +* @li pred: the predicted tensor. The supported types are float16 and float32. +* @li target: the target label Tensor. The supported type is int32. +* @li dout: the gradient from the downstream op, which has the same shape as pred. The supported types are float16 and float32. +* @li weight: An optional input Tensor, defaults to None, which helps to calculate the loss by supplying sample weights: +* shape of pred should be (B,D), B means batch size, D means the number of labels. +* shape of target should be (D, ). +* shape of weight should be (D, ) \n + +* @par Attributes: +* @li alpha: An attribute used to reweight the sample. The type is float. \n +* @li gamma: An attribute used to calculate the power of the probability. +* The type is float. \n +* @li reduction: the type of the reduce method. Defaults to 'mean', which means computing the average loss. + 'sum' means computing the sum of the loss, 'none' means no reduction.\n + +* @par Outputs: +* grad: A mutable Tensor. Has the same type and shape as "pred". \n + +* @par Third-party framework compatibility +* Compatible with the MMCV operator SigmoidFocalLossGrad. +*/ +REG_OP(SigmoidFocalLossGrad) + .INPUT(pred, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_INT32})) + .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(alpha, Float, 0.25) + .ATTR(gamma, Float, 2.0) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SigmoidFocalLossGrad) + +/** +* @brief MMCV Function: softmax_focal_loss_grad. \n + +* @par Inputs: +* Three inputs and one optional input, including: +* @li pred: the predicted tensor. The supported types are float16 and float32. +* @li target: the target label Tensor. The supported type is int32. +* @li dout: the gradient from the downstream op, which has the same shape as pred. The supported types are float16 and float32. +* @li weight: An optional input Tensor, defaults to None, which helps to calculate the loss by supplying sample weights: +* shape of pred should be (B,D), B means batch size, D means the number of labels. +* shape of target should be (B, D).
+* shape of weight should be (D, ) \n + +* @par Attributes: +* @li alpha: An attribute used to reweight the sample. The type is float. \n +* @li gamma: An attribute used to calculate the power of the probability. +* The type is float. \n +* @li reduction: the type of the reduce method. Defaults to 'mean', which means computing the average loss. + 'sum' means computing the sum of the loss, 'none' means no reduction.\n + +* @par Outputs: +* grad: A mutable Tensor. Has the same type and shape as "pred". \n + +* @par Third-party framework compatibility +* Compatible with the MMCV operator SoftmaxFocalLossGrad. +*/ +REG_OP(SoftmaxFocalLossGrad) + .INPUT(pred, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_INT32})) + .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(alpha, Float, 0.25) + .ATTR(gamma, Float, 2.0) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SoftmaxFocalLossGrad) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 16ec4357..dec8688a 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -174,8 +174,8 @@ REG_OP(AvgPoolV2) /** * @brief Performs average pooling on the input. \n * @par Inputs: -* x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type -* float16, float32, double. \n +* @li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and +* type float16. \n * @par Attributes: * @li ksize: List of ints that has length 1, 3 or 5. The size of the window @@ -201,8 +201,8 @@ REG_OP(AvgPoolV2) * Compatible with the TensorFlow operator AvgPool3D. */ REG_OP(AvgPool3D) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -216,9 +216,9 @@ REG_OP(AvgPool3D) /** * @brief Performs average pooling on the input. * @par Inputs: -* @li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. -* @li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. -* @li multiplier: An optional tensor of float16, float32, double. +* @li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16. +* @li filter: An optional tensor of type float16, fractal_z_3d layout. +* @li multiplier: An optional tensor of float16. * @par Attributes: * @li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. @@ -239,10 +239,10 @@ REG_OP(AvgPool3D) * Compatible with the TensorFlow operator AvgPool3D.
*/ REG_OP(AvgPool3DD) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) - .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) - .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -256,7 +256,7 @@ REG_OP(AvgPool3DD) * @brief Computes AvgPool3DGrad function. \n * @par Inputs: * @li orig_input_shape: An NDHWC tensor of type int32. -* @li grads: An NDHWC tensor of type float16, float32, or double. \n +* @li grads: An NDHWC tensor of type float16. \n * @par Attributes: * @li ksize: List of ints that has length 5. The size of the window for @@ -284,8 +284,8 @@ REG_OP(AvgPool3DD) REG_OP(AvgPool3DGrad) .INPUT(orig_input_shape, TensorType({DT_INT32})) - .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) - .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -299,7 +299,7 @@ REG_OP(AvgPool3DGrad) * @brief Performs average pooling on the input. * @par Inputs: * @li grads: An NDHWC tensor of type float16. -* @li filter: An optional tensor of type float16, fractal_z_3d layout. +* @li filter: An optional tensor of type float16. * @li multiplier: An optional tensor of float16. * @par Attributes: @@ -867,8 +867,8 @@ REG_OP(MaxPoolGradGradWithArgmax) /** * @brief Computes avgpoograd function. \n * @par Inputs: -* @li orig_input_shape: An NHWC tensor of type int32. -* @li input_grad: An NHWC tensor of type float16, float32, or double. \n +* @li orig_input_shape: A tensor of type int32. +* @li input_grad: A tensor of type float16. \n * @par Attributes: * @li ksize: A required tuple or list, specifying the size of the window for @@ -887,8 +887,8 @@ REG_OP(MaxPoolGradGradWithArgmax) */ REG_OP(AvgPoolGrad) .INPUT(orig_input_shape, TensorType({DT_INT32})) - .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) - .OUTPUT(out_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(out_grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .REQUIRED_ATTR(ksize, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(padding, String) @@ -898,9 +898,9 @@ REG_OP(AvgPoolGrad) /** * @brief Computes gradients of average pooling function . \n * @par Inputs: -* @input_grad: An NHWC tensor of type float16. -* @mean_matrix: Assist matrix, an NHWC tensor of type float16. -* @kernel_matrix: Assist matrix, an NHWC tensor of type float16. +* @li input_grad: An NHWC tensor of type float16. +* @li mean_matrix: Assist matrix, an NHWC tensor of type float16. +* @li kernel_matrix: Assist matrix, an NHWC tensor of type float16. * @par Attributes: * @li orig_input_shape: A required Original input dimensions. @@ -913,7 +913,7 @@ REG_OP(AvgPoolGrad) * @li data_format: An optional string. Defaults to "NHWC" . \n * @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input".
+* @li out_grad: A mutable tensor with the same shape and type as "orig_input". * * @par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use AvgPoolGrad instead. diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 079982db..6fde6d46 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -277,7 +277,7 @@ REG_OP(BN3DTrainingUpdate) *y: A tensor of type float16 or float32 for the normalized "x" . \n *@attention Constraints: -*For Ascend 310, the result accuracy fails to reach 1/1000 due to the +* For Ascend 310, the result accuracy fails to reach 1/1000 due to the * square root instruction. */ REG_OP(BNInfer) @@ -314,8 +314,8 @@ REG_OP(BNInfer) *@li batch_variance: A tensor of type float32, for the variance of "x" . \n *@attention Constraints: -*This operator is used in conjunction with BNTrainingReduce. -*For Ascend 310, the result accuracy fails to reach 1/1000 due to +*@li This operator is used in conjunction with BNTrainingReduce. +*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to * the square root instruction. */ REG_OP(BNTrainingUpdateV2) diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index 2081ac97..18a50d2e 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -83,7 +83,8 @@ typedef enum tagRtPlatformType { PLATFORM_CLOUD_V2_910B2 = 11, PLATFORM_CLOUD_V2_910B3 = 12, PLATFORM_CLOUD_V2_910B4 = 13, - PLATFORM_END = 14, + PLATFORM_MDC_PG2 = 14, + PLATFORM_END = 15, } rtPlatformType_t; typedef enum tagRtCubeFracMKNFp16 { diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index b1157c2b..d5fb6fc6 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -55,10 +55,10 @@ typedef enum tagRtFeatureType { } rtFeatureType_t; typedef enum tagRtDeviceFeatureType { - FEATURE_TYPE_SCHE, - FEATURE_TYPE_BLOCKING_OPERATOR, - FEATURE_TYPE_FFTS_MODE, - FEATURE_TYPE_END, + FEATURE_TYPE_SCHE, + FEATURE_TYPE_BLOCKING_OPERATOR, + FEATURE_TYPE_FFTS_MODE, + FEATURE_TYPE_END, } rtDeviceFeatureType_t; typedef enum tagMemcpyInfo { diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 1c33e1b3..3610ad3c 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -392,7 +392,23 @@ RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, con RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind, rtStream_t stm); -typedef struct rtMemcpyAddrInfo { +/** + * @ingroup dvrt_mem + * @brief asynchronized memcpy + * @param [in] dst destination address pointer + * @param [in] destMax max length of destination address memory + * @param [in] src source address pointer + * @param [in] count the number of bytes to copy + * @param [in] kind memcpy type + * @param [in] stream asynchronized task stream + * @param [in] qosCfg asynchronized task qosCfg + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemcpyAsyncWithCfg(void *dst, uint64_t destMax, const void *src, uint64_t count, + rtMemcpyKind_t kind, rtStream_t stream, uint32_t qosCfg); + +typedef struct { uint32_t resv0; uint32_t resv1; uint32_t resv2; @@ -420,6 +436,23 @@ RTS_API rtError_t rtMemcpyAsyncPtr(void *memcpyAddrInfo,
uint64_t destMax, uint6 RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind, rtDataType_t type, rtStream_t stm); +/** + * @ingroup dvrt_mem + * @brief asynchronized reduce memcpy + * @param [in] dst destination address pointer + * @param [in] destMax max length of destination address memory + * @param [in] src source address pointer + * @param [in] cnt the number of bytes to copy + * @param [in] kind memcpy type + * @param [in] type data type + * @param [in] stm asynchronized task stream + * @param [in] qosCfg asynchronized task qosCfg + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtReduceAsyncWithCfg(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind, + rtDataType_t type, rtStream_t stm, uint32_t qosCfg); + /** * @ingroup dvrt_mem * @brief asynchronized reduce memcpy diff --git a/third_party/fwkacllib/inc/runtime/rt_mem_queue.h b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h index 2ed9fd08..0997cfe3 100644 --- a/third_party/fwkacllib/inc/runtime/rt_mem_queue.h +++ b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h @@ -372,6 +372,17 @@ RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg); */ RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *memBuf, uint64_t size); +/** +* @ingroup rt_mem_queue +* @brief alloc buff +* @param [out] memBuf: buff addr alloced +* @param [in] size: The amount of memory space requested +* @param [in] flag: Huge page flag(bit0~31: mem type, bit32~bit35: devid, bit36~63: resv) +* @param [in] grpId: group id +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufAllocEx(rtMbufPtr_t *memBuf, uint64_t size, uint64_t flag, int32_t grpId); + /** * @ingroup rt_mem_queue * @brief free buff @@ -417,6 +428,15 @@ RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t memBuf, uint64_t *totalSize); */ RTS_API rtError_t rtMbufGetPrivInfo(rtMbufPtr_t memBuf, void **priv, uint64_t *size); +/** +* @ingroup rt_mem_queue +* @brief copy buf ref +* @param [in] memBuf: src buff addr +* @param [out] newMemBuf: dest buff addr +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufCopyBufRef(rtMbufPtr_t memBuf, rtMbufPtr_t *newMemBuf); + // mem group typedef struct { uint64_t maxMemSize; // max buf size in grp, in KB.
= 0 means no limit @@ -430,30 +450,43 @@ typedef struct { uint32_t rsv : 28; } rtMemGrpShareAttr_t; -#define RT_MEM_GRP_QUERY_GROUPS_OF_PROCESS 1 // query process all grp +#define RT_MEM_GRP_QUERY_GROUPS_OF_PROCESS 1 // query process all grp +#define RT_MEM_GRP_QUERY_GROUP_ID 2 // query group id from name + +#define RT_MEM_GRP_NAME_LEN 32 // it must be same as driver define BUFF_GRP_NAME_LEN typedef struct { int32_t pid; } rtMemGrpQueryByProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS +typedef struct { + char grpName[RT_MEM_GRP_NAME_LEN]; +} rtMemGrpQueryGroupId_t; // cmd: RT_MEM_GRP_QUERY_GROUP_ID + typedef struct { int32_t cmd; union { rtMemGrpQueryByProc_t grpQueryByProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + rtMemGrpQueryGroupId_t grpQueryGroupId; // cmd: RT_MEM_GRP_QUERY_GROUP_ID }; } rtMemGrpQueryInput_t; -#define RT_MEM_GRP_NAME_LEN 32 // it must be same as driver define BUFF_GRP_NAME_LEN - typedef struct { char_t groupName[RT_MEM_GRP_NAME_LEN]; // group name rtMemGrpShareAttr_t attr; // process in group attribute } rtMemGrpOfProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS typedef struct { - rtMemGrpOfProc_t *groupsOfProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + int32_t groupId; // group id +} rtMemGrpQueryGroupIdInfo_t; // cmd: RT_MEM_GRP_QUERY_GROUP_ID + +typedef struct { size_t maxNum; // max number of result size_t resultNum; // if the number of results exceeds 'maxNum', only 'maxNum' results are filled in buffer + union { + rtMemGrpOfProc_t *groupsOfProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + rtMemGrpQueryGroupIdInfo_t *groupIdInfo; // cmd: RT_MEM_GRP_QUERY_GROUP_ID + }; } rtMemGrpQueryOutput_t; /** diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h index b778550f..3fc71e8d 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars.h @@ -99,8 +99,66 @@ RTS_API rtError_t rtCmoTaskLaunch(rtCmoTaskInfo_t *taskInfo, rtStream_t stm, uin * @return RT_ERROR_NONE for ok, others failed */ RTS_API rtError_t rtBarrierTaskLaunch(rtBarrierTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); -#if defined(__cplusplus) +/** + * @ingroup rt_stars + * @brief dvpp group handle. + */ +typedef void *rtDvppGrp_t; + +typedef struct tagDvppGrpRptInfo { + uint32_t deviceId; + uint32_t streamId; + uint32_t taskId; + uint8_t sqeType; + uint8_t cqeErrorCode; + uint8_t reserve[2]; + uint32_t accErrorCode; +} rtDvppGrpRptInfo_t; + +typedef void (*rtDvppGrpCallback)(rtDvppGrpRptInfo_t *rptInfo); + +/** + * @ingroup rt_stars + * @brief create dvpp group. + * @param [in] flags group flag, reserved parameter + * @param [out] grp group handle + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtDvppGroupCreate(rtDvppGrp_t *grp, uint32_t flags); + +/** + * @ingroup rt_stars + * @brief destroy dvpp group. 
+ * @param [in] grp group handle + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtDvppGroupDestory(rtDvppGrp_t grp); + +/** + * @ingroup rt_stars + * @brief create stream with grp handle + * @param [in|out] stm created stream + * @param [in] priority stream priority + * @param [in] flags stream op flags + * @param [in] grp grp handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtStreamCreateByGrp(rtStream_t *stm, int32_t priority, uint32_t flags, rtDvppGrp_t grp); + +/** + * @ingroup rt_stars + * @brief wait report by grp + * @param [in] grp group handle + * @param [in] callBackFunc callback + * @param [in] timeout wait timeout config, ms, -1: wait forever + * @return RT_ERROR_NONE for ok, others failed + */ +RTS_API rtError_t rtDvppWaitGroupReport(rtDvppGrp_t grp, rtDvppGrpCallback callBackFunc, int32_t timeout); + +#if defined(__cplusplus) } #endif -#endif // CCE_RUNTIME_RT_STARS_H \ No newline at end of file +#endif // CCE_RUNTIME_RT_STARS_H diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index a6abc8fa..a4151ca0 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -101,6 +101,16 @@ RTS_API rtError_t rtStreamWaitEventWithTimeout(rtStream_t stm, rtEvent_t evt, ui */ RTS_API rtError_t rtStreamSynchronize(rtStream_t stm); +/** + * @ingroup dvrt_stream + * @brief wait stream to be complete and set timeout + * @param [in] stm stream to wait + * @param [in] timeout timeout value,the unit is milliseconds + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStreamSynchronizeWithTimeout(rtStream_t stm, int32_t timeout); + /** * @ingroup dvrt_stream * @brief queries an asynchronous stream for completion status @@ -202,7 +212,7 @@ RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtSt * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stm, uint32_t flag, const void *addr, - uint32_t *streamId, uint32_t *taskId); + uint32_t *streamId, uint32_t *taskId); /* * @ingroup rt_model diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index 2353d967..1f09000b 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -18,6 +18,8 @@ #define PROF_TRAINING_TRACE 0x00000040ULL #define PROF_MSPROFTX 0x00000080ULL #define PROF_RUNTIME_API 0x00000100ULL +#define PROF_TASK_FRAMEWORK 0x00000200ULL +#define PROF_TASK_TSFW 0x00000400ULL // system profilinig switch #define PROF_CPU 0x00010000ULL @@ -52,6 +54,8 @@ constexpr uint64_t PROF_AICPU_MODEL = 0x4000000000000000ULL; #define PROF_TRAINING_TRACE_MASK 0x00000040ULL #define PROF_MSPROFTX_MASK 0x00000080ULL #define PROF_RUNTIME_API_MASK 0x00000100ULL +#define PROF_TASK_FRAMEWORK_MASK 0x00000200ULL +#define PROF_TASK_TSFW_MASK 0x00000400ULL // system profilinig mask #define PROF_CPU_MASK 0x00010000ULL @@ -102,7 +106,7 @@ extern "C" { MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); -typedef int32_t Status; +using Status = int32_t; typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1; /// /// @ingroup AscendCL diff --git a/third_party/fwkacllib/inc/toolchain/prof_common.h 
b/third_party/fwkacllib/inc/toolchain/prof_common.h index 37702c9b..eb284272 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_common.h +++ b/third_party/fwkacllib/inc/toolchain/prof_common.h @@ -159,8 +159,14 @@ enum MsprofGeTaskType { MSPROF_GE_TASK_TYPE_AI_CPU, MSPROF_GE_TASK_TYPE_AIV, MSPROF_GE_TASK_TYPE_WRITE_BACK, + MSPROF_GE_TASK_TYPE_MIX_AIC, + MSPROF_GE_TASK_TYPE_MIX_AIV, + MSPROF_GE_TASK_TYPE_FFTS_PLUS, + MSPROF_GE_TASK_TYPE_DSA, + MSPROF_GE_TASK_TYPE_DVPP, MSPROF_GE_TASK_TYPE_INVALID }; + enum MsprofGeShapeType { MSPROF_GE_SHAPE_TYPE_STATIC = 0, MSPROF_GE_SHAPE_TYPE_DYNAMIC,