Merge pull request !2133 from yanghaoran/masterpull/2134/head
@@ -36,7 +36,8 @@ typedef enum { | |||||
ACL_DEBUG_DIR, | ACL_DEBUG_DIR, | ||||
ACL_OP_COMPILER_CACHE_MODE, | ACL_OP_COMPILER_CACHE_MODE, | ||||
ACL_OP_COMPILER_CACHE_DIR, | ACL_OP_COMPILER_CACHE_DIR, | ||||
ACL_OP_PERFORMANCE_MODE | |||||
ACL_OP_PERFORMANCE_MODE, | |||||
ACL_OP_JIT_COMPILE | |||||
} aclCompileOpt; | } aclCompileOpt; | ||||
typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag; | typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag; | ||||
@@ -128,8 +128,8 @@ MSVP_PROF_API aclError aclprofStart(const aclprofConfig *profilerConfig); | |||||
* @see aclprofDestroyConfig | * @see aclprofDestroyConfig | ||||
*/ | */ | ||||
MSVP_PROF_API aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, | MSVP_PROF_API aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, | ||||
aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, | |||||
uint64_t dataTypeConfig); | |||||
aclprofAicoreMetrics aicoreMetrics, | |||||
const aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -113,6 +113,7 @@ static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // | |||||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow | static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow | ||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow | static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow | ||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow | static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow | ||||
static const int32_t ACL_ERROR_RT_STREAM_SYNC_TIMEOUT = 507046; // stream sync time out | |||||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | ||||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | ||||
@@ -30,6 +30,8 @@ namespace ge { | |||||
// Option key: graph run mode | // Option key: graph run mode | ||||
const char_t *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; | const char_t *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; | ||||
const char_t *const OPTION_DEVICE_TYPE = "ge.deviceType"; | const char_t *const OPTION_DEVICE_TYPE = "ge.deviceType"; | ||||
// Option key: topo sorting mode | |||||
const char *const OPTION_TOPO_SORTING_MODE = "ge.topoSortingMode"; | |||||
// Option key: ome init | // Option key: ome init | ||||
const char_t *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; | const char_t *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; | ||||
@@ -129,6 +131,7 @@ const char_t *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; | |||||
const char_t *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; | const char_t *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; | ||||
const char_t *const CUSTOMIZE_DTYPES = "ge.customizeDtypes"; | const char_t *const CUSTOMIZE_DTYPES = "ge.customizeDtypes"; | ||||
const char_t *const COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; | const char_t *const COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; | ||||
const char_t *const BUILD_GRAPH_ALREADY_INITIALIZED = "build_graph_already_initialized"; | |||||
} // namespace configure_option | } // namespace configure_option | ||||
// Configure stream num by Session constructor options param, | // Configure stream num by Session constructor options param, | ||||
// its value should be int32_t type, default value is "1" | // its value should be int32_t type, default value is "1" | ||||
@@ -293,6 +296,9 @@ const std::string FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; | |||||
// Configure compression optimize file path | // Configure compression optimize file path | ||||
const std::string COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; | const std::string COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; | ||||
// Configure for amct | |||||
const std::string BUILD_GRAPH_ALREADY_INITIALIZED = "build_graph_already_initialized"; | |||||
// Configure customize dtypes path | // Configure customize dtypes path | ||||
const std::string CUSTOMIZE_DTYPES = "ge.customizeDtypes"; | const std::string CUSTOMIZE_DTYPES = "ge.customizeDtypes"; | ||||
@@ -394,7 +400,8 @@ const char_t *const GRAPH_PARALLEL_OPTION_PATH = "ge.graphParallelOptionPath"; | |||||
const char_t *const EVALUATE_GRAPH_RESOURCE_MODE = "ge.evaluateGraphResourceMode"; | const char_t *const EVALUATE_GRAPH_RESOURCE_MODE = "ge.evaluateGraphResourceMode"; | ||||
// Graph run mode | // Graph run mode | ||||
enum GraphRunMode { PREDICTION = 0, TRAIN }; | enum GraphRunMode { PREDICTION = 0, TRAIN }; | ||||
// Topo sorting mode | |||||
enum class TopoSortingMode { BFS = 0, DFS = 1 }; | |||||
// Input/Output tensor info | // Input/Output tensor info | ||||
struct InputTensorInfo { | struct InputTensorInfo { | ||||
uint32_t data_type; // data type | uint32_t data_type; // data type | ||||
@@ -478,6 +485,8 @@ static const char_t *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str(); | |||||
static const char_t *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); | static const char_t *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); | ||||
static const char_t *const CUSTOMIZE_DTYPES = "ge.customizeDtypes"; | static const char_t *const CUSTOMIZE_DTYPES = "ge.customizeDtypes"; | ||||
static const char_t *const COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; | static const char_t *const COMPRESSION_OPTIMIZE_CONF = "ge.compressionOptimizeConf"; | ||||
static const char_t *const BUILD_GRAPH_ALREADY_INITIALIZED = "build_graph_already_initialized"; | |||||
static const char_t *const INPUT_DATA_NAMES = "input_data_names"; | |||||
// for interface: aclgrphBuildModel | // for interface: aclgrphBuildModel | ||||
#ifdef __GNUC__ | #ifdef __GNUC__ | ||||
@@ -514,8 +523,8 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT, | |||||
// for interface: aclgrphParse | // for interface: aclgrphParse | ||||
const std::set<std::string> ir_parser_suppported_options = { | const std::set<std::string> ir_parser_suppported_options = { | ||||
INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, | |||||
OUT_NODES, ENABLE_SCOPE_FUSION_PASSES}; | |||||
INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, OUT_NODES, | |||||
ENABLE_SCOPE_FUSION_PASSES, INPUT_DATA_NAMES}; | |||||
// for interface: aclgrphBuildInitialize | // for interface: aclgrphBuildInitialize | ||||
const std::set<std::string> global_options = {CORE_TYPE, | const std::set<std::string> global_options = {CORE_TYPE, | ||||
@@ -540,7 +549,8 @@ const std::set<std::string> global_options = {CORE_TYPE, | |||||
OP_COMPILER_CACHE_DIR, | OP_COMPILER_CACHE_DIR, | ||||
OP_COMPILER_CACHE_MODE, | OP_COMPILER_CACHE_MODE, | ||||
MODIFY_MIXLIST, | MODIFY_MIXLIST, | ||||
COMPRESSION_OPTIMIZE_CONF}; | |||||
COMPRESSION_OPTIMIZE_CONF, | |||||
BUILD_GRAPH_ALREADY_INITIALIZED}; | |||||
#endif | #endif | ||||
} // namespace ir_option | } // namespace ir_option | ||||
} // namespace ge | } // namespace ge | ||||
@@ -113,6 +113,7 @@ static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // | |||||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow | static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow | ||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow | static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow | ||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow | static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow | ||||
static const int32_t ACL_ERROR_RT_STREAM_SYNC_TIMEOUT = 507046; // stream sync time out | |||||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | ||||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | ||||
@@ -1,22 +0,0 @@ | |||||
/** | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_COMMON_AICPU_OP_H_ | |||||
#define INC_FRAMEWORK_COMMON_AICPU_OP_H_ | |||||
#include "cce/customize.h" | |||||
#endif // INC_FRAMEWORK_COMMON_AICPU_OP_H_ |
@@ -1,51 +0,0 @@ | |||||
/** | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H | |||||
#define INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H | |||||
#include <map> | |||||
#include <string> | |||||
#include <vector> | |||||
#include "ge/ge_api_error_codes.h" | |||||
#include "nlohmann/json.hpp" | |||||
#include "graph/op_desc.h" | |||||
#include "graph/ge_tensor.h" | |||||
namespace ge { | |||||
struct FileConstantInfo { | |||||
std::string value_bin_file_id; | |||||
std::string value_bin_file_path; | |||||
}; | |||||
struct OptionInfo { | |||||
std::vector<FileConstantInfo> info; | |||||
}; | |||||
void from_json(const nlohmann::json &j, FileConstantInfo &info); | |||||
void from_json(const nlohmann::json &j, OptionInfo &option_info); | |||||
Status GetFilePathFromOption(std::map<std::string, std::string> &file_id_and_path_map); | |||||
Status CopyOneWeightFromFile(const void *const curr_dev_ptr, const std::string &value, const size_t file_constant_size, | |||||
size_t &left_size); | |||||
Status GetFilePath(const OpDescPtr &op_desc, const std::map<std::string, std::string> &file_id_and_path_map, | |||||
std::string &file_path); | |||||
} // namespace ge | |||||
#endif // INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H |
@@ -19,18 +19,14 @@ | |||||
#if defined(_MSC_VER) | #if defined(_MSC_VER) | ||||
#ifdef FUNC_VISIBILITY | #ifdef FUNC_VISIBILITY | ||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#define GE_OBJECT_VISIBILITY | #define GE_OBJECT_VISIBILITY | ||||
#else | #else | ||||
#define GE_FUNC_VISIBILITY | |||||
#define GE_OBJECT_VISIBILITY | #define GE_OBJECT_VISIBILITY | ||||
#endif | #endif | ||||
#else | #else | ||||
#ifdef FUNC_VISIBILITY | #ifdef FUNC_VISIBILITY | ||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#define GE_OBJECT_VISIBILITY | #define GE_OBJECT_VISIBILITY | ||||
#else | #else | ||||
#define GE_FUNC_VISIBILITY | |||||
#define GE_OBJECT_VISIBILITY __attribute__((visibility("hidden"))) | #define GE_OBJECT_VISIBILITY __attribute__((visibility("hidden"))) | ||||
#endif | #endif | ||||
#endif | #endif | ||||
@@ -40,6 +36,7 @@ | |||||
#include "framework/common/fmk_types.h" | #include "framework/common/fmk_types.h" | ||||
#include "register/register_error_codes.h" | #include "register/register_error_codes.h" | ||||
#include "external/ge/ge_error_codes.h" | |||||
// Each module uses the following four macros to define error codes: | // Each module uses the following four macros to define error codes: | ||||
#define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, name, value) | #define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, name, value) | ||||
@@ -1,85 +0,0 @@ | |||||
/** | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | |||||
#define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | |||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <gflags/gflags.h> | |||||
#include <string> | |||||
namespace ge { | |||||
/// Static helpers that query and adjust gflags command-line options. | |||||
class GE_FUNC_VISIBILITY GflagsUtils { | |||||
public: | |||||
/// | |||||
/// @brief Determines whether a command-line option currently has the value "true" | |||||
/// @param name option name handed to gflags::GetCommandLineOption | |||||
/// @return true if gflags::GetCommandLineOption succeeds and the value equals "true", otherwise false | |||||
/// | |||||
static bool IsSetCommandTrue(const char *name) { | |||||
std::string out; | |||||
return gflags::GetCommandLineOption(name, &out) && out == "true"; | |||||
} | |||||
/// | |||||
/// @brief Determines whether a command-line option has a non-empty value | |||||
/// @param name option name handed to gflags::GetCommandLineOption | |||||
/// @return true if gflags::GetCommandLineOption succeeds and the value is not empty, otherwise false | |||||
/// | |||||
static bool IsSetCommandNotEmpty(const char *name) { | |||||
std::string out; | |||||
return gflags::GetCommandLineOption(name, &out) && !out.empty(); | |||||
} | |||||
/// | |||||
/// @brief Determines whether a flag differs from its default value | |||||
/// @param flag_name flag name handed to GetCommandLineFlagInfo | |||||
/// @return true if the flag info lookup succeeds and info.is_default is false, otherwise false | |||||
/// | |||||
static bool IsCommandLineNotDefault(const char *flag_name) { | |||||
google::CommandLineFlagInfo info; | |||||
return GetCommandLineFlagInfo(flag_name, &info) && !info.is_default; | |||||
} | |||||
/// | |||||
/// @brief Redirects every gflags help request to the "helpshort" option | |||||
/// @param flags_h Pass in the self-defined help parameter, it is recommended to be FLAGS_h | |||||
/// @return void | |||||
/// @note When flags_h is true or any of help/helpfull/helpon/helpmatch/helppackage/helpxml is active, | |||||
/// those six options are reset ("false" or "") and helpshort is set to "true". | |||||
/// Presumably this limits the output to the short help text - confirm against gflags behaviour. | |||||
/// | |||||
static void ChangeHelpFlags(bool flags_h) { | |||||
if (flags_h || IsSetCommandTrue("help") || IsSetCommandTrue("helpfull") || IsSetCommandNotEmpty("helpon") || | |||||
IsSetCommandNotEmpty("helpmatch") || IsSetCommandTrue("helppackage") || IsSetCommandTrue("helpxml")) { | |||||
gflags::SetCommandLineOption("help", "false"); | |||||
gflags::SetCommandLineOption("helpfull", "false"); | |||||
gflags::SetCommandLineOption("helpon", ""); | |||||
gflags::SetCommandLineOption("helpmatch", ""); | |||||
gflags::SetCommandLineOption("helppackage", "false"); | |||||
gflags::SetCommandLineOption("helpxml", "false"); | |||||
gflags::SetCommandLineOption("helpshort", "true"); | |||||
} | |||||
} | |||||
}; | |||||
} // namespace ge | |||||
#endif // INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ |
@@ -34,6 +34,8 @@ class GE_FUNC_VISIBILITY ModelHelper { | |||||
Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, | Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, | ||||
ge::ModelBufferData &model) const; | ge::ModelBufferData &model) const; | ||||
Status GenerateGeModel(const OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index, | |||||
const bool is_dyn_root) const; | |||||
Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, | Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, | ||||
const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape) const; | const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape) const; | ||||
Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file) const; | Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file) const; | ||||
@@ -67,8 +69,6 @@ class GE_FUNC_VISIBILITY ModelHelper { | |||||
bool IsPartitionedGraph(const GeModelPtr &cur_model) const; | bool IsPartitionedGraph(const GeModelPtr &cur_model) const; | ||||
Status GenerateGeModel(const OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index, | |||||
const bool is_dyn_root) const; | |||||
Status GenerateGeRootModel(const OmFileLoadHelper &om_load_helper); | Status GenerateGeRootModel(const OmFileLoadHelper &om_load_helper); | ||||
Status LoadModelData(const OmFileLoadHelper &om_load_helper, const GeModelPtr &cur_model, | Status LoadModelData(const OmFileLoadHelper &om_load_helper, const GeModelPtr &cur_model, | ||||
@@ -1,43 +0,0 @@ | |||||
/** | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ | |||||
#define INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ | |||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <google/protobuf/map.h> | |||||
#include <unordered_map> | |||||
#include <string> | |||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "proto/om.pb.h" | |||||
namespace ge { | |||||
GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, domi::AttrDef *const out); | |||||
} | |||||
#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ |
@@ -17,20 +17,6 @@ | |||||
#ifndef INC_FRAMEWORK_COMMON_STRING_UTIL_H_ | #ifndef INC_FRAMEWORK_COMMON_STRING_UTIL_H_ | ||||
#define INC_FRAMEWORK_COMMON_STRING_UTIL_H_ | #define INC_FRAMEWORK_COMMON_STRING_UTIL_H_ | ||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <cctype> | #include <cctype> | ||||
#include <securec.h> | #include <securec.h> | ||||
@@ -40,6 +26,7 @@ | |||||
#include <string> | #include <string> | ||||
#include <vector> | #include <vector> | ||||
#include "graph/types.h" | #include "graph/types.h" | ||||
#include "external/ge/ge_error_codes.h" | |||||
namespace ge { | namespace ge { | ||||
class GE_FUNC_VISIBILITY StringUtils { | class GE_FUNC_VISIBILITY StringUtils { | ||||
@@ -45,7 +45,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFIL | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_HOST_BASE_ADDR; | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_HOST_BASE_ADDR; | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_HOST_SVM_BASE_ADDR; | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_HOST_MEMORY_SIZE; | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_HOST_MEMORY_SIZE; | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_HOST_SVM_SIZE; | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR; | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR; | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF; | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF; | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB. | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB. | ||||
@@ -323,7 +323,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||||
static Status ReleaseSingleOpResource(void *const stream); | static Status ReleaseSingleOpResource(void *const stream); | ||||
static Status ClearCustomAicpuSo(); | |||||
static Status ClearCustomAicpuSo(const uint32_t &device_id); | |||||
static Status GetDeviceIdByModelId(const uint32_t model_id, uint32_t &device_id); | static Status GetDeviceIdByModelId(const uint32_t model_id, uint32_t &device_id); | ||||
@@ -25,6 +25,7 @@ enum MemStorageType { | |||||
HBM = 0, | HBM = 0, | ||||
RDMA_HBM, | RDMA_HBM, | ||||
HOST_DDR, | HOST_DDR, | ||||
HOST_SVM, | |||||
}; | }; | ||||
struct HostVarInfo { | struct HostVarInfo { | ||||
@@ -38,27 +39,23 @@ struct TensorInfo { | |||||
DataType data_type; | DataType data_type; | ||||
}; | }; | ||||
/// | |||||
/// \param size [in] rdma pool memory size to be allocated. | /// \param size [in] rdma pool memory size to be allocated. | ||||
/// \param mem_type [in] memory type for rdma pool. | /// \param mem_type [in] memory type for rdma pool. | ||||
/// \return Status result of function | /// \return Status result of function | ||||
GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); | GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); | ||||
/// | |||||
/// \param var_info [in] host variable addr infos. | /// \param var_info [in] host variable addr infos. | ||||
/// \param mem_type [in] memory type for rdma pool. | /// \param mem_type [in] memory type for rdma pool. | ||||
/// \return Status result of function | /// \return Status result of function | ||||
GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, | GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, | ||||
rtMemType_t mem_type = RT_MEMORY_HBM); | rtMemType_t mem_type = RT_MEMORY_HBM); | ||||
/// | |||||
/// \param tensor_info [in] description for tensor stored shared memory. | /// \param tensor_info [in] description for tensor stored shared memory. | ||||
/// \param dev_addr [out] malloced shared memory addr. | /// \param dev_addr [out] malloced shared memory addr. | ||||
/// \param memory_size [out] malloced shared memory size. | /// \param memory_size [out] malloced shared memory size. | ||||
/// \return Status result of function | /// \return Status result of function | ||||
GE_FUNC_VISIBILITY Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); | GE_FUNC_VISIBILITY Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); | ||||
/// | |||||
/// \param var_name [in] name of host variable. | /// \param var_name [in] name of host variable. | ||||
/// \param base_addr [out] base addr of host variable. | /// \param base_addr [out] base addr of host variable. | ||||
/// \param var_size [out] memory size of host variable. | /// \param var_size [out] memory size of host variable. | ||||
@@ -1,35 +0,0 @@ | |||||
/** | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_OMG_MODEL_TOOL_H_ | |||||
#define INC_FRAMEWORK_OMG_MODEL_TOOL_H_ | |||||
#include <memory> | |||||
#include <string> | |||||
#include "framework/common/debug/ge_log.h" | |||||
#include "proto/ge_ir.pb.h" | |||||
namespace ge { | |||||
class GE_FUNC_VISIBILITY ModelTool { | |||||
public: | |||||
static Status GetModelInfoFromOm(const char *model_file, ge::proto::ModelDef &model_def, uint32_t &modeldef_size); | |||||
static Status GetModelInfoFromPbtxt(const char *model_file, ge::proto::ModelDef &model_def); | |||||
}; | |||||
} // namespace ge | |||||
#endif // INC_FRAMEWORK_OMG_MODEL_TOOL_H_ |
@@ -99,6 +99,7 @@ struct OmgContext { | |||||
std::string atc_cmdline; | std::string atc_cmdline; | ||||
bool user_attr_index_valid = false; | bool user_attr_index_valid = false; | ||||
bool is_online_model = false; | bool is_online_model = false; | ||||
bool is_subgraph_multi_batch = false; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -28,10 +28,10 @@ | |||||
#include "subscriber/executor_subscribers_scheduler.h" | #include "subscriber/executor_subscribers_scheduler.h" | ||||
namespace gert { | namespace gert { | ||||
enum class ExecutorState { kInit, kLoaded }; | |||||
enum SubExeGraphType { kInitExeGraph, kMainExeGraph, kDeInitExeGraph, kSubExeGraphTypeEnd }; | enum SubExeGraphType { kInitExeGraph, kMainExeGraph, kDeInitExeGraph, kSubExeGraphTypeEnd }; | ||||
static constexpr char *kSubExeGraphTypeStrs[kSubExeGraphTypeEnd] = { | |||||
const_cast<char *>("Init"), const_cast<char *>("Main"), const_cast<char *>("DeInit")}; | |||||
inline const char *GetSubExeGraphTypeStr(SubExeGraphType type) { | inline const char *GetSubExeGraphTypeStr(SubExeGraphType type) { | ||||
constexpr const char *kSubExeGraphTypeStrs[kSubExeGraphTypeEnd] = {"Init", "Main", "DeInit"}; | |||||
return kSubExeGraphTypeStrs[type]; | return kSubExeGraphTypeStrs[type]; | ||||
} | } | ||||
@@ -74,6 +74,7 @@ class VISIBILITY_EXPORT ModelV2Executor { | |||||
ModelDesc *model_desc_ = nullptr; | ModelDesc *model_desc_ = nullptr; | ||||
rtStream_t default_stream_ = nullptr; | rtStream_t default_stream_ = nullptr; | ||||
ExecutorSubscribersScheduler subscribers_; | ExecutorSubscribersScheduler subscribers_; | ||||
ExecutorState state_ = ExecutorState::kInit; | |||||
}; | }; | ||||
} // namespace gert | } // namespace gert | ||||
@@ -32,8 +32,7 @@ enum class BuiltInSubscriberType { kProfiling, kDumper, kNum }; | |||||
enum class ProfilingType { | enum class ProfilingType { | ||||
kHost, // enable profiling of host-side scheduling | kHost, // enable profiling of host-side scheduling | ||||
kDevice, | kDevice, | ||||
kGeHost, // enable profiling of GE host-side scheduling | |||||
kSingleOpReg, // single-op execution must turn on this switch to start registering node name and kernel type | |||||
kGeHost, // enable profiling of GE host-side scheduling | |||||
kNum, | kNum, | ||||
kAll = kNum | kAll = kNum | ||||
}; | }; | ||||
@@ -23,9 +23,6 @@ | |||||
#include "global_profiling.h" | #include "global_profiling.h" | ||||
#include "framework/common/ge_visibility.h" | #include "framework/common/ge_visibility.h" | ||||
namespace gert { | namespace gert { | ||||
namespace { | |||||
constexpr size_t kInitSubscriberSize = 1UL; | |||||
} | |||||
class ModelV2Executor; | class ModelV2Executor; | ||||
class VISIBILITY_EXPORT ExecutorSubscribersScheduler { | class VISIBILITY_EXPORT ExecutorSubscribersScheduler { | ||||
public: | public: | ||||
@@ -78,7 +75,7 @@ class VISIBILITY_EXPORT ExecutorSubscribersScheduler { | |||||
if (ins == nullptr) { | if (ins == nullptr) { | ||||
return nullptr; | return nullptr; | ||||
} | } | ||||
constexpr size_t kInitSubscriberSize = 1UL; | |||||
// profiler exists when ess init | // profiler exists when ess init | ||||
if (subscribers_.size() == kInitSubscriberSize) { | if (subscribers_.size() == kInitSubscriberSize) { | ||||
enabled_ = true; | enabled_ = true; | ||||
@@ -54,6 +54,11 @@ class GlobalProfiler { | |||||
class VISIBILITY_EXPORT GlobalProfilingWrapper { | class VISIBILITY_EXPORT GlobalProfilingWrapper { | ||||
public: | public: | ||||
GlobalProfilingWrapper(const GlobalProfilingWrapper &) = delete; | |||||
GlobalProfilingWrapper(GlobalProfilingWrapper &&) = delete; | |||||
GlobalProfilingWrapper &operator=(const GlobalProfilingWrapper &) = delete; | |||||
GlobalProfilingWrapper &operator=(GlobalProfilingWrapper &&) = delete; | |||||
static GlobalProfilingWrapper *GetInstance() { | static GlobalProfilingWrapper *GetInstance() { | ||||
static GlobalProfilingWrapper global_prof_wrapper; | static GlobalProfilingWrapper global_prof_wrapper; | ||||
return &global_prof_wrapper; | return &global_prof_wrapper; | ||||
@@ -1 +1 @@ | |||||
Subproject commit 62c14e1cde161dccf6967f151ece9509f778c416 | |||||
Subproject commit 03482feb52fd7cc8544231f32891c86db3bc91a2 |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -14,8 +14,8 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#ifndef AICPU_ENGINE_STRUCT_H__ | |||||
#define AICPU_ENGINE_STRUCT_H__ | |||||
#ifndef AICPU_ENGINE_STRUCT_H | |||||
#define AICPU_ENGINE_STRUCT_H | |||||
#include "fwk_adpt_struct.h" | #include "fwk_adpt_struct.h" | ||||
@@ -53,4 +53,4 @@ struct SessionInfo { | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif | ||||
#endif // AICPU_ENGINE_STRUCT_H__ | |||||
#endif // AICPU_ENGINE_STRUCT_H |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -14,8 +14,8 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#ifndef FWK_ADPT_STRUCT_H__ | |||||
#define FWK_ADPT_STRUCT_H__ | |||||
#ifndef FWK_ADPT_STRUCT_H | |||||
#define FWK_ADPT_STRUCT_H | |||||
#include <cstdint> | #include <cstdint> | ||||
@@ -112,7 +112,7 @@ struct StrFWKKernel { | |||||
}; | }; | ||||
#pragma pack(pop) | #pragma pack(pop) | ||||
typedef StrFWKKernel FWKOperateParam; | |||||
using FWKOperateParam = StrFWKKernel; | |||||
// Extent info ShapeAndType | // Extent info ShapeAndType | ||||
const uint32_t kMaxShapeDims = 8U; | const uint32_t kMaxShapeDims = 8U; | ||||
@@ -154,4 +154,4 @@ struct AsyncWait { | |||||
} // end namespace FWKAdapter | } // end namespace FWKAdapter | ||||
} // namespace aicpu | } // namespace aicpu | ||||
#endif // FWK_ADPT_STRUCT_H__ | |||||
#endif // FWK_ADPT_STRUCT_H |
@@ -107,6 +107,7 @@ static const int32_t ACL_ERROR_RT_AICORE_TRAP_READ_OVERFLOW = 507042; // a | |||||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow | static const int32_t ACL_ERROR_RT_AICORE_TRAP_WRITE_OVERFLOW = 507043; // aic trap write overflow | ||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow | static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_READ_OVERFLOW = 507044; // aiv trap read overflow | ||||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow | static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_WRITE_OVERFLOW = 507045; // aiv trap write overflow | ||||
static const int32_t ACL_ERROR_RT_STREAM_SYNC_TIMEOUT = 507046; // stream sync time out | |||||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | ||||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | ||||
@@ -21,7 +21,6 @@ reviewers: | |||||
- luanma_bl | - luanma_bl | ||||
- LDLD0524 | - LDLD0524 | ||||
- wywismygod2020 | - wywismygod2020 | ||||
- lipeiyang3699 | |||||
- koala-zhang | - koala-zhang | ||||
- zhu-jingjing | - zhu-jingjing | ||||
- zhaozhihui5 | - zhaozhihui5 | ||||
@@ -41,18 +40,14 @@ reviewers: | |||||
- djh602 | - djh602 | ||||
- wangjiangben_hw | - wangjiangben_hw | ||||
- li1jie | - li1jie | ||||
- clinglai | |||||
- liujun2014 | - liujun2014 | ||||
- soupkey | - soupkey | ||||
- wu-shengji | - wu-shengji | ||||
- cimeng | - cimeng | ||||
- ccl_ligang | |||||
- xiaozhedeng | |||||
- granpad7 | - granpad7 | ||||
- tc1qaz | - tc1qaz | ||||
- Ronnie_zheng | - Ronnie_zheng | ||||
- xiexianhu | - xiexianhu | ||||
- zhouyujoe | |||||
- zhaoping12 | - zhaoping12 | ||||
- tanshengshun | - tanshengshun | ||||
- fanqirui | - fanqirui | ||||
@@ -62,4 +57,4 @@ reviewers: | |||||
- gegenhua | - gegenhua | ||||
- qiaohairong | - qiaohairong | ||||
options: | options: | ||||
no_parent_owners: true | |||||
no_parent_owners: true |
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2021 Huawei Technologies Co., Ltd | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2022. All rights reserved. | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -125,7 +125,7 @@ REG_OP(MinimumGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
*One input: | *One input: | ||||
*x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | ||||
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | |||||
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32, uint1. | |||||
For float32 type, the actual calculation on the chip is based on float16. \n | For float32 type, the actual calculation on the chip is based on float16. \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -137,7 +137,7 @@ REG_OP(MinimumGrad) | |||||
REG_OP(Cast) | REG_OP(Cast) | ||||
.INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | ||||
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, | DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, | ||||
DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32, DT_BF16})) | |||||
DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32, DT_BF16, DT_UINT1})) | |||||
.OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, | ||||
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, | DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, | ||||
DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32, DT_BF16})) | DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32, DT_BF16})) | ||||
@@ -77,6 +77,49 @@ REG_OP(ApplyAdamW) | |||||
.ATTR(maximize, Bool, false) | .ATTR(maximize, Bool, false) | ||||
.OP_END_FACTORY_REG(ApplyAdamW) | .OP_END_FACTORY_REG(ApplyAdamW) | ||||
/** | |||||
* @brief Calculate SQ distance. \n | |||||
* | |||||
* @par Inputs: | |||||
* @li ivf: A Tensor, dtype is uint8. | |||||
* @li query: A Tensor, dtype is float16 or float32. | |||||
* @li bucket_list: A Tensor, dtype is int32 or int64. | |||||
* @li bucket_limits: A Tensor, dtype is int32 or int64. | |||||
* @li bucket_offsets: A Tensor, dtype is int32 or int64. | |||||
* @li vmin: A Tensor, dtype is float16 or float32. | |||||
* @li vdiff: A Tensor, dtype is float16 or float32. \n | |||||
* | |||||
* @par Outputs: | |||||
* @li actual_count: A Tensor, dtype is int32 or int64, the actual number of sq_distance. | |||||
* @li sq_distance: A Tensor, dtype is float16 or float32. | |||||
* @li grouped_extreme_distance: A Tensor, dtype is float16 or float32, the extremum in each group of sq_distance. | |||||
* @li sq_ivf: A Tensor, dtype is int32 or int64. | |||||
* @li sq_index: A Tensor, dtype is int32 or int64. \n | |||||
* | |||||
* @par Attributes: | |||||
* @li total_limit: An Int, indicates the max length of the output sq_distance. | |||||
* @li group_size: An Int, indicates the group size of the extremum. | |||||
* @li extreme_mode: An Int, indicates the type of extremum, 0 means minimum, and 1 means maximum. \n | |||||
* | |||||
*/ | |||||
REG_OP(ScanSQCodes) | |||||
.INPUT(ivf, TensorType({DT_UINT8})) | |||||
.INPUT(query, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(bucket_limits, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(bucket_offsets, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(vmin, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(vdiff, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(actual_count, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(sq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(grouped_extreme_distance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(sq_ivf, TensorType({DT_INT32, DT_INT64})) | |||||
.OUTPUT(sq_index, TensorType({DT_INT32, DT_INT64})) | |||||
.REQUIRED_ATTR(total_limit, Int) | |||||
.ATTR(group_size, Int, 64) | |||||
.ATTR(extreme_mode, Int, 0) | |||||
.OP_END_FACTORY_REG(ScanSQCodes) | |||||
/** | /** | ||||
* @brief Multiplies matrix "a" by matrix "b", producing "a * b". \n | * @brief Multiplies matrix "a" by matrix "b", producing "a * b". \n | ||||
* @par Inputs: | * @par Inputs: | ||||
@@ -153,6 +196,103 @@ REG_OP(RotatedNMS) | |||||
.OUTPUT(keep_indices, TensorType({DT_INT32, DT_INT64})) | .OUTPUT(keep_indices, TensorType({DT_INT32, DT_INT64})) | ||||
.REQUIRED_ATTR(iou_threshold, Float) | .REQUIRED_ATTR(iou_threshold, Float) | ||||
.OP_END_FACTORY_REG(RotatedNMS) | .OP_END_FACTORY_REG(RotatedNMS) | ||||
/** | |||||
* @brief Performs average pooling on the input. Used in the combination of conv + avgpoolupdate to replace avgpool | |||||
* @par Inputs: | |||||
* x1: Output of upstream Conv2d. A tensor of type float16, float32. | |||||
* x2: Input feature map of upstream Conv2d. A tensor of type int8, float16, float32. | |||||
* @par Attributes: | |||||
* @li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, | |||||
* where N = C = 1, and H and W are positive integers within the range [1, 255]. | |||||
* @li strides: A required list of 4 ints, specifying the stride of the sliding window. | |||||
* The strides of the N and C dimensions are 1. | |||||
* The strides of the H and W dimensions are positive integers within the range [1, 63]. | |||||
* @li padding_mode: An optional string, specifying the padding algorithm, | |||||
* one of "VALID", "SAME" or "CALCULATED" (default). | |||||
* With "SAME" means that the outputs will have the same spatial dimensions as its inputs. | |||||
* With "VALID" means no padding. | |||||
* @li pads: Pad value when padding_mode is "CALCULATED". | |||||
* @li data_format: An optional string, specifying the data format of "ksize" and "strides", | |||||
* either "NCHW", or "NHWC" (default). | |||||
* @li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". | |||||
* @li exclusive: Ignore padding area or not when calculating average. | |||||
* @par Outputs: | |||||
* y: The average pooled output tensor. Has the same type and format as input "x1". | |||||
* @attention Constraints: | |||||
* @li Only single input and single output are supported. | |||||
* @li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 | |||||
* @li Due to instruction restrictions, | |||||
* the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the TensorFlow/Pytorch/Onnx operator AvgPoolV2. | |||||
*/ | |||||
REG_OP(AvgPoolUpdate) | |||||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(x2, TensorType({DA_INT4, DT_INT8, DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(ksize, ListInt) | |||||
.REQUIRED_ATTR(strides, ListInt) | |||||
.ATTR(padding_mode, String, "CALCULATED") | |||||
.ATTR(pads, ListInt, {0, 0, 0, 0}) | |||||
.ATTR(data_format, String, "NHWC") | |||||
.ATTR(ceil_mode, Bool, false) | |||||
.ATTR(exclusive, Bool, true) | |||||
.OP_END_FACTORY_REG(AvgPoolUpdate) | |||||
/** | |||||
* @brief batch input by time | |||||
* @par Inputs: | |||||
* x: A list of input tensors. It's a dynamic input | |||||
* @par Attributes: | |||||
* @li window: time window, [-1, int64_max], if -1 will batch by input data flag, | |||||
* else will batch by input timestamp and data flag. | |||||
* @li batch_dim: [-1, input_shape_range), if -1 input shape:[x, ..., x] ---> output shape:[-1, x, ..., x], | |||||
* else output shape:[x, ..., -1(batch_dim), ..., x]; | |||||
* @li drop_remainder: a bool flag, take effect when window > -1, | |||||
* if true when batch data window < window, will drop data. | |||||
* @par Outputs: | |||||
* y: A list of output tensors. It's a dynamic output, the same size as "x". | |||||
* @attention Constraints: | |||||
* @li Only support in helper udf | |||||
*/ | |||||
REG_OP(TimeBatch) | |||||
.DYNAMIC_INPUT(x, TensorType::RealNumberType()) | |||||
.DYNAMIC_OUTPUT(y, TensorType::RealNumberType()) | |||||
.REQUIRED_ATTR(window, Int) | |||||
.ATTR(batch_dim, Int, -1) | |||||
.ATTR(drop_remainder, Bool, false) | |||||
.OP_END_FACTORY_REG(TimeBatch) | |||||
/** | |||||
* @brief Auto Batch process. \n | |||||
* @par Inputs: | |||||
* @li x: A list of input tensor objects. It's a dynamic input. \n | |||||
* @par Outputs: | |||||
* @li y: A list of output tensor objects. It's a dynamic output. \n | |||||
* @par Attributes: | |||||
* @li batch_size: auto batch size. | |||||
* @li timeout: auto batch wait timeout(unit:ms). | |||||
* @li padding: whether to pad when batch is insufficient. | |||||
* @li slide_stride: sliding window step. | |||||
*/ | |||||
REG_OP(AutoBatch) | |||||
.DYNAMIC_INPUT(x, TensorType::RealNumberType()) | |||||
.DYNAMIC_OUTPUT(y, TensorType::RealNumberType()) | |||||
.REQUIRED_ATTR(batch_size, Int) | |||||
.ATTR(timeout, Int, 0) | |||||
.ATTR(padding, Bool, false) | |||||
.ATTR(slide_stride, Int, 0) | |||||
.OP_END_FACTORY_REG(AutoBatch) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_EXPERIMENT_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_EXPERIMENT_OPS_H_ |
@@ -2039,24 +2039,24 @@ REG_OP(IMGWarpOffsets) | |||||
.OP_END_FACTORY_REG(IMGWarpOffsets) | .OP_END_FACTORY_REG(IMGWarpOffsets) | ||||
/** | /** | ||||
*@brief This operation samples 3d input x by using interpolation based on flow field grid, | |||||
which is usually gennerated by affine_grid. | |||||
* @brief This operation samples 3d input x by using interpolation based on flow field grid, | |||||
which is usually generated by affine_grid. | |||||
*@par Inputs: | |||||
*@li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`. | |||||
*@li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 2]`. | |||||
* @par Inputs: | |||||
* @li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`. | |||||
* @li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 2]`. | |||||
*@par Attributes: | |||||
*@li interpolation_mode: An optional string specifying the interpolation method. | |||||
*@li padding_mode: An optional string specifying the pad method. | |||||
*@li align_corners: An optional bool. If "true", the centers of the corner | |||||
pixels of the input and output tensors are aligned. Defaults to "false" . | |||||
* @par Attributes: | |||||
* @li interpolation_mode: An optional string specifying the interpolation method. | |||||
* @li padding_mode: An optional string specifying the pad method. | |||||
* @li align_corners: An optional bool. If "true", the centers of the corner | |||||
pixels of the input and output tensors are aligned. Defaults to "false" . | |||||
*@par Outputs: | |||||
*y: Returns 5-D Tensor with the same dtype as `x`. | |||||
* @par Outputs: | |||||
* y: Returns 5-D Tensor with the same dtype as `x`. | |||||
*@par Third-party framework compatibility | |||||
*Compatible with pytorch GridSampler3D operator. | |||||
* @par Third-party framework compatibility | |||||
* Compatible with pytorch GridSampler3D operator. | |||||
*/ | */ | ||||
REG_OP(GridSampler3D) | REG_OP(GridSampler3D) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | ||||
@@ -88,7 +88,7 @@ REG_OP(TensorMapInsert) | |||||
* @par Inputs: | * @par Inputs: | ||||
* @li input_handle: The input map. Must be type: DT_VARIANT. | * @li input_handle: The input map. Must be type: DT_VARIANT. | ||||
* @li key: A Tensor,the key to be looked up. Must be one of | |||||
* @li key: A Tensor, the key to be looked up. Must be one of | |||||
the following types: int32,int64,string . \n | the following types: int32,int64,string . \n | ||||
* @par Attributes: | * @par Attributes: | ||||
@@ -674,7 +674,7 @@ REG_OP(Conj) | |||||
*@par Inputs: | *@par Inputs: | ||||
*The input x and weight must have the same type. Inputs include: | *The input x and weight must have the same type. Inputs include: | ||||
*@li x: A Tensor dtype of float32. | *@li x: A Tensor dtype of float32. | ||||
*@li target: A Tensor dtype of int32. | |||||
*@li target: A Tensor dtype of int32 or int64. | |||||
*@li weight: A Tensor dtype of float32 . \n | *@li weight: A Tensor dtype of float32 . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -690,7 +690,7 @@ REG_OP(Conj) | |||||
*/ | */ | ||||
REG_OP(NLLLoss) | REG_OP(NLLLoss) | ||||
.INPUT(x, TensorType({DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT})) | ||||
.INPUT(target, TensorType({DT_INT32})) | |||||
.INPUT(target, TensorType({DT_INT32, DT_INT64})) | |||||
.OPTIONAL_INPUT(weight, TensorType({DT_FLOAT})) | .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT})) | ||||
.OUTPUT(total_weight, TensorType({DT_FLOAT})) | .OUTPUT(total_weight, TensorType({DT_FLOAT})) | ||||
@@ -704,7 +704,7 @@ REG_OP(NLLLoss) | |||||
*@par Inputs: | *@par Inputs: | ||||
*@li x:A Tensor dtype of float32. | *@li x:A Tensor dtype of float32. | ||||
*@li y_grad:A Tensor dtype of float32. | *@li y_grad:A Tensor dtype of float32. | ||||
*@li target:A Tensor dtype of int32. | |||||
*@li target:A Tensor dtype of int32 or int64. | |||||
*@li weight:A Tensor dtype of float32. | *@li weight:A Tensor dtype of float32. | ||||
*@li total_weight:A Tensor dtype of float32 . \n | *@li total_weight:A Tensor dtype of float32 . \n | ||||
@@ -721,7 +721,7 @@ REG_OP(NLLLoss) | |||||
REG_OP(NLLLossGrad) | REG_OP(NLLLossGrad) | ||||
.INPUT(x, TensorType({DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT})) | ||||
.INPUT(y_grad, TensorType({DT_FLOAT})) | .INPUT(y_grad, TensorType({DT_FLOAT})) | ||||
.INPUT(target, TensorType({DT_INT32})) | |||||
.INPUT(target, TensorType({DT_INT32, DT_INT64})) | |||||
.INPUT(weight, TensorType({DT_FLOAT})) | .INPUT(weight, TensorType({DT_FLOAT})) | ||||
.INPUT(total_weight, TensorType({DT_FLOAT})) | .INPUT(total_weight, TensorType({DT_FLOAT})) | ||||
.OUTPUT(x_grad, TensorType({DT_FLOAT})) | .OUTPUT(x_grad, TensorType({DT_FLOAT})) | ||||
@@ -210,10 +210,10 @@ REG_OP(SwinTransformerLnQKV) | |||||
* float32, int32. Has format [ND, NHWC]. \n | * float32, int32. Has format [ND, NHWC]. \n | ||||
* @par Attributes: | * @par Attributes: | ||||
* @li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to | |||||
* [K, M]. | |||||
* @li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to | |||||
* [K, M]. \n | |||||
* @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to | |||||
* [M, K] before multiplication. | |||||
* @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to | |||||
* [K, N] before multiplication. \n | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: The result matrix Tensor. 2D. Must be one of the following types: float16, | * y: The result matrix Tensor. 2D. Must be one of the following types: float16, | ||||
@@ -246,9 +246,9 @@ REG_OP(MatMul) | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to | * @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to | ||||
* [M, K]. | |||||
* [M, K] before multiplication. | |||||
* @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to | * @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to | ||||
* [K, N]. | |||||
* [K, N] before multiplication. | |||||
* @li offset_x: An optional integer for quantized MatMulV2. | * @li offset_x: An optional integer for quantized MatMulV2. | ||||
* The negative offset added to the input x1 for int8 type. Ensure offset_x | * The negative offset added to the input x1 for int8 type. Ensure offset_x | ||||
* within the effective range of int8 [-128, 127]. Defaults to "0". \n | * within the effective range of int8 [-128, 127]. Defaults to "0". \n | ||||
@@ -289,9 +289,9 @@ REG_OP(MatMulV2) | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to | * @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to | ||||
* [M, K]. | |||||
* [M, K] before multiplication. | |||||
* @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to | * @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to | ||||
* [K, N]. | |||||
* [K, N] before multiplication. | |||||
* @li offset_x: An optional integer for quantized MatMulV2Compress. | * @li offset_x: An optional integer for quantized MatMulV2Compress. | ||||
* The negative offset added to the input x1 for int8 type. Ensure offset_x | * The negative offset added to the input x1 for int8 type. Ensure offset_x | ||||
* within the effective range of int8 [-128, 127]. Defaults to "0". \n | * within the effective range of int8 [-128, 127]. Defaults to "0". \n | ||||
@@ -333,29 +333,20 @@ REG_OP(MatMulV2Compress) | |||||
* @li c: A matrix Tensor. Must be one of the following types:float32, float16, | * @li c: A matrix Tensor. Must be one of the following types:float32, float16, | ||||
* int8, int32. Has format ND. | * int8, int32. Has format ND. | ||||
* @li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the | * @li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the | ||||
* following types: float16, int32, float32, int8. Has format ND. | |||||
* following types: float32, float16, int8, int32. Has format ND. | |||||
* @li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following | * @li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following | ||||
* types: float16, int32, float32, int8. Has format ND.\n | |||||
* The format of a, b, c has restriction:\n | |||||
* When type of a is int8 and type of c is int32, the format of a, b, c should | |||||
* all be ND.\n | |||||
* When type of a is int8 and type of c is float32, the format of a, b, c | |||||
* should all be ND.\n | |||||
* When type of a is float16 and type of c is float16, the format of a, b, c | |||||
* should all be ND.\n | |||||
* When type of a is float16 and type of c is float32, the format of a, b, c | |||||
* should all be ND. \n | |||||
* types: float32, float16, int8, int32. Has format ND. \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* Two attributes, including: | * Two attributes, including: | ||||
* @li transpose_a: Optional. A bool. If True, changes the shape of "a" from | * @li transpose_a: Optional. A bool. If True, changes the shape of "a" from | ||||
* [M, K] to [K, M]. | |||||
* [K, M] to [M, K] before multiplication. | |||||
* @li transpose_b: Optional. A bool. If True, changes the shape of "b" from | * @li transpose_b: Optional. A bool. If True, changes the shape of "b" from | ||||
* [K, N] to [N, K]. \n | |||||
* [N, K] to [K, N] before multiplication. \n | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: The result matrix Tensor. Must be one of the following types: float16, | |||||
* float32, int32, int8. Has format [ND], the format should be equal to a. | |||||
* y: The result matrix Tensor. Must be one of the following types: float32, | |||||
* float16, int8, int32. Has format [ND], the format should be equal to a. | |||||
*/ | */ | ||||
REG_OP(GEMM) | REG_OP(GEMM) | ||||
@@ -379,10 +370,10 @@ REG_OP(GEMM) | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC]. \n | * float32, int32. 2D or higher. Has format [ND, NHWC]. \n | ||||
* @par Attributes: | * @par Attributes: | ||||
* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] | |||||
* to [B, K, M]. | |||||
* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] | |||||
* to [B, K, M]. \n | |||||
* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, K, M] | |||||
* to [B, M, K] before multiplication. | |||||
* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, N, K] | |||||
* to [B, K, N] before multiplication. \n | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: The result matrix Tensor. 2D or higher. Must be one of the following | * y: The result matrix Tensor. 2D or higher. Must be one of the following | ||||
@@ -418,10 +409,10 @@ REG_OP(BatchMatMul) | |||||
* int8, int4. Has format [ND, NHWC]. \n | * int8, int4. Has format [ND, NHWC]. \n | ||||
* @par Attributes: | * @par Attributes: | ||||
* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to | |||||
* [B, K, M]. | |||||
* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to | |||||
* [B, K, M]. \n | |||||
* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, K, M] to | |||||
* [B, M, K] before multiplication. | |||||
* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, N, K] to | |||||
* [B, K, N] before multiplication. \n | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: The result matrix Tensor. 2D or higher. Must be one of the following | * y: The result matrix Tensor. 2D or higher. Must be one of the following | ||||
@@ -784,7 +775,8 @@ REG_OP(TensorScatterUpdate) | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li axis: An optional attribute. Defaults to 0. | * @li axis: An optional attribute. Defaults to 0. | ||||
* @li reduction: An optional attribute. Defaults to string "none" and can be "add" or "mul". | |||||
* @li reduction: An optional attribute. Defaults to string "none" and can be | |||||
* "add" or "mul". \n | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: A Tensor. Has the same type and format as input "data" . \n | * y: A Tensor. Has the same type and format as input "data" . \n | ||||
@@ -1147,7 +1139,7 @@ REG_OP(DiagPart) | |||||
* with a set of learned weights, and (optionally) adds biases. \n | * with a set of learned weights, and (optionally) adds biases. \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* Four inputs, including: | * Four inputs, including: | ||||
* @li x: A Tensor of type float16, int8, int4, float32. | |||||
* @li x: A Tensor of type float16, int8, int4. | |||||
* @li w: A weight matrix of type float16, int8, int4, float32. | * @li w: A weight matrix of type float16, int8, int4, float32. | ||||
* @li b: An optional Tensor of type float16, int32, float32. | * @li b: An optional Tensor of type float16, int32, float32. | ||||
* @li offset_w: An optional Tensor of type int8, int4. | * @li offset_w: An optional Tensor of type int8, int4. | ||||
@@ -1175,11 +1167,11 @@ REG_OP(DiagPart) | |||||
* Yes | * Yes | ||||
*/ | */ | ||||
REG_OP(FullyConnection) | REG_OP(FullyConnection) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) | |||||
.INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) | |||||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT, DT_BF16})) | |||||
.INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT, DT_BF16})) | |||||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_BF16})) | |||||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) | .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_BF16})) | |||||
.REQUIRED_ATTR(num_output, Int) | .REQUIRED_ATTR(num_output, Int) | ||||
.ATTR(transpose, Bool, false) | .ATTR(transpose, Bool, false) | ||||
.ATTR(axis, Int, 1) | .ATTR(axis, Int, 1) | ||||
@@ -1643,7 +1635,7 @@ REG_OP(Tril) | |||||
* @par Inputs: | * @par Inputs: | ||||
* @li x: A list of Tensors. Must be one of the following types: int32, | * @li x: A list of Tensors. Must be one of the following types: int32, | ||||
* float16, float32. Tensors to be concatenated. All must have size 1 in | * float16, float32. Tensors to be concatenated. All must have size 1 in | ||||
* the first dimension and same shape.It's a dynamic input. \n | |||||
* the first dimension and same shape. It's a dynamic input. \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li equation: The subscripts for the Einstein summation. \n | * @li equation: The subscripts for the Einstein summation. \n | ||||
@@ -1658,7 +1650,7 @@ REG_OP(Tril) | |||||
* Input N must be Int. \n | * Input N must be Int. \n | ||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* Compatible with Pytorch einsum operator. | |||||
* Compatible with Tensorflow 2.x einsum operator. | |||||
*/ | */ | ||||
REG_OP(Einsum) | REG_OP(Einsum) | ||||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | ||||
@@ -99,8 +99,8 @@ Specifies the variance used for inference. Must be "None" | |||||
if the operation is used for training . \n | if the operation is used for training . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. | |||||
Defaults to "0.0001". | |||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. | |||||
* Defaults to "0.0001". | |||||
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". | *@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". | ||||
*@li is_training: An optional bool, specifying if the operation is used for training or inference. | *@li is_training: An optional bool, specifying if the operation is used for training or inference. | ||||
Defaults to "True" . \n | Defaults to "True" . \n | ||||
@@ -31,7 +31,7 @@ namespace ge { | |||||
* Three inputs include: | * Three inputs include: | ||||
* @li input: 4D origin shape of input tensor [N, C, H, W] or [N, H, W, C], | * @li input: 4D origin shape of input tensor [N, C, H, W] or [N, H, W, C], | ||||
* support float16. | * support float16. | ||||
* @li filter_size: A 4D tensor of type int32, int64, with shape [H, W, C, K] | |||||
* @li filter_size: A 4D tensor of type int32. | |||||
* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. | * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. | ||||
* Must be one of the following types: float16. \n | * Must be one of the following types: float16. \n | ||||
@@ -52,9 +52,9 @@ namespace ge { | |||||
* @par Outputs: | * @par Outputs: | ||||
* filter_grad: Gradient of the deep convolution relative to the filter with | * filter_grad: Gradient of the deep convolution relative to the filter with | ||||
* shape [H, W, C, K]. Must be one of the following types: float16. \n | |||||
* shape [H, W, C, K]. Must be one of the following types: float32. \n | |||||
* @attention Constraints:\n | |||||
* @attention Constraints: | |||||
* The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but | * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but | ||||
* the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n | * the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n | ||||
* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape | * The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape | ||||
@@ -90,7 +90,7 @@ REG_OP(DepthwiseConv2DBackpropFilter) | |||||
* Two inputs include: \n | * Two inputs include: \n | ||||
* @li input: 4D tensor with shape [N, C, H, W] or [N, H, W, C], of type float16 | * @li input: 4D tensor with shape [N, C, H, W] or [N, H, W, C], of type float16 | ||||
* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C], | * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C], | ||||
* of type float16 | |||||
* of type float16. | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li filter_size: A required list or tuple. Shape of filter. | * @li filter_size: A required list or tuple. Shape of filter. | ||||
@@ -133,8 +133,8 @@ REG_OP(DepthwiseConv2DBackpropFilter) | |||||
* instead. | * instead. | ||||
*/ | */ | ||||
REG_OP(DepthwiseConv2DBackpropFilterD) | REG_OP(DepthwiseConv2DBackpropFilterD) | ||||
.INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) | |||||
.INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) | |||||
.INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||||
.INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||||
.OUTPUT(filter_grad, TensorType({DT_FLOAT32})) | .OUTPUT(filter_grad, TensorType({DT_FLOAT32})) | ||||
.REQUIRED_ATTR(filter_size, ListInt) | .REQUIRED_ATTR(filter_size, ListInt) | ||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
@@ -147,9 +147,9 @@ REG_OP(DepthwiseConv2DBackpropFilterD) | |||||
* @brief Computes the gradients of depthwise convolution with respect to the | * @brief Computes the gradients of depthwise convolution with respect to the | ||||
* input. \n | * input. \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* Three inputs include: \n | |||||
* Three inputs include: | |||||
* @li input_size: 4D shape of input tensor [N, C, H, W] or [N, H, W, C], | * @li input_size: 4D shape of input tensor [N, C, H, W] or [N, H, W, C], | ||||
* support int32, int64. | |||||
* support int32. | |||||
* @li filter: 4D filter tensor with shape of [H, W, C, K], support float16. | * @li filter: 4D filter tensor with shape of [H, W, C, K], support float16. | ||||
* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. | * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. | ||||
* Must be one of the following types: float16 . \n | * Must be one of the following types: float16 . \n | ||||
@@ -172,7 +172,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD) | |||||
* @par Outputs: | * @par Outputs: | ||||
* input_grad: Gradient of the deep convolution relative to the input with shape | * input_grad: Gradient of the deep convolution relative to the input with shape | ||||
* [N, C, H, W] or [N, H, W, C] Must be one of the following types: | * [N, C, H, W] or [N, H, W, C] Must be one of the following types: | ||||
* float16, float32. \n | |||||
* float16. \n | |||||
* @attention Constraints:\n | * @attention Constraints:\n | ||||
* The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but | * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but | ||||
@@ -184,7 +184,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD) | |||||
* data is 5D with shape [N, C1, Ho, Wo, C0], | * data is 5D with shape [N, C1, Ho, Wo, C0], | ||||
* where C is the same as that of the feature map and C0 is 16.\n | * where C is the same as that of the feature map and C0 is 16.\n | ||||
* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * | * Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * | ||||
* Wf * C0 * C0 * 2) / (2 * Wo *C0).\n | |||||
* Wf * C0 * C0 * 2) / (2 * Wo *C0). \n | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropInput. | * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropInput. | ||||
@@ -194,7 +194,7 @@ REG_OP(DepthwiseConv2DBackpropInput) | |||||
.INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | ||||
.INPUT(filter, TensorType({DT_FLOAT16})) | .INPUT(filter, TensorType({DT_FLOAT16})) | ||||
.INPUT(out_backprop, TensorType({DT_FLOAT16})) | .INPUT(out_backprop, TensorType({DT_FLOAT16})) | ||||
.OUTPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||||
.OUTPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | .ATTR(dilations, ListInt, {1, 1, 1, 1}) | ||||
.REQUIRED_ATTR(pads, ListInt) | .REQUIRED_ATTR(pads, ListInt) | ||||
@@ -355,9 +355,8 @@ REG_OP(BiasAddGrad) | |||||
* data tensor. An integer vector representing the shape of input, where | * data tensor. An integer vector representing the shape of input, where | ||||
* input is a 4-D tensor [batch, height, width, channels] | * input is a 4-D tensor [batch, height, width, channels] | ||||
* or [batch, channels, height, width]. | * or [batch, channels, height, width]. | ||||
* @li filter: A Tensor. Must be one of the following types: float16, float32, | |||||
* float64. 4-D with shape | |||||
* [filter_height, filter_width, in_channels, out_channels] | |||||
* @li filter: A Tensor. Must be one of the following types: float16. | |||||
* 4-D with shape [filter_height, filter_width, in_channels, out_channels] | |||||
* or [out_channels, filter_height, filter_width, in_channels] | * or [out_channels, filter_height, filter_width, in_channels] | ||||
* or [out_channels, in_channel, filter_height, filter_width]. | * or [out_channels, in_channel, filter_height, filter_width]. | ||||
* @li out_backprop: A Tensor. Must have the same type as filter. | * @li out_backprop: A Tensor. Must have the same type as filter. | ||||
@@ -372,14 +371,9 @@ REG_OP(BiasAddGrad) | |||||
| Tensor | out_bckprop | filter | y |\n | | Tensor | out_bckprop | filter | y |\n | ||||
|-----------|-------------|---------|--------|\n | |-----------|-------------|---------|--------|\n | ||||
| Data Type | float16 | float16 | float16|\n | | Data Type | float16 | float16 | float16|\n | ||||
| | float32 | float32 | float32|\n | |||||
| | float64 | float64 | float64|\n | |||||
| Format | NCHW | NCHW | NCHW |\n | | Format | NCHW | NCHW | NCHW |\n | ||||
| | NHWC | HWCN | NHWC |\n | | | NHWC | HWCN | NHWC |\n | ||||
*\n | *\n | ||||
* For float32 and float64 type, the actual calculation on the chip is based | |||||
* on float16. | |||||
*\n | |||||
* | * | ||||
*@par Attributes: | *@par Attributes: | ||||
* Five attributes: | * Five attributes: | ||||
@@ -400,13 +394,13 @@ REG_OP(BiasAddGrad) | |||||
*\n | *\n | ||||
| Name | Field | Scope |\n | | Name | Field | Scope |\n | ||||
|------------------|----------|--------------|\n | |------------------|----------|--------------|\n | ||||
| input_size | H | [1, 200000] |\n | |||||
| input_size | H | [1, 4096] |\n | |||||
| | W | [1, 4096] |\n | | | W | [1, 4096] |\n | ||||
| Filter | H | [1, 255] |\n | | Filter | H | [1, 255] |\n | ||||
| | W | [1, 255] |\n | | | W | [1, 255] |\n | ||||
| out_backprop | H*strideH| [1, 200000] |\n | |||||
| out_backprop | H*strideH| [1, 4096] |\n | |||||
| | W*strideW| [1, 4096] |\n | | | W*strideW| [1, 4096] |\n | ||||
| y(fmap) | H | [1, 200000] |\n | |||||
| y(fmap) | H | [1, 4096] |\n | |||||
| | W | [1, 4096] |\n | | | W | [1, 4096] |\n | ||||
| Stride | H | [1, 63] |\n | | Stride | H | [1, 63] |\n | ||||
| | W | [1, 63] |\n | | | W | [1, 63] |\n | ||||
@@ -455,7 +449,7 @@ REG_OP(Conv2DBackpropInput) | |||||
*@brief Computes the gradients of convolution with respect to the input. | *@brief Computes the gradients of convolution with respect to the input. | ||||
* @par Inputs: | * @par Inputs: | ||||
* Two inputs: | * Two inputs: | ||||
* @li filter: A Tensor. Types is float16. | |||||
* @li filter: A Tensor. Type is float16 or int8. | |||||
* 4-D with shape [filter_height, filter_width, in_channels, out_channels] | * 4-D with shape [filter_height, filter_width, in_channels, out_channels] | ||||
* or [out_channels, filter_height, filter_width, in_channels] | * or [out_channels, filter_height, filter_width, in_channels] | ||||
* or [out_channels, in_channel, filter_height, filter_width]. | * or [out_channels, in_channel, filter_height, filter_width]. | ||||
@@ -479,8 +473,8 @@ REG_OP(Conv2DBackpropInput) | |||||
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | ||||
* "NHWC". Specify the data format of the input and output data. | * "NHWC". Specify the data format of the input and output data. | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type as filter,4-D tensor [batch, height, width, | |||||
* channels] or [batch, channels, height, width]. | |||||
* y: A Tensor of type float16, float32 or int32. 4-D tensor | |||||
* [batch, height, width, channels] or [batch, channels, height, width]. | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* Compatible with Tensorflow's conv2d_backprop_input | * Compatible with Tensorflow's conv2d_backprop_input | ||||
*@par Restrictions: | *@par Restrictions: | ||||
@@ -547,11 +541,11 @@ REG_OP(Conv2DBackpropInputD) | |||||
*\n | *\n | ||||
| Name | Field | Scope |\n | | Name | Field | Scope |\n | ||||
|------------------|----------|--------------|\n | |------------------|----------|--------------|\n | ||||
| x (out_backprop) | H*strideH| [1, 200000] |\n | |||||
| x (out_backprop) | H*strideH| [1, 4096] |\n | |||||
| | W*strideW| [1, 4096] |\n | | | W*strideW| [1, 4096] |\n | ||||
| Filter | H | [1, 255] |\n | | Filter | H | [1, 255] |\n | ||||
| | W | [1, 255] |\n | | | W | [1, 255] |\n | ||||
| y (fmap) | H | [1, 200000] |\n | |||||
| y (fmap) | H | [1, 4096] |\n | |||||
| | W | [1, 4096] |\n | | | W | [1, 4096] |\n | ||||
| Stride | H | [1, 63] |\n | | Stride | H | [1, 63] |\n | ||||
| | W | [1, 63] |\n | | | W | [1, 63] |\n | ||||
@@ -602,8 +596,8 @@ REG_OP(Deconvolution) | |||||
*@brief Computes the gradients of convolution with respect to the filter | *@brief Computes the gradients of convolution with respect to the filter | ||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs: | * Three inputs: | ||||
* @li x: A Tensor. Must be one of the following types: float16, float32, | |||||
* float64. 4-D with shape [batch, in_height, in_width, in_channels] or | |||||
* @li x: A Tensor. Must be one of the following types: float16. | |||||
* 4-D with shape [batch, in_height, in_width, in_channels] or | |||||
* [batch, in_channels, in_height, in_width]. | * [batch, in_channels, in_height, in_width]. | ||||
* @li filter_size: A const Tensor of type int32. Currently does not support | * @li filter_size: A const Tensor of type int32. Currently does not support | ||||
* data tensor. An integer vector representing the tensor shape of filter, | * data tensor. An integer vector representing the tensor shape of filter, | ||||
@@ -621,9 +615,7 @@ REG_OP(Deconvolution) | |||||
*\n | *\n | ||||
| Tensor | x | out_backprop | y |\n | | Tensor | x | out_backprop | y |\n | ||||
|-----------|---------|--------------|---------|\n | |-----------|---------|--------------|---------|\n | ||||
| Data Type | float16 | float16 | float16 |\n | |||||
| | float32 | float32 | float32 |\n | |||||
| | float64 | float64 | float64 |\n | |||||
| Data Type | float16 | float16 | float32 |\n | |||||
| Format | NCHW | NCHW | NCHW |\n | | Format | NCHW | NCHW | NCHW |\n | ||||
| | NHWC | NHWC | HWCN |\n | | | NHWC | NHWC | HWCN |\n | ||||
*\n | *\n | ||||
@@ -650,13 +642,13 @@ REG_OP(Deconvolution) | |||||
*\n | *\n | ||||
| Name | Field | Scope |\n | | Name | Field | Scope |\n | ||||
|------------------|----------|--------------|\n | |------------------|----------|--------------|\n | ||||
| x(fmap) | H | [1, 200000] |\n | |||||
| x(fmap) | H | [1, 4096] |\n | |||||
| | W | [1, 4096] |\n | | | W | [1, 4096] |\n | ||||
| Filter Size | H | [1, 255] |\n | | Filter Size | H | [1, 255] |\n | ||||
| | W | [1, 255] |\n | | | W | [1, 255] |\n | ||||
| out_backprop | H | [1, 200000] |\n | |||||
| out_backprop | H | [1, 4096] |\n | |||||
| | W | [1, 4096] |\n | | | W | [1, 4096] |\n | ||||
| y | H | [1, 200000] |\n | |||||
| y | H | [1, 4096] |\n | |||||
| | W | [1, 4096] |\n | | | W | [1, 4096] |\n | ||||
| Stride | H | [1, 63] |\n | | Stride | H | [1, 63] |\n | ||||
| | W | [1, 63] |\n | | | W | [1, 63] |\n | ||||
@@ -1015,13 +1007,12 @@ REG_OP(DeformableConv2D) | |||||
/** | /** | ||||
*@brief Computes a 3D convolution given 5D "x" and "filter" tensors. | *@brief Computes a 3D convolution given 5D "x" and "filter" tensors. | ||||
*@par Inputs: | *@par Inputs: | ||||
* @li x: A 5D tensor. Must be one of the following types: float16, | |||||
* (Currently does not support int8). The format of x is NCDHW or NDHWC. | |||||
* @li x: A 5D tensor. Must be one of the following types: float16, int8. | |||||
* The format of x is NCDHW or NDHWC. | |||||
* @li filter: A 5D tensor of the same type as "x". | * @li filter: A 5D tensor of the same type as "x". | ||||
* (Currently does not support int8). | |||||
* The format is NCDHW, NDHWC or DHWCN. | * The format is NCDHW, NDHWC or DHWCN. | ||||
* @li bias: Optional. An 1D tensor of the same type as "x". | * @li bias: Optional. An 1D tensor of the same type as "x". | ||||
* @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n | |||||
* @li offset_w: Optional. An 1D tensor for quantized deconvolution. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
* @li strides: Required. A list of 5 integers. Specifies the stride of the | * @li strides: Required. A list of 5 integers. Specifies the stride of the | ||||
@@ -1041,7 +1032,8 @@ REG_OP(DeformableConv2D) | |||||
* Defaults to 0. Reserved. \n | * Defaults to 0. Reserved. \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type and data format as "x". \n | |||||
* y: A Tensor. Has the same data format as "x". If the type of "x" is int8, | |||||
* the type of y is int32. \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
* The image size after padding is greater than the filter size. \n | * The image size after padding is greater than the filter size. \n | ||||
@@ -1051,11 +1043,11 @@ REG_OP(DeformableConv2D) | |||||
* @li Compatible with the Caffe operator Convolution. | * @li Compatible with the Caffe operator Convolution. | ||||
*/ | */ | ||||
REG_OP(Conv3D) | REG_OP(Conv3D) | ||||
.INPUT(x, TensorType({DT_FLOAT16})) | |||||
.INPUT(filter, TensorType({DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | |||||
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) | |||||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT32})) | |||||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT32})) | |||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
.REQUIRED_ATTR(pads, ListInt) | .REQUIRED_ATTR(pads, ListInt) | ||||
.ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) | .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) | ||||
@@ -1068,12 +1060,11 @@ REG_OP(Conv3D) | |||||
/** | /** | ||||
*@brief Computes the gradients of convolution 3d with respect to the input. | *@brief Computes the gradients of convolution 3d with respect to the input. | ||||
*@par Inputs: | *@par Inputs: | ||||
* @li input_size: A Tensor of type int32, int64. An integer vector | |||||
* @li input_size: A Tensor of type int32. An integer vector | |||||
* representing the shape of input, where input is a 5-D tensor | * representing the shape of input, where input is a 5-D tensor | ||||
* [batch, depth, height, width, channels] or | * [batch, depth, height, width, channels] or | ||||
* [batch, channels, depth, height, width]. | * [batch, channels, depth, height, width]. | ||||
* @li filter: A Tensor. Must be one of the following types: float16, float32. | |||||
* Currently does not support double. | |||||
* @li filter: A Tensor. Must be one of the following types: float16. | |||||
* @li out_backprop: A Tensor. Must have the same type as filter. | * @li out_backprop: A Tensor. Must have the same type as filter. | ||||
* 5-D with shape [batch, depth, out_height, out_width, out_channels] | * 5-D with shape [batch, depth, out_height, out_width, out_channels] | ||||
* or [batch, out_channels, depth, out_height, out_width]. Gradients with | * or [batch, out_channels, depth, out_height, out_width]. Gradients with | ||||
@@ -1095,8 +1086,7 @@ REG_OP(Conv3D) | |||||
* Defaults to "NDHWC". Specify the data format of the input and output data. \n | * Defaults to "NDHWC". Specify the data format of the input and output data. \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type as filter,and has same format as | |||||
* "input_size". \n | |||||
* y: A Tensor. Has same format as "input_size". \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with Tensorflow's conv3d_backprop_input | * Compatible with Tensorflow's conv3d_backprop_input | ||||
@@ -1207,8 +1197,7 @@ REG_OP(LSTM) | |||||
/** | /** | ||||
*@brief Computes the gradients of convolution3D with respect to the filter | *@brief Computes the gradients of convolution3D with respect to the filter | ||||
*@par Inputs: | *@par Inputs: | ||||
* @li x: A Tensor. Must be one of the following types: float16, float32, | |||||
* double. Currently does not support double. | |||||
* @li x: A Tensor. Must be one of the following types: float16. | |||||
* 5-D with shape [batch, in_depth, in_height, in_width, in_channels] | * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] | ||||
* or [batch, in_channels, in_depth, in_height, in_width]. | * or [batch, in_channels, in_depth, in_height, in_width]. | ||||
* @li filter_size: A Tensor of type int32. An integer vector representing the | * @li filter_size: A Tensor of type int32. An integer vector representing the | ||||
@@ -1236,7 +1225,7 @@ REG_OP(LSTM) | |||||
* Defaults to "NDHWC". Specify the data format of the input and output data. \n | * Defaults to "NDHWC". Specify the data format of the input and output data. \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW | |||||
* y: A Tensor that has the type float32 and the format is NDHWC, NCDHW | |||||
* or DHWCN. \n | * or DHWCN. \n | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
@@ -1310,7 +1299,7 @@ REG_OP(Conv3DBackpropFilterD) | |||||
*@brief Computes the transpose of convolution 3d with respect to the input. | *@brief Computes the transpose of convolution 3d with respect to the input. | ||||
*@par Inputs: | *@par Inputs: | ||||
* @li input_size: A Tensor of type int32, int64. An integer vector | |||||
* @li input_size: A Tensor of type int32. An integer vector | |||||
* representing the shape of input. | * representing the shape of input. | ||||
* @li x: A Tensor of type float16, currently does not support int8. The format | * @li x: A Tensor of type float16, currently does not support int8. The format | ||||
* is NDHWC or NCDHW. | * is NDHWC or NCDHW. | ||||
@@ -1336,7 +1325,7 @@ REG_OP(Conv3DBackpropFilterD) | |||||
* @li offset_x: Optional. Input offset_x value. Reserved. \n | * @li offset_x: Optional. Input offset_x value. Reserved. \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type and format as "x". | |||||
* y: A Tensor. Has the same format as "x", has the type float16, float32. | |||||
*/ | */ | ||||
REG_OP(Conv3DTranspose) | REG_OP(Conv3DTranspose) | ||||
.INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | ||||
@@ -1362,7 +1351,7 @@ REG_OP(Conv3DTranspose) | |||||
* The format is NDHWC or NCDHW. | * The format is NDHWC or NCDHW. | ||||
* @li filter: A Tensor of type float16, currently does not support int8. | * @li filter: A Tensor of type float16, currently does not support int8. | ||||
* The format is NDHWC, NCDHW or DHWCN. | * The format is NDHWC, NCDHW or DHWCN. | ||||
* @li bias: Optional. An 1D tensor of the same type as "x". Reserved. | |||||
* @li bias: Optional. An 1D tensor of the same type as "x". | |||||
* @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n | * @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -1383,7 +1372,7 @@ REG_OP(Conv3DTranspose) | |||||
* @li offset_x: Optional. Input offset_x value. Reserved. \n | * @li offset_x: Optional. Input offset_x value. Reserved. \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A Tensor. Has the same type and format as "x". \n | |||||
* y: A Tensor. Has the same format as "x", has the type float16, float32. \n | |||||
*@par Restrictions: | *@par Restrictions: | ||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. | ||||
@@ -1428,7 +1417,7 @@ REG_OP(Conv3DTransposeD) | |||||
| Tensor | x | filter | bias | y |\n | | Tensor | x | filter | bias | y |\n | ||||
|-----------|---------|---------|---------|--------|\n | |-----------|---------|---------|---------|--------|\n | ||||
| Data Type | float16 | float16 | float16 | float16|\n | | Data Type | float16 | float16 | float16 | float16|\n | ||||
| | int8 | int8 | int32 | int32 |\n | |||||
| | float16 | float16 | float32 | float32|\n | |||||
| Format | NCHW | NCHW | ND | NCHW |\n | | Format | NCHW | NCHW | ND | NCHW |\n | ||||
| | NHWC | HWCN | | NHWC |\n | | | NHWC | HWCN | | NHWC |\n | ||||
*\n | *\n | ||||
@@ -1461,13 +1450,13 @@ REG_OP(Conv3DTransposeD) | |||||
*\n | *\n | ||||
| Name | Field | Scope |\n | | Name | Field | Scope |\n | ||||
|------------------|----------|--------------|\n | |------------------|----------|--------------|\n | ||||
| input_size | H | [1, 200000] |\n | |||||
| input_size | H | [1, 4096] |\n | |||||
| | W | [1, 4096] |\n | | | W | [1, 4096] |\n | ||||
| x (out_backprop) | H*strideH| [1, 200000] |\n | |||||
| x (out_backprop) | H*strideH| [1, 4096] |\n | |||||
| | W*strideW| [1, 4096] |\n | | | W*strideW| [1, 4096] |\n | ||||
| filter | H | [1, 255] |\n | | filter | H | [1, 255] |\n | ||||
| | W | [1, 255] |\n | | | W | [1, 255] |\n | ||||
| y (fmap) | H | [1, 200000] |\n | |||||
| y (fmap) | H | [1, 4096] |\n | |||||
| | W | [1, 4096] |\n | | | W | [1, 4096] |\n | ||||
| Stride | H | [1, 63] |\n | | Stride | H | [1, 63] |\n | ||||
| | W | [1, 63] |\n | | | W | [1, 63] |\n | ||||
@@ -1503,9 +1492,9 @@ REG_OP(Conv2DTranspose) | |||||
.INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | ||||
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) | .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) | ||||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) | |||||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT})) | |||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
.REQUIRED_ATTR(pads, ListInt) | .REQUIRED_ATTR(pads, ListInt) | ||||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | .ATTR(dilations, ListInt, {1, 1, 1, 1}) | ||||
@@ -1522,7 +1511,7 @@ REG_OP(Conv2DTranspose) | |||||
* @li x: A Tensor of type float16, int8. | * @li x: A Tensor of type float16, int8. | ||||
* @li filter: A Tensor of type float16, int8. Must have the same type as "x". | * @li filter: A Tensor of type float16, int8. Must have the same type as "x". | ||||
* @li bias: An optional 1D tensor of the same type as "x". | * @li bias: An optional 1D tensor of the same type as "x". | ||||
* @li offset_w: An optional 1D tensor for quantized inference. Type is int8. Reserved. | |||||
* @li offset_w: An optional 1D tensor for quantized inference. Type is int8. | |||||
*@par Required Attributes: | *@par Required Attributes: | ||||
* @li input_size: A Tensor of type int32 or int64. An integer vector representing the | * @li input_size: A Tensor of type int32 or int64. An integer vector representing the | ||||
* shape of input. | * shape of input. | ||||
@@ -1550,9 +1539,9 @@ REG_OP(Conv2DTranspose) | |||||
REG_OP(Conv2DTransposeD) | REG_OP(Conv2DTransposeD) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | ||||
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) | .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) | ||||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) | |||||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT})) | |||||
.REQUIRED_ATTR(input_size, ListInt) | .REQUIRED_ATTR(input_size, ListInt) | ||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
.REQUIRED_ATTR(pads, ListInt) | .REQUIRED_ATTR(pads, ListInt) | ||||
@@ -193,6 +193,39 @@ REG_OP(SigmoidCrossEntropyWithLogitsV2) | |||||
.ATTR(reduction, String, "mean") | .ATTR(reduction, String, "mean") | ||||
.OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsV2) | .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsV2) | ||||
/** | |||||
* @brief Computes the sigmoid focal loss of "pred" and "target". | |||||
* @par Inputs: | |||||
* Three inputs, including: | |||||
* @li pred: A 2-dimensional Tensor of type float16 or float32, specifying the predicted value. | |||||
* @li target: A 1-dimensional Tensor of type int32, specifying the target value. | |||||
* @li weight: A 1-dimensional Tensor, specifying the weight value. \n | |||||
* @par Attributes: | |||||
* @li gamma: An optional float, specifying the exponent of the modulating factor (1 - pt) | |||||
* to balance easy/hard examples. Defaults to 2.0. | |||||
* @li alpha: An optional float, specifying the weighting factor in range (0, 1) to balance | |||||
* the importance of positive/negative examples or less than 0 for ignore. Defaults to 0.25. | |||||
* @li reduction: An optional character string from "none", "mean", and "sum", specifying the | |||||
* reduction type to be applied to the output. Defaults to "mean". \n | |||||
* @par Outputs: | |||||
* loss: Sigmoid focal loss between the predicted value and target value. Has the same dimensions as "pred". \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with mmcv operator SigmoidFocalLoss. | |||||
*/ | |||||
REG_OP(SigmoidFocalLoss) | |||||
.INPUT(pred, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(target, TensorType({DT_INT32})) | |||||
.OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(loss, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.ATTR(gamma, Float, 2.0) | |||||
.ATTR(alpha, Float, 0.25) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(SigmoidFocalLoss) | |||||
/** | /** | ||||
* @brief Computes the regression box of the RPN. It is a FasterRCNN operator . | * @brief Computes the regression box of the RPN. It is a FasterRCNN operator . | ||||
@@ -1834,5 +1867,79 @@ REG_OP(AxpyWithSoftmaxAndDropOutDoMask) | |||||
.REQUIRED_ATTR(input_keep_prob, Float) | .REQUIRED_ATTR(input_keep_prob, Float) | ||||
.ATTR(axis, ListInt, {-1}) | .ATTR(axis, ListInt, {-1}) | ||||
.OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropOutDoMask) | .OP_END_FACTORY_REG(AxpyWithSoftmaxAndDropOutDoMask) | ||||
/** | |||||
* @brief MMCV Function: sigmoid_focal_loss_grad . \n | |||||
* @par Inputs: | |||||
* Four inputs, including: | |||||
* @li pred: the predicted tensor. Supported types are float16 and float32. | |||||
* @li target: the target label Tensor. The supported type is int32. | |||||
* @li dout: the gradient passed back from the previous op, which has the same shape as pred. Supported types are float16 and float32. | |||||
* @li weight: An optional input Tensor, default is None, which helps to calculate the loss by supplying sample weights: | |||||
* shape of pred should be (B,D), B means batch size, D means the number of labels. | |||||
* shape of target should be (D, ). | |||||
* shape of weight should be (D, ) \n | |||||
* @par Attributes: | |||||
* @li alpha: An attribute used to reweight the sample. The type is float . \n | |||||
* @li gamma: An attribute used to calculate the power of the probability. | |||||
* The type is float . \n | |||||
* @li reduction: a type of the reduce method. default is 'mean', which means computing the average loss. | |||||
'sum' means computing the sum of the loss, 'none' means no reducing .\n | |||||
* @par Outputs: | |||||
* grad: A mutable Tensor. Has the same type and shape as "pred". \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the MMCV operator SigmoidFocalLoss. | |||||
*/ | |||||
REG_OP(SigmoidFocalLossGrad) | |||||
.INPUT(pred, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(target, TensorType({DT_INT32})) | |||||
.INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(alpha, Float, 0.25) | |||||
.ATTR(gamma, Float, 2.0) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(SigmoidFocalLossGrad) | |||||
/** | |||||
* @brief MMCV Function: softmax_focal_loss_grad . \n | |||||
* @par Inputs: | |||||
* Four inputs, including: | |||||
* @li pred: the predicted tensor. Supported types are float16 and float32. | |||||
* @li target: the target label Tensor. The supported type is int32. | |||||
* @li dout: the gradient passed back from the previous op, which has the same shape as pred. Supported types are float16 and float32. | |||||
* @li weight: An optional input Tensor, default is None, which helps to calculate the loss by supplying sample weights: | |||||
* shape of pred should be (B,D), B means batch size, D means the number of labels. | |||||
* shape of target should be (B, D). | |||||
* shape of weight should be (D, ) \n | |||||
* @par Attributes: | |||||
* @li alpha: An attribute used to reweight the sample. The type is float . \n | |||||
* @li gamma: An attribute used to calculate the power of the probability. | |||||
* The type is float . \n | |||||
* @li reduction: a type of the reduce method. default is 'mean', which means computing the average loss. | |||||
'sum' means computing the sum of the loss, 'none' means no reducing .\n | |||||
* @par Outputs: | |||||
* grad: A mutable Tensor. Has the same type and shape as "pred". \n | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the MMCV operator SoftmaxFocalLossGrad. | |||||
*/ | |||||
REG_OP(SoftmaxFocalLossGrad) | |||||
.INPUT(pred, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(target, TensorType({DT_INT32})) | |||||
.INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.ATTR(alpha, Float, 0.25) | |||||
.ATTR(gamma, Float, 2.0) | |||||
.ATTR(reduction, String, "mean") | |||||
.OP_END_FACTORY_REG(SoftmaxFocalLossGrad) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ |
@@ -174,8 +174,8 @@ REG_OP(AvgPoolV2) | |||||
/** | /** | ||||
* @brief Performs average pooling on the input. \n | * @brief Performs average pooling on the input. \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type | |||||
* float16, float32, double. \n | |||||
* @li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and | |||||
* type float16. \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li ksize: List of ints that has length 1, 3 or 5. The size of the window | * @li ksize: List of ints that has length 1, 3 or 5. The size of the window | ||||
@@ -201,8 +201,8 @@ REG_OP(AvgPoolV2) | |||||
* Compatible with the TensorFlow operator AvgPool3D. | * Compatible with the TensorFlow operator AvgPool3D. | ||||
*/ | */ | ||||
REG_OP(AvgPool3D) | REG_OP(AvgPool3D) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.REQUIRED_ATTR(ksize, ListInt) | .REQUIRED_ATTR(ksize, ListInt) | ||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
.REQUIRED_ATTR(pads, ListInt) | .REQUIRED_ATTR(pads, ListInt) | ||||
@@ -216,9 +216,9 @@ REG_OP(AvgPool3D) | |||||
/** | /** | ||||
* @brief Performs average pooling on the input. | * @brief Performs average pooling on the input. | ||||
* @par Inputs: | * @par Inputs: | ||||
* @li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. | |||||
* @li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. | |||||
* @li multiplier: An optional tensor of float16, float32, double. | |||||
* @li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16. | |||||
* @li filter: An optional tensor of type float16, fractal_z_3d layout. | |||||
* @li multiplier: An optional tensor of float16. | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. | * @li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. | ||||
@@ -239,10 +239,10 @@ REG_OP(AvgPool3D) | |||||
* Compatible with the TensorFlow operator AvgPool3D. | * Compatible with the TensorFlow operator AvgPool3D. | ||||
*/ | */ | ||||
REG_OP(AvgPool3DD) | REG_OP(AvgPool3DD) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT, DT_DOUBLE})) | |||||
.REQUIRED_ATTR(ksize, ListInt) | .REQUIRED_ATTR(ksize, ListInt) | ||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
.REQUIRED_ATTR(pads, ListInt) | .REQUIRED_ATTR(pads, ListInt) | ||||
@@ -256,7 +256,7 @@ REG_OP(AvgPool3DD) | |||||
* @brief Computes AvgPool3DGrad function. \n | * @brief Computes AvgPool3DGrad function. \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* @li orig_input_shape: An NDHWC tensor of type int32. | * @li orig_input_shape: An NDHWC tensor of type int32. | ||||
* @li grads: An NDHWC tensor of type float16, float32, or double. \n | |||||
* @li grads: An NDHWC tensor of type float16. \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li ksize: List of ints that has length 5. The size of the window for | * @li ksize: List of ints that has length 5. The size of the window for | ||||
@@ -284,8 +284,8 @@ REG_OP(AvgPool3DD) | |||||
REG_OP(AvgPool3DGrad) | REG_OP(AvgPool3DGrad) | ||||
.INPUT(orig_input_shape, TensorType({DT_INT32})) | .INPUT(orig_input_shape, TensorType({DT_INT32})) | ||||
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.REQUIRED_ATTR(ksize, ListInt) | .REQUIRED_ATTR(ksize, ListInt) | ||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
.REQUIRED_ATTR(pads, ListInt) | .REQUIRED_ATTR(pads, ListInt) | ||||
@@ -299,7 +299,7 @@ REG_OP(AvgPool3DGrad) | |||||
* @brief Performs average pooling on the input. | * @brief Performs average pooling on the input. | ||||
* @par Inputs: | * @par Inputs: | ||||
* @li grads: An NDHWC tensor of type float16. | * @li grads: An NDHWC tensor of type float16. | ||||
* @li filter: An optional tensor of type float16, fractal_z_3d layout. | |||||
* @li filter: An optional tensor of type float16. | |||||
* @li multiplier: An optional tensor of float16. | * @li multiplier: An optional tensor of float16. | ||||
* @par Attributes: | * @par Attributes: | ||||
@@ -867,8 +867,8 @@ REG_OP(MaxPoolGradGradWithArgmax) | |||||
/** | /** | ||||
* @brief Computes the AvgPoolGrad function. \n | * @brief Computes the AvgPoolGrad function. \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* @li orig_input_shape: An NHWC tensor of type int32. | |||||
* @li input_grad: An NHWC tensor of type float16, float32, or double. \n | |||||
* @li orig_input_shape: A tensor of type int32. | |||||
* @li input_grad: A tensor of type float16. \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li ksize: A required tuple or list, specifying the size of the window for | * @li ksize: A required tuple or list, specifying the size of the window for | ||||
@@ -887,8 +887,8 @@ REG_OP(MaxPoolGradGradWithArgmax) | |||||
*/ | */ | ||||
REG_OP(AvgPoolGrad) | REG_OP(AvgPoolGrad) | ||||
.INPUT(orig_input_shape, TensorType({DT_INT32})) | .INPUT(orig_input_shape, TensorType({DT_INT32})) | ||||
.INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.OUTPUT(out_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
.INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(out_grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
.REQUIRED_ATTR(ksize, ListInt) | .REQUIRED_ATTR(ksize, ListInt) | ||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
.REQUIRED_ATTR(padding, String) | .REQUIRED_ATTR(padding, String) | ||||
@@ -898,9 +898,9 @@ REG_OP(AvgPoolGrad) | |||||
/** | /** | ||||
* @brief Computes gradients of average pooling function . \n | * @brief Computes gradients of average pooling function . \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* @input_grad: An NHWC tensor of type float16. | |||||
* @mean_matrix: Assist matrix, an NHWC tensor of type float16. | |||||
* @kernel_matrix: Assist matrix, an NHWC tensor of type float16. | |||||
* @li input_grad: An NHWC tensor of type float16. | |||||
* @li mean_matrix: Assist matrix, an NHWC tensor of type float16. | |||||
* @li kernel_matrix: Assist matrix, an NHWC tensor of type float16. | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li orig_input_shape: A required Original input dimensions. | * @li orig_input_shape: A required Original input dimensions. | ||||
@@ -913,7 +913,7 @@ REG_OP(AvgPoolGrad) | |||||
* @li data_format: An optional string. Defaults to "NHWC" . \n | * @li data_format: An optional string. Defaults to "NHWC" . \n | ||||
* @par Outputs: | * @par Outputs: | ||||
* @out_grad: A mutable tensor with the same shape and type as "orig_input". | |||||
* @li out_grad: A mutable tensor with the same shape and type as "orig_input". | |||||
* | * | ||||
* @par Restrictions: | * @par Restrictions: | ||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use AvgPoolGrad instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use AvgPoolGrad instead. | ||||
@@ -277,7 +277,7 @@ REG_OP(BN3DTrainingUpdate) | |||||
*y: A tensor of type float16 or float32 for the normalized "x" . \n | *y: A tensor of type float16 or float32 for the normalized "x" . \n | ||||
*@attention Constraints: | *@attention Constraints: | ||||
*For Ascend 310, the result accuracy fails to reach 1/1000 due to the | |||||
* For Ascend 310, the result accuracy fails to reach 1/1000 due to the | |||||
* square root instruction. | * square root instruction. | ||||
*/ | */ | ||||
REG_OP(BNInfer) | REG_OP(BNInfer) | ||||
@@ -314,8 +314,8 @@ REG_OP(BNInfer) | |||||
*@li batch_variance: A tensor of type float32, for the variance of "x" . \n | *@li batch_variance: A tensor of type float32, for the variance of "x" . \n | ||||
*@attention Constraints: | *@attention Constraints: | ||||
*This operator is used in conjunction with BNTrainingReduce. | |||||
*For Ascend 310, the result accuracy fails to reach 1/1000 due to | |||||
*@li This operator is used in conjunction with BNTrainingReduce. | |||||
*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to | |||||
* the square root instruction. | * the square root instruction. | ||||
*/ | */ | ||||
REG_OP(BNTrainingUpdateV2) | REG_OP(BNTrainingUpdateV2) | ||||
@@ -83,7 +83,8 @@ typedef enum tagRtPlatformType { | |||||
PLATFORM_CLOUD_V2_910B2 = 11, | PLATFORM_CLOUD_V2_910B2 = 11, | ||||
PLATFORM_CLOUD_V2_910B3 = 12, | PLATFORM_CLOUD_V2_910B3 = 12, | ||||
PLATFORM_CLOUD_V2_910B4 = 13, | PLATFORM_CLOUD_V2_910B4 = 13, | ||||
PLATFORM_END = 14, | |||||
PLATFORM_MDC_PG2 = 14, | |||||
PLATFORM_END = 15, | |||||
} rtPlatformType_t; | } rtPlatformType_t; | ||||
typedef enum tagRtCubeFracMKNFp16 { | typedef enum tagRtCubeFracMKNFp16 { | ||||
@@ -55,10 +55,10 @@ typedef enum tagRtFeatureType { | |||||
} rtFeatureType_t; | } rtFeatureType_t; | ||||
typedef enum tagRtDeviceFeatureType { | typedef enum tagRtDeviceFeatureType { | ||||
FEATURE_TYPE_SCHE, | |||||
FEATURE_TYPE_BLOCKING_OPERATOR, | |||||
FEATURE_TYPE_FFTS_MODE, | |||||
FEATURE_TYPE_END, | |||||
FEATURE_TYPE_SCHE, | |||||
FEATURE_TYPE_BLOCKING_OPERATOR, | |||||
FEATURE_TYPE_FFTS_MODE, | |||||
FEATURE_TYPE_END, | |||||
} rtDeviceFeatureType_t; | } rtDeviceFeatureType_t; | ||||
typedef enum tagMemcpyInfo { | typedef enum tagMemcpyInfo { | ||||
@@ -392,7 +392,23 @@ RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, con | |||||
RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind, | RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind, | ||||
rtStream_t stm); | rtStream_t stm); | ||||
typedef struct rtMemcpyAddrInfo { | |||||
/** | |||||
* @ingroup dvrt_mem | |||||
* @brief asynchronous memcpy that additionally takes a qosCfg argument | |||||
* @param [in] dst destination address pointer | |||||
* @param [in] destMax Max length of destination address memory | |||||
* @param [in] src source address pointer | |||||
* @param [in] count the number of bytes to copy | |||||
* @param [in] kind memcpy type | |||||
* @param [in] stream asynchronous task stream | |||||
* @param [in] qosCfg asynchronous task qosCfg | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
*/ | |||||
RTS_API rtError_t rtMemcpyAsyncWithCfg(void *dst, uint64_t destMax, const void *src, uint64_t count, | |||||
rtMemcpyKind_t kind, rtStream_t stream, uint32_t qosCfg); | |||||
typedef struct { | |||||
uint32_t resv0; | uint32_t resv0; | ||||
uint32_t resv1; | uint32_t resv1; | ||||
uint32_t resv2; | uint32_t resv2; | ||||
@@ -420,6 +436,23 @@ RTS_API rtError_t rtMemcpyAsyncPtr(void *memcpyAddrInfo, uint64_t destMax, uint6 | |||||
RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind, | RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind, | ||||
rtDataType_t type, rtStream_t stm); | rtDataType_t type, rtStream_t stm); | ||||
/** | |||||
* @ingroup dvrt_mem | |||||
* @brief asynchronous reduce memcpy that additionally takes a qosCfg argument | |||||
* @param [in] dst destination address pointer | |||||
* @param [in] destMax Max length of destination address memory | |||||
* @param [in] src source address pointer | |||||
* @param [in] cnt the number of bytes to copy | |||||
* @param [in] kind reduce kind (rtRecudeKind_t) | |||||
* @param [in] type data type | |||||
* @param [in] stm asynchronous task stream | |||||
* @param [in] qosCfg asynchronous task qosCfg | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
*/ | |||||
RTS_API rtError_t rtReduceAsyncWithCfg(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtRecudeKind_t kind, | |||||
rtDataType_t type, rtStream_t stm, uint32_t qosCfg); | |||||
/** | /** | ||||
* @ingroup dvrt_mem | * @ingroup dvrt_mem | ||||
* @brief asynchronized reduce memcpy | * @brief asynchronized reduce memcpy | ||||
@@ -372,6 +372,17 @@ RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg); | |||||
*/ | */ | ||||
RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *memBuf, uint64_t size); | RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *memBuf, uint64_t size); | ||||
/** | |||||
* @ingroup rt_mem_queue | |||||
* @brief alloc buff, with an explicit flag and group id | |||||
* @param [out] memBuf: address of the allocated buff | |||||
* @param [in] size: The amount of memory space requested | |||||
* @param [in] flag: Huge page flag (bit0~31: mem type, bit32~bit35: devid, bit36~63: resv) | |||||
* @param [in] grpId: group id | |||||
* @return RT_ERROR_NONE for ok | |||||
*/ | |||||
RTS_API rtError_t rtMbufAllocEx(rtMbufPtr_t *memBuf, uint64_t size, uint64_t flag, int32_t grpId); | |||||
/** | /** | ||||
* @ingroup rt_mem_queue | * @ingroup rt_mem_queue | ||||
* @brief free buff | * @brief free buff | ||||
@@ -417,6 +428,15 @@ RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t memBuf, uint64_t *totalSize); | |||||
*/ | */ | ||||
RTS_API rtError_t rtMbufGetPrivInfo(rtMbufPtr_t memBuf, void **priv, uint64_t *size); | RTS_API rtError_t rtMbufGetPrivInfo(rtMbufPtr_t memBuf, void **priv, uint64_t *size); | ||||
/** | |||||
* @ingroup rt_mem_queue | |||||
* @brief copy buf ref | |||||
* @param [in] memBuf: source buff addr | |||||
* @param [out] newMemBuf: destination buff addr | |||||
* @return RT_ERROR_NONE for ok | |||||
*/ | |||||
RTS_API rtError_t rtMbufCopyBufRef(rtMbufPtr_t memBuf, rtMbufPtr_t *newMemBuf); | |||||
// mem group | // mem group | ||||
typedef struct { | typedef struct { | ||||
uint64_t maxMemSize; // max buf size in grp, in KB. = 0 means no limit | uint64_t maxMemSize; // max buf size in grp, in KB. = 0 means no limit | ||||
@@ -430,30 +450,43 @@ typedef struct { | |||||
uint32_t rsv : 28; | uint32_t rsv : 28; | ||||
} rtMemGrpShareAttr_t; | } rtMemGrpShareAttr_t; | ||||
#define RT_MEM_GRP_QUERY_GROUPS_OF_PROCESS 1 // query process all grp | |||||
#define RT_MEM_GRP_QUERY_GROUPS_OF_PROCESS 1 // query process all grp | |||||
#define RT_MEM_GRP_QUERY_GROUP_ID 2 // query group id from name | |||||
#define RT_MEM_GRP_NAME_LEN 32 // it must be same as driver define BUFF_GRP_NAME_LEN | |||||
typedef struct { | typedef struct { | ||||
int32_t pid; | int32_t pid; | ||||
} rtMemGrpQueryByProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | } rtMemGrpQueryByProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | ||||
typedef struct { | |||||
char grpName[RT_MEM_GRP_NAME_LEN]; | |||||
} rtMemGrpQueryGroupId_t; // cmd: RT_MEM_GRP_QUERY_GROUP_ID | |||||
typedef struct { | typedef struct { | ||||
int32_t cmd; | int32_t cmd; | ||||
union { | union { | ||||
rtMemGrpQueryByProc_t grpQueryByProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | rtMemGrpQueryByProc_t grpQueryByProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | ||||
rtMemGrpQueryGroupId_t grpQueryGroupId; // cmd: RT_MEM_GRP_QUERY_GROUP_ID | |||||
}; | }; | ||||
} rtMemGrpQueryInput_t; | } rtMemGrpQueryInput_t; | ||||
#define RT_MEM_GRP_NAME_LEN 32 // it must be same as driver define BUFF_GRP_NAME_LEN | |||||
typedef struct { | typedef struct { | ||||
char_t groupName[RT_MEM_GRP_NAME_LEN]; // group name | char_t groupName[RT_MEM_GRP_NAME_LEN]; // group name | ||||
rtMemGrpShareAttr_t attr; // process in group attribute | rtMemGrpShareAttr_t attr; // process in group attribute | ||||
} rtMemGrpOfProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | } rtMemGrpOfProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | ||||
typedef struct { | typedef struct { | ||||
rtMemGrpOfProc_t *groupsOfProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | |||||
int32_t groupId; // group id | |||||
} rtMemGrpQueryGroupIdInfo_t; // cmd: RT_MEM_GRP_QUERY_GROUP_ID | |||||
typedef struct { | |||||
size_t maxNum; // max number of result | size_t maxNum; // max number of result | ||||
size_t resultNum; // if the number of results exceeds 'maxNum', only 'maxNum' results are filled in buffer | size_t resultNum; // if the number of results exceeds 'maxNum', only 'maxNum' results are filled in buffer | ||||
union { | |||||
rtMemGrpOfProc_t *groupsOfProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | |||||
rtMemGrpQueryGroupIdInfo_t *groupIdInfo; // cmd: RT_MEM_GRP_QUERY_GROUP_ID | |||||
}; | |||||
} rtMemGrpQueryOutput_t; | } rtMemGrpQueryOutput_t; | ||||
/** | /** | ||||
@@ -99,8 +99,66 @@ RTS_API rtError_t rtCmoTaskLaunch(rtCmoTaskInfo_t *taskInfo, rtStream_t stm, uin | |||||
* @return RT_ERROR_NONE for ok, others failed | * @return RT_ERROR_NONE for ok, others failed | ||||
*/ | */ | ||||
RTS_API rtError_t rtBarrierTaskLaunch(rtBarrierTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); | RTS_API rtError_t rtBarrierTaskLaunch(rtBarrierTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); | ||||
#if defined(__cplusplus) | |||||
/** | |||||
* @ingroup rt_stars | |||||
* @brief dvpp group handle. | |||||
*/ | |||||
typedef void *rtDvppGrp_t; | |||||
typedef struct tagDvppGrpRptInfo { | |||||
uint32_t deviceId; | |||||
uint32_t streamId; | |||||
uint32_t taskId; | |||||
uint8_t sqeType; | |||||
uint8_t cqeErrorCode; | |||||
uint8_t reserve[2]; | |||||
uint32_t accErrorCode; | |||||
} rtDvppGrpRptInfo_t; | |||||
typedef void (*rtDvppGrpCallback)(rtDvppGrpRptInfo_t *rptInfo); | |||||
/** | |||||
* @ingroup rt_stars | |||||
* @brief create dvpp group. | |||||
* @param [out] grp group handle | |||||
* @param [in] flags group flag, reserved parameter | |||||
* @return RT_ERROR_NONE for ok, others failed | |||||
*/ | |||||
RTS_API rtError_t rtDvppGroupCreate(rtDvppGrp_t *grp, uint32_t flags); | |||||
/** | |||||
* @ingroup rt_stars | |||||
* @brief destroy dvpp group. | |||||
* @note the exported function name is spelled "Destory"; callers must use that exact spelling. | |||||
* @param [in] grp group handle | |||||
* @return RT_ERROR_NONE for ok, others failed | |||||
*/ | |||||
RTS_API rtError_t rtDvppGroupDestory(rtDvppGrp_t grp); | |||||
/** | |||||
* @ingroup rt_stars | |||||
* @brief create stream with grp handle | |||||
* @param [in|out] stm created stream | |||||
* @param [in] priority stream priority | |||||
* @param [in] flags stream op flags | |||||
* @param [in] grp grp handle | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
* @return others failed | |||||
*/ | |||||
RTS_API rtError_t rtStreamCreateByGrp(rtStream_t *stm, int32_t priority, uint32_t flags, rtDvppGrp_t grp); | |||||
/** | |||||
* @ingroup rt_stars | |||||
* @brief wait report by grp | |||||
* @param [in] grp group handle | |||||
* @param [in] callBackFunc callback of type rtDvppGrpCallback | |||||
* @param [in] timeout wait timeout in milliseconds; -1 means wait forever | |||||
* @return RT_ERROR_NONE for ok, others failed | |||||
*/ | |||||
RTS_API rtError_t rtDvppWaitGroupReport(rtDvppGrp_t grp, rtDvppGrpCallback callBackFunc, int32_t timeout); | |||||
#if defined(__cplusplus) | |||||
} | } | ||||
#endif | #endif | ||||
#endif // CCE_RUNTIME_RT_STARS_H | |||||
#endif // CCE_RUNTIME_RT_STARS_H |
@@ -101,6 +101,16 @@ RTS_API rtError_t rtStreamWaitEventWithTimeout(rtStream_t stm, rtEvent_t evt, ui | |||||
*/ | */ | ||||
RTS_API rtError_t rtStreamSynchronize(rtStream_t stm); | RTS_API rtError_t rtStreamSynchronize(rtStream_t stm); | ||||
/** | |||||
* @ingroup dvrt_stream | |||||
* @brief wait for the stream to complete, with a timeout | |||||
* @param [in] stm stream to wait | |||||
* @param [in] timeout timeout value, in milliseconds | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
*/ | |||||
RTS_API rtError_t rtStreamSynchronizeWithTimeout(rtStream_t stm, int32_t timeout); | |||||
/** | /** | ||||
* @ingroup dvrt_stream | * @ingroup dvrt_stream | ||||
* @brief queries an asynchronous stream for completion status | * @brief queries an asynchronous stream for completion status | ||||
@@ -202,7 +212,7 @@ RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtSt | |||||
* @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
*/ | */ | ||||
RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stm, uint32_t flag, const void *addr, | RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stm, uint32_t flag, const void *addr, | ||||
uint32_t *streamId, uint32_t *taskId); | |||||
uint32_t *streamId, uint32_t *taskId); | |||||
/* | /* | ||||
* @ingroup rt_model | * @ingroup rt_model | ||||
@@ -18,6 +18,8 @@ | |||||
#define PROF_TRAINING_TRACE 0x00000040ULL | #define PROF_TRAINING_TRACE 0x00000040ULL | ||||
#define PROF_MSPROFTX 0x00000080ULL | #define PROF_MSPROFTX 0x00000080ULL | ||||
#define PROF_RUNTIME_API 0x00000100ULL | #define PROF_RUNTIME_API 0x00000100ULL | ||||
#define PROF_TASK_FRAMEWORK 0x00000200ULL | |||||
#define PROF_TASK_TSFW 0x00000400ULL | |||||
// system profiling switch | // system profiling switch | ||||
#define PROF_CPU 0x00010000ULL | #define PROF_CPU 0x00010000ULL | ||||
@@ -52,6 +54,8 @@ constexpr uint64_t PROF_AICPU_MODEL = 0x4000000000000000ULL; | |||||
#define PROF_TRAINING_TRACE_MASK 0x00000040ULL | #define PROF_TRAINING_TRACE_MASK 0x00000040ULL | ||||
#define PROF_MSPROFTX_MASK 0x00000080ULL | #define PROF_MSPROFTX_MASK 0x00000080ULL | ||||
#define PROF_RUNTIME_API_MASK 0x00000100ULL | #define PROF_RUNTIME_API_MASK 0x00000100ULL | ||||
#define PROF_TASK_FRAMEWORK_MASK 0x00000200ULL | |||||
#define PROF_TASK_TSFW_MASK 0x00000400ULL | |||||
// system profiling mask | // system profiling mask | ||||
#define PROF_CPU_MASK 0x00010000ULL | #define PROF_CPU_MASK 0x00010000ULL | ||||
@@ -102,7 +106,7 @@ extern "C" { | |||||
MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); | MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); | ||||
typedef int32_t Status; | |||||
using Status = int32_t; | |||||
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1; | typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1; | ||||
/// | /// | ||||
/// @ingroup AscendCL | /// @ingroup AscendCL | ||||
@@ -159,8 +159,14 @@ enum MsprofGeTaskType { | |||||
MSPROF_GE_TASK_TYPE_AI_CPU, | MSPROF_GE_TASK_TYPE_AI_CPU, | ||||
MSPROF_GE_TASK_TYPE_AIV, | MSPROF_GE_TASK_TYPE_AIV, | ||||
MSPROF_GE_TASK_TYPE_WRITE_BACK, | MSPROF_GE_TASK_TYPE_WRITE_BACK, | ||||
MSPROF_GE_TASK_TYPE_MIX_AIC, | |||||
MSPROF_GE_TASK_TYPE_MIX_AIV, | |||||
MSPROF_GE_TASK_TYPE_FFTS_PLUS, | |||||
MSPROF_GE_TASK_TYPE_DSA, | |||||
MSPROF_GE_TASK_TYPE_DVPP, | |||||
MSPROF_GE_TASK_TYPE_INVALID | MSPROF_GE_TASK_TYPE_INVALID | ||||
}; | }; | ||||
enum MsprofGeShapeType { | enum MsprofGeShapeType { | ||||
MSPROF_GE_SHAPE_TYPE_STATIC = 0, | MSPROF_GE_SHAPE_TYPE_STATIC = 0, | ||||
MSPROF_GE_SHAPE_TYPE_DYNAMIC, | MSPROF_GE_SHAPE_TYPE_DYNAMIC, | ||||