Merge pull request !65 from yanghaoran/r0.5tags/v0.5.0-beta-827^0
| @@ -204,9 +204,6 @@ const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; | |||||
| // Save original model file name | // Save original model file name | ||||
| const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | ||||
| // FE enable quant optimize | |||||
| const std::string QUANT_OPTIMIZE = "ge.quantOptimize"; | |||||
| const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; | const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; | ||||
| const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; | const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; | ||||
| const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; | const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; | ||||
| @@ -274,7 +271,6 @@ static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; | |||||
| static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); | static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); | ||||
| static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); | static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); | ||||
| static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); | static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); | ||||
| static const char *const QUANT_OPTIMIZE = ge::QUANT_OPTIMIZE.c_str(); | |||||
| static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); | static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); | ||||
| static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); | static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); | ||||
| static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); | static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); | ||||
| @@ -304,7 +300,6 @@ const std::set<std::string> global_options = {CORE_TYPE, | |||||
| AICORE_NUM, | AICORE_NUM, | ||||
| FUSION_SWITCH_FILE, | FUSION_SWITCH_FILE, | ||||
| ENABLE_SMALL_CHANNEL, | ENABLE_SMALL_CHANNEL, | ||||
| QUANT_OPTIMIZE, | |||||
| OP_SELECT_IMPL_MODE, | OP_SELECT_IMPL_MODE, | ||||
| OPTYPELIST_FOR_IMPLMODE}; | OPTYPELIST_FOR_IMPLMODE}; | ||||
| } // namespace ir_option | } // namespace ir_option | ||||
| @@ -43,6 +43,7 @@ | |||||
| #define DYNAMIC_INPUT_TD_NUM(name) ("__dynamic_input_" + name + "_cnt") | #define DYNAMIC_INPUT_TD_NUM(name) ("__dynamic_input_" + name + "_cnt") | ||||
| namespace ge { | namespace ge { | ||||
| class Operator; | |||||
| class OperatorImpl; | class OperatorImpl; | ||||
| class NamedAttrs; | class NamedAttrs; | ||||
| class Graph; | class Graph; | ||||
| @@ -50,6 +51,7 @@ class AttrValue; | |||||
| using SubgraphBuilder = std::function<Graph()>; | using SubgraphBuilder = std::function<Graph()>; | ||||
| using OperatorImplPtr = std::shared_ptr<OperatorImpl>; | using OperatorImplPtr = std::shared_ptr<OperatorImpl>; | ||||
| using OperatorPtr = std::shared_ptr<Operator>; | |||||
| class OpIO; | class OpIO; | ||||
| using OutHandler = std::shared_ptr<OpIO>; | using OutHandler = std::shared_ptr<OpIO>; | ||||
| @@ -67,6 +67,7 @@ using google::protobuf::Message; | |||||
| class OpRegistrationDataImpl; | class OpRegistrationDataImpl; | ||||
| using ParseParamFunc = std::function<domi::Status(const google::protobuf::Message *, ge::Operator &)>; | using ParseParamFunc = std::function<domi::Status(const google::protobuf::Message *, ge::Operator &)>; | ||||
| using ParseParamByOpFunc = std::function<domi::Status(const ge::Operator &, ge::Operator &)>; | |||||
| using FusionParseParamFunc = | using FusionParseParamFunc = | ||||
| std::function<domi::Status(const std::vector<const google::protobuf::Message *>, ge::Operator &)>; | std::function<domi::Status(const std::vector<const google::protobuf::Message *>, ge::Operator &)>; | ||||
| using ParseSubgraphFunc = std::function<Status(const std::string &subgraph_name, const ge::Graph &graph)>; | using ParseSubgraphFunc = std::function<Status(const std::string &subgraph_name, const ge::Graph &graph)>; | ||||
| @@ -85,6 +86,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { | |||||
| OpRegistrationData &ParseParamsFn(const ParseParamFunc &parseParamFn); | OpRegistrationData &ParseParamsFn(const ParseParamFunc &parseParamFn); | ||||
| OpRegistrationData &ParseParamsByOperatorFn(const ParseParamByOpFunc &parse_param_by_op_fn); | |||||
| OpRegistrationData &FusionParseParamsFn(const FusionParseParamFunc &fusionParseParamFn); | OpRegistrationData &FusionParseParamsFn(const FusionParseParamFunc &fusionParseParamFn); | ||||
| OpRegistrationData &ParseSubgraphPostFn(const ParseSubgraphFunc &subgraph_post_fn); | OpRegistrationData &ParseSubgraphPostFn(const ParseSubgraphFunc &subgraph_post_fn); | ||||
| @@ -100,6 +103,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { | |||||
| std::set<std::string> GetOriginOpTypeSet() const; | std::set<std::string> GetOriginOpTypeSet() const; | ||||
| domi::FrameworkType GetFrameworkType() const; | domi::FrameworkType GetFrameworkType() const; | ||||
| ParseParamFunc GetParseParamFn() const; | ParseParamFunc GetParseParamFn() const; | ||||
| ParseParamByOpFunc GetParseParamByOperatorFn() const; | |||||
| FusionParseParamFunc GetFusionParseParamFn() const; | FusionParseParamFunc GetFusionParseParamFn() const; | ||||
| ParseSubgraphFunc GetParseSubgraphPostFn() const; | ParseSubgraphFunc GetParseSubgraphPostFn() const; | ||||
| @@ -183,6 +183,7 @@ struct ModelData { | |||||
| uint32_t model_len = 0; // Model binary data length | uint32_t model_len = 0; // Model binary data length | ||||
| int32_t priority = 0; // Model priority | int32_t priority = 0; // Model priority | ||||
| std::string key; // Key path for encrypt model, Empty for unencrypt | std::string key; // Key path for encrypt model, Empty for unencrypt | ||||
| std::string om_name; // om file name, used for data dump | |||||
| }; | }; | ||||
| // The definition of Model information | // The definition of Model information | ||||
| @@ -46,6 +46,8 @@ class ModelHelper { | |||||
| static Status TransModelToGeModel(const ModelPtr& model, GeModelPtr& ge_model); | static Status TransModelToGeModel(const ModelPtr& model, GeModelPtr& ge_model); | ||||
| static Status TransGeModelToModel(const GeModelPtr& geModelPtr, ModelPtr& modelPtr); | static Status TransGeModelToModel(const GeModelPtr& geModelPtr, ModelPtr& modelPtr); | ||||
| Status GetBaseNameFromFileName(const std::string& file_name, std::string& base_name); | |||||
| Status GetModelNameFromMergedGraphName(const std::string& graph_name, std::string& model_name); | |||||
| private: | private: | ||||
| bool is_assign_model_ = false; | bool is_assign_model_ = false; | ||||
| @@ -62,7 +62,7 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
| // Get input and output descriptor | // Get input and output descriptor | ||||
| ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ||||
| std::vector<ge::TensorDesc> &output_desc); | |||||
| std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -98,8 +98,10 @@ Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); | |||||
| Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); | Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); | ||||
| Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||||
| std::vector<std::string> &output_nodes_name); | |||||
| Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info); | |||||
| void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||||
| std::vector<std::string> &output_nodes_name); | |||||
| void UpdateOmgCtxWithParserCtx(); | void UpdateOmgCtxWithParserCtx(); | ||||
| @@ -94,6 +94,8 @@ struct OmgContext { | |||||
| std::vector<std::pair<std::string, int32_t>> user_out_nodes; | std::vector<std::pair<std::string, int32_t>> user_out_nodes; | ||||
| // net out nodes (where user_out_nodes or leaf nodes) | // net out nodes (where user_out_nodes or leaf nodes) | ||||
| std::vector<std::string> net_out_nodes; | std::vector<std::string> net_out_nodes; | ||||
| // net out nodes top names(only caffe has top) | |||||
| std::vector<std::string> out_top_names; | |||||
| // path for the aicpu custom operator so_file | // path for the aicpu custom operator so_file | ||||
| std::vector<std::string> aicpu_op_run_paths; | std::vector<std::string> aicpu_op_run_paths; | ||||
| // ddk version | // ddk version | ||||
| @@ -139,6 +139,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string NEW_AIPP | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_INPUTS; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_INPUTS; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS; | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; | ||||
| @@ -181,6 +183,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE; | |||||
| // to be deleted | // to be deleted | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION; | ||||
| @@ -31,6 +31,7 @@ class ShapeRefiner { | |||||
| static graphStatus InferShapeAndType(const NodePtr &node, bool before_subgraph); | static graphStatus InferShapeAndType(const NodePtr &node, bool before_subgraph); | ||||
| static graphStatus InferShapeAndType(const NodePtr &node); | static graphStatus InferShapeAndType(const NodePtr &node); | ||||
| static graphStatus InferShapeAndType(const ConstNodePtr &node, Operator &op); | static graphStatus InferShapeAndType(const ConstNodePtr &node, Operator &op); | ||||
| static void ClearContextMap(); | |||||
| private: | private: | ||||
| static void PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase); | static void PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase); | ||||
| @@ -121,6 +121,8 @@ const std::string NEW_AIPP_CONV_OP = "new_conv_op_for_aipp"; | |||||
| const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs"; | const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs"; | ||||
| const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs"; | const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs"; | ||||
| const std::string ATTR_NAME_INPUT_DIMS = "input_dims"; | |||||
| const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; | const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; | ||||
| const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; | const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; | ||||
| @@ -154,6 +156,7 @@ const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id"; | |||||
| const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start"; | const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start"; | ||||
| const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; | const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; | ||||
| const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims"; | const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims"; | ||||
| const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size"; | |||||
| // To be deleted | // To be deleted | ||||
| const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; | const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; | ||||
| @@ -1,5 +1,5 @@ | |||||
| LOCAL_PATH := $(call my-dir) | LOCAL_PATH := $(call my-dir) | ||||
| include $(LOCAL_PATH)/stub/Makefile | |||||
| COMMON_LOCAL_SRC_FILES := \ | COMMON_LOCAL_SRC_FILES := \ | ||||
| ./proto/om.proto \ | ./proto/om.proto \ | ||||
| ./proto/ge_ir.proto \ | ./proto/ge_ir.proto \ | ||||
| @@ -85,6 +85,29 @@ LOCAL_PROPRIETARY_MODULE := true | |||||
| include $(BUILD_HOST_SHARED_LIBRARY) | include $(BUILD_HOST_SHARED_LIBRARY) | ||||
| #compiler for host | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := stub/libgraph | |||||
| LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 | |||||
| LOCAL_CPPFLAGS += -fexceptions | |||||
| LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := \ | |||||
| ../../out/atc/lib64/stub/graph.cc \ | |||||
| ../../out/atc/lib64/stub/operator.cc \ | |||||
| ../../out/atc/lib64/stub/tensor.cc \ | |||||
| ../../out/atc/lib64/stub/operator_factory.cc \ | |||||
| LOCAL_SHARED_LIBRARIES := | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_MULTILIB := 64 | |||||
| LOCAL_PROPRIETARY_MODULE := true | |||||
| include $(BUILD_HOST_SHARED_LIBRARY) | |||||
| #compiler for device | #compiler for device | ||||
| include $(CLEAR_VARS) | include $(CLEAR_VARS) | ||||
| @@ -111,6 +134,32 @@ LOCAL_PROPRIETARY_MODULE := true | |||||
| include $(BUILD_SHARED_LIBRARY) | include $(BUILD_SHARED_LIBRARY) | ||||
| #compiler for device | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := stub/libgraph | |||||
| LOCAL_CFLAGS += -O2 | |||||
| LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := \ | |||||
| ../../out/atc/lib64/stub/graph.cc \ | |||||
| ../../out/atc/lib64/stub/operator.cc \ | |||||
| ../../out/atc/lib64/stub/tensor.cc \ | |||||
| ../../out/atc/lib64/stub/operator_factory.cc \ | |||||
| LOCAL_SHARED_LIBRARIES := | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| ifeq ($(device_os),android) | |||||
| LOCAL_LDFLAGS := -ldl | |||||
| endif | |||||
| LOCAL_MULTILIB := 64 | |||||
| LOCAL_PROPRIETARY_MODULE := true | |||||
| include $(BUILD_SHARED_LIBRARY) | |||||
| # compile for ut/st | # compile for ut/st | ||||
| include $(CLEAR_VARS) | include $(CLEAR_VARS) | ||||
| @@ -759,6 +759,7 @@ graphStatus Node::Verify() const { | |||||
| GELOGW("Verify UpdateOutputName failed"); | GELOGW("Verify UpdateOutputName failed"); | ||||
| } | } | ||||
| } | } | ||||
| node_op.BreakConnect(); | |||||
| } | } | ||||
| if (op_->CommonVerify() == GRAPH_SUCCESS) { | if (op_->CommonVerify() == GRAPH_SUCCESS) { | ||||
| @@ -818,7 +818,9 @@ graphStatus OpDesc::InferShapeAndType() { | |||||
| } | } | ||||
| } | } | ||||
| Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); | Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); | ||||
| return (graphStatus)infer_func_(op_proxy); | |||||
| graphStatus ret = (graphStatus)infer_func_(op_proxy); | |||||
| op_proxy.BreakConnect(); | |||||
| return ret; | |||||
| } | } | ||||
| graphStatus OpDesc::DefaultInferFormat() { | graphStatus OpDesc::DefaultInferFormat() { | ||||
| @@ -863,12 +865,14 @@ graphStatus OpDesc::DefaultInferFormat() { | |||||
| } | } | ||||
| graphStatus OpDesc::OpVerify() { | graphStatus OpDesc::OpVerify() { | ||||
| Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); | |||||
| if (verifier_func_ == nullptr) { | if (verifier_func_ == nullptr) { | ||||
| verifier_func_ = OperatorFactoryImpl::GetVerifyFunc(GetType()); | verifier_func_ = OperatorFactoryImpl::GetVerifyFunc(GetType()); | ||||
| } | } | ||||
| if (verifier_func_ != nullptr) { | if (verifier_func_ != nullptr) { | ||||
| return (graphStatus)verifier_func_(op_proxy); | |||||
| Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); | |||||
| graphStatus ret = (graphStatus)verifier_func_(op_proxy); | |||||
| op_proxy.BreakConnect(); | |||||
| return ret; | |||||
| } | } | ||||
| return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
| } | } | ||||
| @@ -21,7 +21,7 @@ | |||||
| #include <mutex> | #include <mutex> | ||||
| #include <queue> | #include <queue> | ||||
| #include <set> | #include <set> | ||||
| #include "array_ops.h" | |||||
| #include "./array_ops.h" | |||||
| #include "debug/ge_log.h" | #include "debug/ge_log.h" | ||||
| #include "debug/ge_op_types.h" | #include "debug/ge_op_types.h" | ||||
| #include "debug/ge_util.h" | #include "debug/ge_util.h" | ||||
| @@ -931,7 +931,7 @@ OperatorImplPtr Operator::GetOperatorImplPtr() const { return operator_impl_; } | |||||
| void Operator::BreakConnect() const { | void Operator::BreakConnect() const { | ||||
| if (operator_impl_ == nullptr) { | if (operator_impl_ == nullptr) { | ||||
| GELOGE(GRAPH_FAILED, "operator impl is nullptr."); | |||||
| GELOGW("operator impl is nullptr."); | |||||
| return; | return; | ||||
| } | } | ||||
| operator_impl_->ClearInputLinks(); | operator_impl_->ClearInputLinks(); | ||||
| @@ -1318,6 +1318,8 @@ class GraphBuilderImpl { | |||||
| string type = src_op_impl->op_desc_->GetType(); | string type = src_op_impl->op_desc_->GetType(); | ||||
| auto node_op = ge::OperatorFactory::CreateOperator("node_op", type); | auto node_op = ge::OperatorFactory::CreateOperator("node_op", type); | ||||
| auto tensor_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op); | auto tensor_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op); | ||||
| node_op.BreakConnect(); | |||||
| GE_CHK_BOOL_EXEC(tensor_desc != nullptr, continue, "tensor_desc is null."); | GE_CHK_BOOL_EXEC(tensor_desc != nullptr, continue, "tensor_desc is null."); | ||||
| if ((tensor_desc->GetInputsSize() == 0 && tensor_desc->GetOutputsSize() > 0) || type == DATA || | if ((tensor_desc->GetInputsSize() == 0 && tensor_desc->GetOutputsSize() > 0) || type == DATA || | ||||
| type == VARIABLE || type == INITDATA || type == GETNEXT) { | type == VARIABLE || type == INITDATA || type == GETNEXT) { | ||||
| @@ -1542,6 +1544,7 @@ void GraphUtils::BreakConnect(const std::map<OperatorImplPtr, NodePtr> &all_node | |||||
| } | } | ||||
| op_impl->ClearOutputLinks(); | op_impl->ClearOutputLinks(); | ||||
| op_impl->ClearInputLinks(); | op_impl->ClearInputLinks(); | ||||
| OperatorKeeper::GetInstance().CheckOutOperator(op_impl); | |||||
| } | } | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -235,6 +235,7 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator & | |||||
| GELOGD("get op from OperatorFactory success. opType: %s", op_type.c_str()); | GELOGD("get op from OperatorFactory success. opType: %s", op_type.c_str()); | ||||
| auto temp_op_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op); | auto temp_op_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op); | ||||
| node_op.BreakConnect(); | |||||
| if (temp_op_desc == nullptr) { | if (temp_op_desc == nullptr) { | ||||
| GELOGE(GRAPH_FAILED, "temp op desc is null"); | GELOGE(GRAPH_FAILED, "temp op desc is null"); | ||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| @@ -328,6 +329,9 @@ InferenceContextPtr CreateInferenceContext(const std::unordered_map<NodePtr, Inf | |||||
| namespace { | namespace { | ||||
| std::unordered_map<NodePtr, InferenceContextPtr> context_map; | std::unordered_map<NodePtr, InferenceContextPtr> context_map; | ||||
| } | } | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void ShapeRefiner::ClearContextMap() { context_map.clear(); } | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferShapeAndType(const NodePtr &node) { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferShapeAndType(const NodePtr &node) { | ||||
| return InferShapeAndType(node, true); | return InferShapeAndType(node, true); | ||||
| } | } | ||||
| @@ -0,0 +1,6 @@ | |||||
| inc_path := $(shell pwd)/inc/external/ | |||||
| out_path := $(shell pwd)/out/atc/lib64/stub/ | |||||
| stub_path := $(shell pwd)/common/graph/stub/ | |||||
| mkdir_stub := $(shell mkdir -p $(out_path)) | |||||
| graph_local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path)) | |||||
| @@ -0,0 +1,573 @@ | |||||
| import os | |||||
| import re | |||||
| import sys | |||||
| import logging | |||||
| logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s', | |||||
| level=logging.INFO) | |||||
| """ | |||||
| this attr is used for symbol table visible | |||||
| """ | |||||
| GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' | |||||
| """ | |||||
| generate stub func body by return type | |||||
| """ | |||||
| RETURN_STATEMENTS = { | |||||
| 'graphStatus': ' return GRAPH_SUCCESS;', | |||||
| 'Status': ' return SUCCESS;', | |||||
| 'Graph': ' return Graph();', | |||||
| 'Graph&': ' return *this;', | |||||
| 'Format': ' return Format();', | |||||
| 'Format&': ' return *this;', | |||||
| 'Shape': ' return Shape();', | |||||
| 'Shape&': ' return *this;', | |||||
| 'TensorDesc': ' return TensorDesc();', | |||||
| 'TensorDesc&': ' return *this;', | |||||
| 'Tensor': ' return Tensor();', | |||||
| 'Tensor&': ' return *this;', | |||||
| 'Operator': ' return Operator();', | |||||
| 'Operator&': ' return *this;', | |||||
| 'Ptr': ' return nullptr;', | |||||
| 'std::string': ' return "";', | |||||
| 'std::string&': ' return "";', | |||||
| 'string': ' return "";', | |||||
| 'int': ' return 0;', | |||||
| 'DataType': ' return DT_FLOAT;', | |||||
| 'InferenceContextPtr': ' return nullptr;', | |||||
| 'SubgraphBuilder': ' return nullptr;', | |||||
| 'OperatorImplPtr': ' return nullptr;', | |||||
| 'OutHandler': ' return nullptr;', | |||||
| 'std::vector<std::string>': ' return {};', | |||||
| 'std::vector<int64_t>': ' return {};', | |||||
| 'std::map': ' return {};', | |||||
| 'uint32_t': ' return 0;', | |||||
| 'int64_t': ' return 0;', | |||||
| 'uint64_t': ' return 0;', | |||||
| 'size_t': ' return 0;', | |||||
| 'float': ' return 0.0f;', | |||||
| 'bool': ' return false;', | |||||
| } | |||||
| """ | |||||
| max code len per line in hua_wei software programming specifications | |||||
| """ | |||||
| max_code_len_per_line = 100 | |||||
| """ | |||||
| white_list_for_debug, include_dir_key_words is to | |||||
| determines which header files to generate cc files from | |||||
| when DEBUG on | |||||
| """ | |||||
| white_list_for_debug = ["operator.h", "tensor.h", | |||||
| "graph.h", "operator_factory.h", | |||||
| "ge_ir_build.h"] | |||||
| include_dir_key_words = ["ge", "graph"] | |||||
| DEBUG = True | |||||
| def need_generate_func(func_line): | |||||
| """ | |||||
| :param func_line: | |||||
| :return: | |||||
| """ | |||||
| if func_line.strip().endswith("default") or func_line.strip().endswith("delete") \ | |||||
| or func_line.strip().startswith("typedef") or func_line.strip().startswith("using"): | |||||
| return False | |||||
| return True | |||||
| def file_endswith_white_list_suffix(file): | |||||
| """ | |||||
| :param file: | |||||
| :return: | |||||
| """ | |||||
| if DEBUG: | |||||
| for suffix in white_list_for_debug: | |||||
| if file.endswith(suffix): | |||||
| return True | |||||
| return False | |||||
| else: | |||||
| return True | |||||
| """ | |||||
| belows are patterns used for analyse .h file | |||||
| """ | |||||
| # pattern function | |||||
| pattern_func = re.compile(r"""(^[\s]*) #leading with space,we will find and delete after | |||||
| ([a-zA-Z~_] # void int likely | |||||
| .* | |||||
| [)] #we find ) | |||||
| (?!.*{) # we do not want the case int abc() const { return 1;} | |||||
| .*) | |||||
| (;.*) #we want to find ; and after for we will replace these later | |||||
| \n$ | |||||
| """, re.VERBOSE | re.MULTILINE | re.DOTALL) | |||||
| # pattern comment | |||||
| pattern_comment = re.compile(r'^\s*//') | |||||
| pattern_comment_2_start = re.compile(r'^\s*/[*]') | |||||
| pattern_comment_2_end = re.compile(r'[*]/\s*$') | |||||
| # pattern define | |||||
| pattern_define = re.compile(r'^\s*#define') | |||||
| pattern_define_return = re.compile(r'\\\s*$') | |||||
| # blank line | |||||
| pattern_blank_line = re.compile(r'^\s*$') | |||||
| # virtual,explicit,friend,static | |||||
| pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)') | |||||
| # lead space | |||||
| pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]') | |||||
| # functions will have patterns such as func ( or func( | |||||
| # but operator is an exception; the class name is preceded by an operator, and the above mode does not exist | |||||
| # format like :"operator = ()" | |||||
| pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]') | |||||
| # template | |||||
| pattern_template = re.compile(r'^\s*template') | |||||
| pattern_template_end = re.compile(r'>\s*$') | |||||
| # namespace | |||||
| pattern_namespace = re.compile(r'namespace.*{') | |||||
| # class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with | |||||
| pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+<?)(?!.*;)' % GE_ATTR) | |||||
| # {} | |||||
| pattern_start = re.compile('{') | |||||
| pattern_end = re.compile('}') | |||||
| line_index = 0 | |||||
| class H2CC(object): | |||||
| def __init__(self, input_file, output_file, shared_includes_content): | |||||
| """ | |||||
| :param input_file: | |||||
| :param output_file: | |||||
| :param shared_includes_content: | |||||
| """ | |||||
| self.input_file = input_file | |||||
| self.output_file = output_file | |||||
| self.shared_includes_content = shared_includes_content | |||||
| self.line_index = 0 | |||||
| self.input_fd = open(self.input_file, 'r') | |||||
| self.input_content = self.input_fd.readlines() | |||||
| self.output_fd = open(self.output_file, 'w') | |||||
| # The state may be normal_now(in the middle of {}),class_now,namespace_now | |||||
| self.stack = [] | |||||
| self.stack_class = [] | |||||
| self.stack_template = [] | |||||
| # record funcs generated by h2cc func | |||||
| self.func_list_exist = [] | |||||
| def __del__(self): | |||||
| self.input_fd.close() | |||||
| self.output_fd.close() | |||||
| del self.stack | |||||
| del self.stack_class | |||||
| del self.stack_template | |||||
| del self.func_list_exist | |||||
| def just_skip(self): | |||||
| # skip blank line or comment | |||||
| if pattern_blank_line.search(self.input_content[self.line_index]) or pattern_comment.search( | |||||
| self.input_content[self.line_index]): # /n or comment using // | |||||
| self.line_index += 1 | |||||
| if pattern_comment_2_start.search(self.input_content[self.line_index]): # comment using /* | |||||
| while not pattern_comment_2_end.search(self.input_content[self.line_index]): # */ | |||||
| self.line_index += 1 | |||||
| self.line_index += 1 | |||||
| # skip define | |||||
| if pattern_define.search(self.input_content[self.line_index]): | |||||
| while pattern_blank_line.search(self.input_content[self.line_index]) or pattern_define_return.search( | |||||
| self.input_content[self.line_index]): | |||||
| self.line_index += 1 | |||||
| self.line_index += 1 | |||||
| def write_inc_content(self): | |||||
| for shared_include_content in self.shared_includes_content: | |||||
| self.output_fd.write(shared_include_content) | |||||
| def h2cc(self): | |||||
| """ | |||||
| :return: | |||||
| """ | |||||
| logging.info("start generate cc_file[%s] from h_file[%s]", self.output_file, self.input_file) | |||||
| global pattern_comment | |||||
| global pattern_comment_2_start | |||||
| global pattern_comment_2_end | |||||
| global pattern_blank_line | |||||
| global pattern_func | |||||
| global pattern_keyword | |||||
| global pattern_leading_space | |||||
| global pattern_func_name | |||||
| global pattern_template | |||||
| global pattern_template_end | |||||
| global pattern_namespace | |||||
| global pattern_class | |||||
| global pattern_start | |||||
| global pattern_end | |||||
| global line_index | |||||
| # write inc content | |||||
| self.write_inc_content() | |||||
| # core processing cycle, process the input .h file by line | |||||
| while self.line_index < len(self.input_content): | |||||
| # handle comment and blank line | |||||
| self.just_skip() | |||||
| # match namespace | |||||
| self.handle_namespace() | |||||
| # match template | |||||
| template_string = self.handle_template() | |||||
| # match class | |||||
| line = self.input_content[self.line_index] | |||||
| match_class = pattern_class.search(line) | |||||
| match_start = pattern_start.search(line) | |||||
| handle_class_result = self.handle_class(template_string, line, match_start, match_class) | |||||
| if handle_class_result == "continue": | |||||
| continue | |||||
| # match "}" | |||||
| handle_stack_result = self.handle_stack(match_start) | |||||
| if handle_stack_result == "continue": | |||||
| continue | |||||
| # handle func | |||||
| handle_func1_result, line, start_i = self.handle_func1(line) | |||||
| if handle_func1_result == "continue": | |||||
| continue | |||||
| # here means func is found | |||||
| # delete key word | |||||
| line = pattern_keyword.sub('', line) | |||||
| logging.info("line[%s]", line) | |||||
| # Class member function | |||||
| # if friend we will not add class name | |||||
| friend_match = re.search('friend ', line) | |||||
| if len(self.stack_class) > 0 and not friend_match: | |||||
| line, func_name = self.handle_class_member_func(line, template_string) | |||||
| # Normal functions | |||||
| else: | |||||
| line, func_name = self.handle_normal_func(line, template_string) | |||||
| need_generate = need_generate_func(line) | |||||
| # func body | |||||
| line += self.implement_function(line) | |||||
| # comment | |||||
| line = self.gen_comment(start_i) + line | |||||
| # write to out file | |||||
| self.write_func_content(line, func_name, need_generate) | |||||
| # next loop | |||||
| self.line_index += 1 | |||||
| logging.info('Added %s functions', len(self.func_list_exist)) | |||||
| logging.info('Successfully converted,please see ' + self.output_file) | |||||
def handle_func1(self, line):
    """
    Try to recognize a function declaration starting at the current line.

    :param line: current source line; may be extended with following lines
                 when the parameter list spans multiple lines
    :return: tuple (action, line, start_i) where action is "continue" when the
             caller should advance to the next line, or "pass" when a function
             declaration was extracted; start_i is the index of the first line
             of the declaration (None when no '(' was found)
    """
    find1 = re.search('[(]', line)
    if not find1:
        # no '(' on this line -> cannot be a function declaration
        self.line_index += 1
        return "continue", line, None
    find2 = re.search('[)]', line)
    start_i = self.line_index
    space_match = pattern_leading_space.search(line)
    # deal with
    # int abc(int a,
    #         int b)
    if find1 and (not find2):
        # parameter list continues on following lines: glue them onto 'line',
        # stripping the shared leading indentation of the first line
        self.line_index += 1
        line2 = self.input_content[self.line_index]
        if space_match:
            line2 = re.sub('^' + space_match.group(1), '', line2)
            line += line2
            while self.line_index < len(self.input_content) and (not re.search('[)]', line2)):
                self.line_index += 1
                line2 = self.input_content[self.line_index]
                line2 = re.sub('^' + space_match.group(1), '', line2)
                line += line2
    match_start = pattern_start.search(self.input_content[self.line_index])
    match_end = pattern_end.search(self.input_content[self.line_index])
    if match_start:  # like ) { or ) {} int the last line
        if not match_end:
            # '{' without a matching '}' on the same line: we are entering a
            # function body, record it on the scope stack
            self.stack.append('normal_now')
            # NOTE(review): this loop only advances a local counter and has no
            # visible effect -- looks like leftover code; confirm before removal
            ii = start_i
            while ii <= self.line_index:
                ii += 1
        self.line_index += 1
        return "continue", line, start_i
    logging.info("line[%s]", line)
    # ' int abc();'->'int abc()'
    (line, match) = pattern_func.subn(r'\2\n', line)
    logging.info("line[%s]", line)
    # deal with case:
    # 'int \n abc(int a, int b)'
    if re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]):
        line = self.input_content[start_i - 1] + line
        line = line.lstrip()
    if not match:
        # the line did not match the function pattern after all -> skip it
        self.line_index += 1
        return "continue", line, start_i
    return "pass", line, start_i
def handle_stack(self, match_start):
    """
    Maintain the brace/scope stack for the current line.

    :param match_start: match object when the current line opens a '{' scope,
                        None otherwise
    :return: "continue" when the caller should advance to the next line,
             "pass" otherwise
    """
    current = self.input_content[self.line_index]
    closes_scope = pattern_end.search(current)
    if match_start:
        # entering a plain (function/body) scope
        self.stack.append('normal_now')
    if closes_scope:
        # a '}' closes whatever scope is on top of the stack
        finished = self.stack.pop()
        if finished == 'namespace_now':
            # the closing brace of a namespace is copied to the output verbatim
            self.output_fd.write(current + '\n')
        elif finished == 'class_now':
            self.stack_class.pop()
            self.stack_template.pop()
    inside_normal_scope = bool(self.stack) and self.stack[-1] == 'normal_now'
    if match_start or closes_scope or inside_normal_scope:
        self.line_index += 1
        return "continue"
    return "pass"
def handle_class(self, template_string, line, match_start, match_class):
    """
    Handle a class declaration: record its (possibly templated) name and
    consume input lines up to the opening '{' of the class body.

    :param template_string: template header collected just before this line
    :param line: current source line
    :param match_start: match object for the '{' pattern on this line
    :param match_class: match object for the class pattern; None when the
                        current line does not declare a class
    :return: "continue" when a class was consumed (caller restarts the loop),
             otherwise "pass"
    """
    if match_class:  # we face a class
        self.stack_template.append(template_string)
        self.stack.append('class_now')
        class_name = match_class.group(3)
        # class template specializations: class A<u,Node<u> >
        if '<' in class_name:
            # scan forward from the first '<' and find its matching '>' by
            # keeping a nesting counter, then append '<...>' to the name
            k = line.index('<')
            fit = 1
            for ii in range(k + 1, len(line)):
                if line[ii] == '<':
                    fit += 1
                if line[ii] == '>':
                    fit -= 1
                if fit == 0:
                    break
            class_name += line[k + 1:ii + 1]
        logging.info('class_name[%s]', class_name)
        self.stack_class.append(class_name)
        # skip ahead until the '{' that opens the class body
        while not match_start:
            self.line_index += 1
            line = self.input_content[self.line_index]
            match_start = pattern_start.search(line)
        self.line_index += 1
        return "continue"
    return "pass"
def handle_template(self):
    """
    Collect a (possibly multi-line) 'template<...>' header starting at the
    current line, advancing past it.

    :return: the accumulated template header, or '' when the current line
             does not start a template
    """
    current = self.input_content[self.line_index]
    if not pattern_template.search(current):
        return ''
    collected = current
    # append lines until the one that closes the template header
    while not pattern_template_end.search(current):
        self.line_index += 1
        current = self.input_content[self.line_index]
        collected += current
    self.line_index += 1
    return collected
def handle_namespace(self):
    """
    If the current line opens a namespace, copy it through to the output,
    push a namespace marker onto the scope stack and advance one line.
    """
    current = self.input_content[self.line_index]
    if not pattern_namespace.search(current):
        return
    # namespace opening lines are written to the output unchanged
    self.output_fd.write(current + '\n')
    self.stack.append('namespace_now')
    self.line_index += 1
def handle_normal_func(self, line, template_string):
    """
    Build the out-of-class definition line for a free (non-member) function.

    :param line: function declaration line (keywords already stripped)
    :param template_string: template header for this function, '' if none
    :return: tuple (line, func_name) where line is the definition line with
             the cleaned template header prepended and default argument
             values removed, and func_name is the signature up to and
             including the closing ')'
    """
    template_line = ''
    self.stack_template.append(template_string)
    if self.stack_template[-1] != '':
        template_line = re.sub(r'\s*template', 'template', self.stack_template[-1])
        # change '< class T = a, class U = A(3)>' to '<class T, class U>'
        template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line)
        template_line = re.sub(r'\s*=.*,', ',', template_line)
        template_line = re.sub(r'\s*=.*', '', template_line)
    # strip default argument values from the parameter list
    line = re.sub(r'\s*=.*,', ',', line)
    line = re.sub(r'\s*=.*\)', ')', line)
    line = template_line + line
    self.stack_template.pop()
    # signature up to ')' is used as the deduplication key
    func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
    logging.info("line[%s]", line)
    logging.info("func_name[%s]", func_name)
    return line, func_name
def handle_class_member_func(self, line, template_string):
    """
    Build the out-of-class definition line for a class member function.

    Qualifies the declaration with 'ClassName<T,...>::', prepends the class
    and function template headers, and strips pure-virtual markers and
    default argument values.

    :param line: member function declaration line
    :param template_string: template header of the function itself ('' if none)
    :return: tuple (line, func_name) where line is the full definition line
             and func_name is the signature up to the closing ')'
    """
    template_line = ''
    x = ''
    if template_string != '':
        # normalize the function's own template header and drop default
        # template arguments: '<class T = a>' -> '<class T>'
        template_string = re.sub(r'\s*template', 'template', template_string)
        template_string = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_string)
        template_string = re.sub(r'\s*=.*,', ',', template_string)
        template_string = re.sub(r'\s*=.*', '', template_string)
    # BUG FIX: the original referenced bare 'stack_template' here (NameError
    # at runtime); it must be the instance attribute 'self.stack_template'.
    if self.stack_template[-1] != '':
        if not (re.search(r'<\s*>', self.stack_template[-1])):
            template_line = re.sub(r'^\s*template', 'template', self.stack_template[-1])
            if not (re.search(r'<.*>', self.stack_class[-1])):
                # for x we get like template<class T, typename U> -> <T,U>
                x = re.sub(r'template\s*<', '<', template_line)  # remove template -> <class T, typename U>
                x = re.sub(r'\n', '', x)
                x = re.sub(r'\s*=.*,', ',', x)
                x = re.sub(r'\s*=.*\>', '>', x)
                x = x.rstrip()  # remove \n
                x = re.sub(r'(class|typename)\s+|(<class>|<typename>\s*class)', '',
                           x)  # remove class,typename -> <T, U>
                x = re.sub(r'<\s+', '<', x)
                x = re.sub(r'\s+>', '>', x)
                x = re.sub(r'\s+,', ',', x)
                x = re.sub(r',\s+', ', ', x)
    # strip '= 0' (pure virtual) and default argument values
    line = re.sub(r'\s*=\s+0', '', line)
    line = re.sub(r'\s*=\s+.*,', ',', line)
    line = re.sub(r'\s*=\s+.*\)', ')', line)
    logging.info("x[%s]\nline[%s]", x, line)
    # if the function is long, void ABC::foo()
    # breaks into two lines void ABC::\n foo()
    temp_line = pattern_func_name.sub(self.stack_class[-1] + x + '::' + r'\1(', line, count=1)
    if len(temp_line) > max_code_len_per_line:
        line = pattern_func_name.sub(self.stack_class[-1] + x + '::\n' + r'\1(', line, count=1)
    else:
        line = temp_line
    logging.info("line[%s]", line)
    # add template as the above if there is one
    template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line)
    template_line = re.sub(r'\s*=.*,', ',', template_line)
    template_line = re.sub(r'\s*=.*', '', template_line)
    line = template_line + template_string + line
    func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
    logging.info("line[%s]", line)
    logging.info("func_name[%s]", func_name)
    return line, func_name
def write_func_content(self, content, func_name, need_generate):
    """
    Write one generated stub function to the output file.

    Skips functions the caller filtered out and functions already emitted
    (deduplicated by their signature).

    :param content: full text of the generated function (signature + body)
    :param func_name: signature used as the deduplication key
    :param need_generate: False when the caller decided this function
                          should not be generated
    """
    # idiomatic membership test ('not in') and cheap flag checked first
    if need_generate and func_name not in self.func_list_exist:
        self.output_fd.write(content)
        self.func_list_exist.append(func_name)
        logging.info('add func:[%s]', func_name)
def gen_comment(self, start_i):
    """
    Collect the comment block sitting directly above a function declaration.

    :param start_i: index of the first line of the function declaration
    :return: the comment text (possibly '') to prepend to the generated stub
    """
    comment_line = ''
    # Function comments are on top of function declarations, copy them over
    k = start_i - 1  # one line before this func start
    if pattern_template.search(self.input_content[k]):
        # skip a 'template<...>' line between the comment and the function
        k -= 1
    if pattern_comment_2_end.search(self.input_content[k]):
        # block comment ending in '*/': walk backwards to the matching '/*',
        # collecting lines in original order
        comment_line = self.input_content[k].lstrip()
        while not pattern_comment_2_start.search(self.input_content[k]):
            k -= 1
            comment_line = self.input_content[k].lstrip() + comment_line
    else:
        # consecutive '//' line comments immediately above the function
        for j in range(k, 0, -1):
            c_line = self.input_content[j]
            if pattern_comment.search(c_line):
                c_line = re.sub(r'\s*//', '//', c_line)
                comment_line = c_line + comment_line
            else:
                break
    return comment_line
@staticmethod
def implement_function(func):
    """
    Generate a stub body for the given function signature.

    The return statement is looked up in RETURN_STATEMENTS keyed by a
    roughly-parsed return type; unknown types produce an empty body and a
    warning.

    :param func: full function signature text
    :return: the stub body text, '{ ... }\\n\\n'
    """
    function_def = '{\n'
    all_items = func.split()
    start = 0
    return_type = all_items[start]
    if return_type == "const":
        # skip a leading 'const' qualifier
        start += 1
        return_type = all_items[start]
    if return_type.startswith(('std::map', 'std::set', 'std::vector')):
        # all standard containers share one stub return statement
        return_type = "std::map"
    if return_type.endswith('*') or (len(all_items) > start + 1 and all_items[start + 1].startswith('*')):
        return_type = "Ptr"
    if len(all_items) > start + 1 and all_items[start + 1].startswith('&'):
        return_type += "&"
    # idiomatic 'in' instead of dict.__contains__()
    if return_type in RETURN_STATEMENTS:
        function_def += RETURN_STATEMENTS[return_type]
    else:
        logging.warning("Unhandled return type[%s]", return_type)
    function_def += '\n'
    function_def += '}\n'
    function_def += '\n'
    return function_def
def collect_header_files(path):
    """
    Recursively collect all '.h' headers below 'path'.

    :param path: root directory to scan
    :return: tuple (header_files, shared_includes_content) -- the header
             paths (using '/' separators) and the matching
             '#include "..."' lines, relative to path's parent directory
    """
    header_files = []
    shared_includes_content = []
    for root, _, names in os.walk(path):
        # deterministic generation order
        for name in sorted(names):
            # skip git-related artifacts and anything that is not a header
            if "git" in name or not name.endswith('.h'):
                continue
            full_path = os.path.join(root, name).replace('\\', '/')
            header_files.append(full_path)
            shared_includes_content.append('#include "{}"\n'.format(full_path[path.rindex('/') + 1:]))
    return header_files, shared_includes_content
def generate_stub_file(inc_dir, out_cc_dir):
    """
    Generate one stub .cc file for every whitelisted header under 'inc_dir'.

    :param inc_dir: directory containing the headers to stub
    :param out_cc_dir: directory the generated .cc files are written to
    """
    target_header_files, shared_includes_content = collect_header_files(inc_dir)
    for header_file in target_header_files:
        if not file_endswith_white_list_suffix(header_file):
            continue
        # map 'foo.h' -> 'foo.cc'. The old pattern '.h*$' left the dot
        # unescaped (matching any character) with an optional 'h'; anchor the
        # literal '.h' suffix instead.
        cc_file = re.sub(r'\.h$', '.cc', header_file)
        h_2_cc = H2CC(header_file, out_cc_dir + cc_file[cc_file.rindex('/') + 1:], shared_includes_content)
        h_2_cc.h2cc()
def gen_code(inc_dir, out_cc_dir):
    """
    Entry point: generate stub .cc files for every known include directory.

    :param inc_dir: root of the include tree
    :param out_cc_dir: output directory for generated .cc files
    """
    # normalize both directories so they end with exactly one '/'
    if not inc_dir.endswith('/'):
        inc_dir = inc_dir + '/'
    if not out_cc_dir.endswith('/'):
        out_cc_dir = out_cc_dir + '/'
    for key_word in include_dir_key_words:
        generate_stub_file(inc_dir + key_word, out_cc_dir)
if __name__ == '__main__':
    # Validate the command line before indexing argv: the original raised a
    # bare IndexError when an argument was missing.
    if len(sys.argv) < 3:
        logging.error("usage: %s <include_dir> <output_cc_dir>", sys.argv[0])
        sys.exit(1)
    inc_dir = sys.argv[1]
    out_cc_dir = sys.argv[2]
    gen_code(inc_dir, out_cc_dir)
| @@ -187,12 +187,9 @@ void TBEPluginManager::LoadCustomOpLib() { | |||||
| std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | ||||
| GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | ||||
| for (OpRegistrationData reg_data : registration_datas) { | for (OpRegistrationData reg_data : registration_datas) { | ||||
| bool ret = CheckRegisterStatus(reg_data); | |||||
| if (ret) { | |||||
| GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(), | |||||
| static_cast<uint32_t>(reg_data.GetImplyType())); | |||||
| domi::OpRegistry::Instance()->Register(reg_data); | |||||
| } | |||||
| GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(), | |||||
| static_cast<uint32_t>(reg_data.GetImplyType())); | |||||
| domi::OpRegistry::Instance()->Register(reg_data); | |||||
| } | } | ||||
| } | } | ||||
| @@ -230,31 +227,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPlug | |||||
| } | } | ||||
| } | } | ||||
// Decide whether reg_data should be (re-)registered in the op registry.
// Returns true when registration should proceed, false when the existing
// registration must be kept for at least one of the original op types.
bool TBEPluginManager::CheckRegisterStatus(const OpRegistrationData &reg_data) {
  bool ret = true;
  // PARSER_PRIORITY=cce keeps CCE implementations instead of TVM ones.
  // Read once and cached for the lifetime of the process.
  static char *parser_priority = std::getenv("PARSER_PRIORITY");
  static bool keep_cce = parser_priority != nullptr && string(parser_priority) == "cce";
  auto ori_optype_set = reg_data.GetOriginOpTypeSet();
  for (const auto &op_type : ori_optype_set) {
    domi::ImplyType imply_type = domi::OpRegistry::Instance()->GetImplyTypeByOriOpType(op_type);
    GELOGD("Enter into reg_data loop. op_type = %s , om_optype_ = %s", op_type.c_str(), reg_data.GetOmOptype().c_str());
    if (imply_type != domi::ImplyType::BUILDIN) {
      // Already registered with a non-builtin implementation: only replace it
      // when the new data matches the preferred implementation type
      // (CCE when keep_cce, otherwise TVM).
      if ((keep_cce && reg_data.GetImplyType() != domi::ImplyType::CCE) ||
          (!keep_cce && reg_data.GetImplyType() != domi::ImplyType::TVM)) {
        GELOGD("op_type[%s] does not need to be changed, om_optype:%s.", op_type.c_str(),
               reg_data.GetOmOptype().c_str());
        ret = false;
      } else {
        GELOGI("op_type[%s] will be changed to om_optype:%s.", op_type.c_str(), reg_data.GetOmOptype().c_str());
      }
    } else {
      // BUILDIN imply type: first registration during GE initialization.
      GELOGD("First register in ge initialize, original type: %s, om_optype: %s, imply type: %d.", op_type.c_str(),
             reg_data.GetOmOptype().c_str(), static_cast<int>(reg_data.GetImplyType()));
    }
  }
  return ret;
}
| Status TBEPluginManager::CheckCustomAiCpuOpLib() { | Status TBEPluginManager::CheckCustomAiCpuOpLib() { | ||||
| std::vector<std::string> vec_op_type; | std::vector<std::string> vec_op_type; | ||||
| @@ -63,7 +63,6 @@ class TBEPluginManager { | |||||
| static void GetCustomOpPath(std::string &customop_path); | static void GetCustomOpPath(std::string &customop_path); | ||||
| void LoadCustomOpLib(); | void LoadCustomOpLib(); | ||||
| static Status CheckCustomAiCpuOpLib(); | static Status CheckCustomAiCpuOpLib(); | ||||
| static bool CheckRegisterStatus(const OpRegistrationData ®_data); | |||||
| SoHandlesVec handles_vec_; | SoHandlesVec handles_vec_; | ||||
| static std::map<string, string> options_; | static std::map<string, string> options_; | ||||
| @@ -184,7 +184,8 @@ ModelHelper::SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::strin | |||||
| // Model | // Model | ||||
| ModelPtr model_ptr = ge::MakeShared<ge::Model>(); | ModelPtr model_ptr = ge::MakeShared<ge::Model>(); | ||||
| GE_CHECK_NOTNULL_EXEC(model_ptr, return MEMALLOC_FAILED); | GE_CHECK_NOTNULL_EXEC(model_ptr, return MEMALLOC_FAILED); | ||||
| model_ptr->SetName(compute_graph->GetName()); | |||||
| std::string original_model_name = compute_graph->GetName() + "_original"; | |||||
| model_ptr->SetName(original_model_name); | |||||
| model_ptr->SetGraph(graph); | model_ptr->SetGraph(graph); | ||||
| model_ptr->SetVersion(static_cast<uint32_t>(OM_PROTO_VERSION)); | model_ptr->SetVersion(static_cast<uint32_t>(OM_PROTO_VERSION)); | ||||
| string framework_version; | string framework_version; | ||||
| @@ -504,4 +505,36 @@ Status ModelHelper::ReleaseLocalModelData() noexcept { | |||||
| } | } | ||||
| return result; | return result; | ||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseNameFromFileName(const string &file_name, | |||||
| string &base_name) { | |||||
| GELOGD("Get base_name from file, file_name:%s", file_name.c_str()); | |||||
| GE_CHK_BOOL_EXEC_WARN(!file_name.empty(), return FAILED, "File path may not valid, check params --output"); | |||||
| size_t start_position = 0; | |||||
| // using output as base_name (ignore ".om") | |||||
| size_t filename_suffixes = 3; | |||||
| if (file_name.find_last_of('/') != string::npos) { | |||||
| start_position = file_name.find_last_of('/') + 1; | |||||
| } | |||||
| size_t end_position = file_name.length() - filename_suffixes; | |||||
| base_name = file_name.substr(start_position, end_position - start_position); | |||||
| GE_CHK_BOOL_EXEC_WARN(!base_name.empty(), return FAILED, "Get base_name failed, check params --output"); | |||||
| return SUCCESS; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status | |||||
| ModelHelper::GetModelNameFromMergedGraphName(const string &graph_name, string &model_name) { | |||||
| GELOGD("Get model_name from graph_name, graph_name:%s", graph_name.c_str()); | |||||
| // this can only be used after merged graph(graph name will be append with "_x", x is index); | |||||
| GE_CHK_BOOL_EXEC_WARN(!graph_name.empty(), return FAILED, "File path may not valid, check params --output"); | |||||
| size_t start_position = 0; | |||||
| size_t end_position = graph_name.length(); | |||||
| // using graph as model_name (ignore "_x", x is the index of graph) | |||||
| if (graph_name.find_last_of('_') != string::npos) { | |||||
| end_position = graph_name.find_last_of('_'); | |||||
| } | |||||
| model_name = graph_name.substr(start_position, end_position); | |||||
| GE_CHK_BOOL_EXEC_WARN(!model_name.empty(), return FAILED, "Get model_name failed, check params --output"); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -15,7 +15,7 @@ | |||||
| */ | */ | ||||
| #include "common/model_parser/base.h" | #include "common/model_parser/base.h" | ||||
| #include "common/helper/model_helper.h" | |||||
| #include <securec.h> | #include <securec.h> | ||||
| #include <sys/sysinfo.h> | #include <sys/sysinfo.h> | ||||
| #include <fstream> | #include <fstream> | ||||
| @@ -61,7 +61,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro | |||||
| // read data as a block: | // read data as a block: | ||||
| (void)fs.read(data, len); | (void)fs.read(data, len); | ||||
| ModelHelper model_helper; | |||||
| model_helper.GetBaseNameFromFileName(model_path, model_data.om_name); | |||||
| // Set the model data parameter | // Set the model data parameter | ||||
| model_data.model_data = data; | model_data.model_data = data; | ||||
| model_data.model_len = len; | model_data.model_len = len; | ||||
| @@ -16,15 +16,12 @@ | |||||
| #include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
| #include <nlohmann/json.hpp> | |||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
| #include "framework/common/string_util.h" | #include "framework/common/string_util.h" | ||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| #include "runtime/base.h" | #include "runtime/base.h" | ||||
| using Json = nlohmann::json; | |||||
| namespace { | namespace { | ||||
| const char *const kJobID = "jobID"; | const char *const kJobID = "jobID"; | ||||
| const char *const kDeviceID = "deviceID"; | const char *const kDeviceID = "deviceID"; | ||||
| @@ -35,6 +32,7 @@ const char *const kEvents = "events"; | |||||
| const char *const kAiCoreEvents = "ai_core_events"; | const char *const kAiCoreEvents = "ai_core_events"; | ||||
| const char *const kName = "name"; | const char *const kName = "name"; | ||||
| const char *const kTraceID = "traceId"; | const char *const kTraceID = "traceId"; | ||||
| const char *const kProfDir = "resultPath"; | |||||
| const size_t kReportMaxLen = 2048; | const size_t kReportMaxLen = 2048; | ||||
| } // namespace | } // namespace | ||||
| @@ -100,6 +98,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
| Json start_prof_conf = Json::parse(config); | Json start_prof_conf = Json::parse(config); | ||||
| Json &prof_conf = start_prof_conf[kStartCfg][0]; | Json &prof_conf = start_prof_conf[kStartCfg][0]; | ||||
| job_id_ = prof_conf[kJobID]; | job_id_ = prof_conf[kJobID]; | ||||
| auto iter = prof_conf.find(kProfDir); | |||||
| if (iter != prof_conf.end()) { | |||||
| prof_dir_ = prof_conf[kProfDir]; | |||||
| } | |||||
| Json &device_id = prof_conf[kDeviceID]; | Json &device_id = prof_conf[kDeviceID]; | ||||
| if (device_id.size() != 0) { | if (device_id.size() != 0) { | ||||
| vector<int32_t>().swap(device_id_); | vector<int32_t>().swap(device_id_); | ||||
| @@ -126,23 +128,36 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
| } | } | ||||
| } | } | ||||
| GELOGI("Profiling json config from acl:%s", config.c_str()); | |||||
| Json &features = prof_conf[kFeatures]; | Json &features = prof_conf[kFeatures]; | ||||
| if (ParseFeaturesFromAclCfg(features) != SUCCESS) { | |||||
| GELOGE(FAILED, "Parse feature from acl cfg failed."); | |||||
| return FAILED; | |||||
| } | |||||
| is_profiling_ = true; | |||||
| } catch (...) { | |||||
| GELOGE(FAILED, "Json conf is not invalid !"); | |||||
| return ge::PARAM_INVALID; | |||||
| } | |||||
| #endif | |||||
| return ge::SUCCESS; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg( | |||||
| const Json &features) { | |||||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||||
| try { | |||||
| for (size_t i = 0; i < features.size(); ++i) { | for (size_t i = 0; i < features.size(); ++i) { | ||||
| Json &feature = features[i]; | |||||
| const Json &feature = features[i]; | |||||
| if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) { | if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| const std::string &name = feature[kName]; | const std::string &name = feature[kName]; | ||||
| if (name == "op_trace") { | if (name == "op_trace") { | ||||
| GELOGI("Op trace config from acl"); | |||||
| Json &conf = feature[kConf]; | |||||
| Json &events = conf[0][kEvents]; | |||||
| const Json &conf = feature[kConf]; | |||||
| const Json &events = conf[0][kEvents]; | |||||
| const std::string &ai_core_events = events[0][kAiCoreEvents]; | const std::string &ai_core_events = events[0][kAiCoreEvents]; | ||||
| GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str()); | GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str()); | ||||
| is_op_trace_ = true; | is_op_trace_ = true; | ||||
| // op trace get conf | |||||
| ProfMgrConf prof_mgr_conf; | ProfMgrConf prof_mgr_conf; | ||||
| int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf); | int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf); | ||||
| if (result != 0) { | if (result != 0) { | ||||
| @@ -154,10 +169,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
| GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_); | GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_); | ||||
| } else if (name == "task_trace") { | } else if (name == "task_trace") { | ||||
| is_op_trace_ = false; | is_op_trace_ = false; | ||||
| if (feature.find(kConf) != feature.end()) { | |||||
| const Json &conf = feature[kConf]; | |||||
| std::stringstream task_trace_conf; | |||||
| task_trace_conf << conf; | |||||
| task_trace_conf_ = task_trace_conf.str(); | |||||
| } | |||||
| GELOGI("Task trace config from acl"); | GELOGI("Task trace config from acl"); | ||||
| } else if (name == "system_trace") { | } else if (name == "system_trace") { | ||||
| is_op_trace_ = false; | is_op_trace_ = false; | ||||
| Json &conf = feature[kConf]; | |||||
| const Json &conf = feature[kConf]; | |||||
| std::stringstream system_trace_conf; | std::stringstream system_trace_conf; | ||||
| system_trace_conf << conf; | system_trace_conf << conf; | ||||
| system_trace_conf_ = system_trace_conf.str(); | system_trace_conf_ = system_trace_conf.str(); | ||||
| @@ -165,10 +186,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
| } | } | ||||
| profiling_opts_.push_back(name); | profiling_opts_.push_back(name); | ||||
| } | } | ||||
| is_profiling_ = true; | |||||
| } catch (...) { | } catch (...) { | ||||
| GELOGE(FAILED, "Json conf is not invalid !"); | |||||
| GELOGE(ge::PARAM_INVALID, "Json conf feature is not invalid !"); | |||||
| return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -235,6 +254,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||||
| p_device[kDeviceID] = std::to_string(device_id); | p_device[kDeviceID] = std::to_string(device_id); | ||||
| p_device[kJobID] = job_id_; | p_device[kJobID] = job_id_; | ||||
| p_device[kTraceID] = std::to_string(GetContext().TraceId()); | p_device[kTraceID] = std::to_string(GetContext().TraceId()); | ||||
| if (!prof_dir_.empty()) { | |||||
| p_device[kProfDir] = prof_dir_; | |||||
| GELOGI("Prof dir: %s.", prof_dir_.c_str()); | |||||
| } | |||||
| Json features; | Json features; | ||||
| if (is_op_trace_) { | if (is_op_trace_) { | ||||
| @@ -258,6 +281,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||||
| Json f; | Json f; | ||||
| if (profiling_opts_[i] == "system_trace") { | if (profiling_opts_[i] == "system_trace") { | ||||
| f[kConf] = nlohmann::json::parse(system_trace_conf_); | f[kConf] = nlohmann::json::parse(system_trace_conf_); | ||||
| } else if (profiling_opts_[i] == "task_trace") { | |||||
| if (!task_trace_conf_.empty()) { | |||||
| f[kConf] = nlohmann::json::parse(task_trace_conf_); | |||||
| } | |||||
| } | } | ||||
| f[kName] = profiling_opts_[i]; | f[kName] = profiling_opts_[i]; | ||||
| features[i] = f; | features[i] = f; | ||||
| @@ -292,6 +319,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||||
| GELOGW("ProfMgrStartUp failed."); | GELOGW("ProfMgrStartUp failed."); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GELOGD("StartProfiling, prof_handle: %p", prof_handle); | |||||
| prof_handle_vec_.push_back(prof_handle); | prof_handle_vec_.push_back(prof_handle); | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -314,8 +342,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||||
| for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { | for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { | ||||
| int result = ProfMgrStop(prof_handle_vec_[i]); | int result = ProfMgrStop(prof_handle_vec_[i]); | ||||
| if (result != 0) { | if (result != 0) { | ||||
| GELOGW("ProfMgr stop return fail:%d.", result); | |||||
| return; | |||||
| GELOGW("ProfMgr stop return fail:%d, handle:%p", result, prof_handle_vec_[i]); | |||||
| } | } | ||||
| } | } | ||||
| vector<void *>().swap(prof_handle_vec_); | vector<void *>().swap(prof_handle_vec_); | ||||
| @@ -17,6 +17,7 @@ | |||||
| #ifndef GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | #ifndef GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | ||||
| #define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | #define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | ||||
| #include <nlohmann/json.hpp> | |||||
| #include <map> | #include <map> | ||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| @@ -30,6 +31,7 @@ | |||||
| using std::map; | using std::map; | ||||
| using std::string; | using std::string; | ||||
| using std::vector; | using std::vector; | ||||
| using Json = nlohmann::json; | |||||
| namespace ge { | namespace ge { | ||||
| const std::string GE_PROFILING_MODULE = "Framework"; | const std::string GE_PROFILING_MODULE = "Framework"; | ||||
| @@ -84,11 +86,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
| void PluginUnInit(const std::string &module) const; | void PluginUnInit(const std::string &module) const; | ||||
| private: | private: | ||||
| ge::Status ParseFeaturesFromAclCfg(const Json &feature); | |||||
| bool is_profiling_ = false; | bool is_profiling_ = false; | ||||
| bool is_op_trace_ = false; | bool is_op_trace_ = false; | ||||
| bool is_load_ = false; | bool is_load_ = false; | ||||
| int32_t op_trace_iter_num_ = 0; | int32_t op_trace_iter_num_ = 0; | ||||
| string job_id_; | string job_id_; | ||||
| string prof_dir_; | |||||
| vector<int32_t> device_id_; | vector<int32_t> device_id_; | ||||
| vector<string> op_trace_conf_; | vector<string> op_trace_conf_; | ||||
| vector<string> profiling_opts_; | vector<string> profiling_opts_; | ||||
| @@ -96,6 +100,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
| string recv_profiling_config_; | string recv_profiling_config_; | ||||
| string send_profiling_config_; | string send_profiling_config_; | ||||
| string system_trace_conf_; | string system_trace_conf_; | ||||
| string task_trace_conf_; | |||||
| const ProfilingEngineImpl engine_; | const ProfilingEngineImpl engine_; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -208,6 +208,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> Propertie | |||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayerNeedDump(const std::string &model, | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayerNeedDump(const std::string &model, | ||||
| const std::string &om_name, | |||||
| const std::string &op_name) { | const std::string &op_name) { | ||||
| std::lock_guard<std::mutex> lock(dump_mutex_); | std::lock_guard<std::mutex> lock(dump_mutex_); | ||||
| // if dump all | // if dump all | ||||
| @@ -216,9 +217,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayer | |||||
| } | } | ||||
| // if this model need dump | // if this model need dump | ||||
| auto model_iter = model_dump_properties_map_.find(model); | |||||
| if (model_iter != model_dump_properties_map_.end()) { | |||||
| auto om_name_iter = model_dump_properties_map_.find(om_name); | |||||
| auto model_name_iter = model_dump_properties_map_.find(model); | |||||
| if (om_name_iter != model_dump_properties_map_.end() || model_name_iter != model_dump_properties_map_.end()) { | |||||
| // if no dump layer info, dump all layer in this model | // if no dump layer info, dump all layer in this model | ||||
| auto model_iter = om_name_iter != model_dump_properties_map_.end() ? om_name_iter : model_name_iter; | |||||
| if (model_iter->second.empty()) { | if (model_iter->second.empty()) { | ||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -84,7 +84,7 @@ class PropertiesManager { | |||||
| void AddDumpPropertyValue(const std::string &model, const std::set<std::string> &layers); | void AddDumpPropertyValue(const std::string &model, const std::set<std::string> &layers); | ||||
| std::set<std::string> GetAllDumpModel(); | std::set<std::string> GetAllDumpModel(); | ||||
| std::set<std::string> GetDumpPropertyValue(const std::string &model); | std::set<std::string> GetDumpPropertyValue(const std::string &model); | ||||
| bool IsLayerNeedDump(const std::string &model, const std::string &op_name); | |||||
| bool IsLayerNeedDump(const std::string &model, const std::string &om_name, const std::string &op_name); | |||||
| void DeleteDumpPropertyValue(const std::string &model); | void DeleteDumpPropertyValue(const std::string &model); | ||||
| void ClearDumpPropertyValue(); | void ClearDumpPropertyValue(); | ||||
| bool QueryModelDumpStatus(const std::string &model); | bool QueryModelDumpStatus(const std::string &model); | ||||
| @@ -452,7 +452,7 @@ Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData | |||||
| // Get input and output descriptor | // Get input and output descriptor | ||||
| Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ||||
| std::vector<ge::TensorDesc> &output_desc) { | |||||
| std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | |||||
| GELOGI("get model desc info begin."); | GELOGI("get model desc info begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | ||||
| @@ -464,8 +464,8 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||||
| std::vector<uint32_t> input_formats; | std::vector<uint32_t> input_formats; | ||||
| std::vector<uint32_t> output_formats; | std::vector<uint32_t> output_formats; | ||||
| Status ret = | |||||
| GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, output_formats); | |||||
| Status ret = GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, | |||||
| output_formats, new_model_desc); | |||||
| if (ret != domi::SUCCESS) { | if (ret != domi::SUCCESS) { | ||||
| GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); | GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); | ||||
| return TransferDomiErrorCode(ret); | return TransferDomiErrorCode(ret); | ||||
| @@ -641,7 +641,6 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da | |||||
| model_data.model_data = nullptr; | model_data.model_data = nullptr; | ||||
| } | } | ||||
| } | } | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -1,5 +1,5 @@ | |||||
| LOCAL_PATH := $(call my-dir) | LOCAL_PATH := $(call my-dir) | ||||
| include $(LOCAL_PATH)/stub/Makefile | |||||
| COMMON_LOCAL_SRC_FILES := \ | COMMON_LOCAL_SRC_FILES := \ | ||||
| proto/fusion_model.proto \ | proto/fusion_model.proto \ | ||||
| proto/optimizer_priority.proto \ | proto/optimizer_priority.proto \ | ||||
| @@ -353,6 +353,28 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | LOCAL_LDFLAGS := -lrt -ldl | ||||
| include $(BUILD_HOST_SHARED_LIBRARY) | |||||
| #compiler for host infer | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := stub/libge_compiler | |||||
| LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | |||||
| LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP | |||||
| ifeq ($(DEBUG), 1) | |||||
| LOCAL_CFLAGS += -g -O0 | |||||
| endif | |||||
| LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := ../../out/atc/lib64/stub/ge_ir_build.cc | |||||
| LOCAL_SHARED_LIBRARIES := | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| include $(BUILD_HOST_SHARED_LIBRARY) | include $(BUILD_HOST_SHARED_LIBRARY) | ||||
| #compiler for device | #compiler for device | ||||
| @@ -131,6 +131,7 @@ Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_ke | |||||
| GELOGE(FAILED, "Failed to compute host cpu op. node = %s, ret = %u", op_desc->GetName().c_str(), ret); | GELOGE(FAILED, "Failed to compute host cpu op. node = %s, ret = %u", op_desc->GetName().c_str(), ret); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| op.BreakConnect(); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -20,6 +20,7 @@ | |||||
| #include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
| #include "common/helper/om_file_helper.h" | #include "common/helper/om_file_helper.h" | ||||
| #include "common/util.h" | #include "common/util.h" | ||||
| #include "common/util/error_manager/error_manager.h" | |||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "ge/ge_api.h" | #include "ge/ge_api.h" | ||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| @@ -125,17 +126,7 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen | |||||
| if (data_op == nullptr) { | if (data_op == nullptr) { | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | |||||
| auto input_desc = op_desc->MutableInputDesc(index); | |||||
| GE_CHECK_NOTNULL_EXEC(input_desc, return PARAM_INVALID); | |||||
| ge::Format old_format = input_desc->GetFormat(); | |||||
| if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) { | |||||
| input_desc->SetFormat(FORMAT_ND); | |||||
| input_desc->SetOriginFormat(FORMAT_ND); | |||||
| (void)AttrUtils::SetStr(data_op, "_single_input_format", TypeUtils::FormatToSerialString(old_format)); | |||||
| (void)AttrUtils::SetBool(data_op, "_is_single_op", true); | |||||
| } | |||||
| (void)AttrUtils::SetBool(data_op, "_is_single_op", true); | |||||
| GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); | GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); | ||||
| GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); | GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); | ||||
| @@ -157,17 +148,7 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons | |||||
| if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| auto single_op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL_EXEC(single_op_desc, return PARAM_INVALID); | |||||
| auto output_desc = single_op_desc->MutableOutputDesc(0); | |||||
| GE_CHECK_NOTNULL_EXEC(output_desc, return PARAM_INVALID); | |||||
| ge::Format old_format = output_desc->GetFormat(); | |||||
| if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) { | |||||
| output_desc->SetFormat(FORMAT_ND); | |||||
| output_desc->SetOriginFormat(FORMAT_ND); | |||||
| (void)AttrUtils::SetStr(op_desc, "_single_output_format", TypeUtils::FormatToSerialString(old_format)); | |||||
| (void)AttrUtils::SetBool(op_desc, "_is_single_op", true); | |||||
| } | |||||
| (void)AttrUtils::SetBool(op_desc, "_is_single_op", true); | |||||
| int32_t count = 0; | int32_t count = 0; | ||||
| for (const auto &out_desc : outputs) { | for (const auto &out_desc : outputs) { | ||||
| GeTensorDesc tensor = out_desc.GetTensorDesc(); | GeTensorDesc tensor = out_desc.GetTensorDesc(); | ||||
| @@ -212,19 +193,6 @@ static void GetOpsProtoPath(string &opsproto_path) { | |||||
| opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | ||||
| } | } | ||||
| static string GetModelNameFromFileName(const string &file_name_prefix) { | |||||
| int start_position = 0; | |||||
| // using output as model_name (ignore ".om") | |||||
| int filename_suffixes = 3; | |||||
| if (file_name_prefix.find_last_of('/') != string::npos) { | |||||
| start_position += 1; | |||||
| } | |||||
| int end_position = file_name_prefix.length() - filename_suffixes; | |||||
| string model_name = file_name_prefix.substr(start_position, end_position - start_position); | |||||
| GELOGI("Get model_name from file, model_name:%s", model_name.c_str()); | |||||
| return model_name; | |||||
| } | |||||
| class GeGenerator::Impl { | class GeGenerator::Impl { | ||||
| public: | public: | ||||
| Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models); | Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models); | ||||
| @@ -332,8 +300,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||||
| GraphId graph_id; | GraphId graph_id; | ||||
| GeRootModelPtr ge_root_model = nullptr; | GeRootModelPtr ge_root_model = nullptr; | ||||
| GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | ||||
| const string model_name = GetModelNameFromFileName(file_name_prefix); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(model_name.empty(), return PARAM_INVALID, "om name is not valid!"); | |||||
| impl_->is_offline_ = is_offline; | impl_->is_offline_ = is_offline; | ||||
| Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model); | Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -345,9 +311,15 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||||
| } | } | ||||
| GE_CHECK_NOTNULL(ge_root_model); | GE_CHECK_NOTNULL(ge_root_model); | ||||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | ||||
| ModelHelper model_helper; | |||||
| string model_name = ""; | |||||
| Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name); | |||||
| if (name_ret != SUCCESS) { | |||||
| GELOGE(FAILED, "Get model_name failed. Param --output is invalid"); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | ||||
| GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | ||||
| GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model can not be null"); | GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model can not be null"); | ||||
| ge_model->SetName(model_name); | ge_model->SetName(model_name); | ||||
| ret = impl_->SaveModel(file_name_prefix, ge_model, model); | ret = impl_->SaveModel(file_name_prefix, ge_model, model); | ||||
| @@ -38,6 +38,7 @@ | |||||
| namespace { | namespace { | ||||
| const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag"; | const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag"; | ||||
| const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op"; | const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op"; | ||||
| const char *const kOpNoReuseMem = "no_reuse_mem_flag"; | |||||
| const char *const kDisableReuseMemory = "ge.exec.disableReuseMemory"; | const char *const kDisableReuseMemory = "ge.exec.disableReuseMemory"; | ||||
| const char *const OP_NO_REUSE_MEM = "OP_NO_REUSE_MEM"; | const char *const OP_NO_REUSE_MEM = "OP_NO_REUSE_MEM"; | ||||
| const int kReuseMaxCount = 10; | const int kReuseMaxCount = 10; | ||||
| @@ -624,8 +625,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| (void)ge::GetContext().GetOption(kDisableReuseMemory, ge_disable_reuse_mem_env); | (void)ge::GetContext().GetOption(kDisableReuseMemory, ge_disable_reuse_mem_env); | ||||
| if (ge_disable_reuse_mem_env != "1") { | if (ge_disable_reuse_mem_env != "1") { | ||||
| bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); | bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); | ||||
| is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && reuse_mem_flag && is_op_reuse_mem && | |||||
| (IsPreReuse(n, out_index)); | |||||
| is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && | |||||
| reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); | |||||
| auto stream_id = node_op_desc->GetStreamId(); | auto stream_id = node_op_desc->GetStreamId(); | ||||
| auto map_iter = reusable_streams_map_.find(stream_id); | auto map_iter = reusable_streams_map_.find(stream_id); | ||||
| if (is_reuse_memory && map_iter != reusable_streams_map_.end()) { | if (is_reuse_memory && map_iter != reusable_streams_map_.end()) { | ||||
| @@ -1182,6 +1183,9 @@ void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks, | |||||
| GELOGI("Block continuous input index:%d", memory_block->input_index_); | GELOGI("Block continuous input index:%d", memory_block->input_index_); | ||||
| count++; | count++; | ||||
| if (count == 1) { | |||||
| memory_block->first_continuous_block_ = true; | |||||
| } | |||||
| if (count == continuous_blocks.size()) { | if (count == continuous_blocks.size()) { | ||||
| memory_block->last_continuous_block_ = true; | memory_block->last_continuous_block_ = true; | ||||
| } | } | ||||
| @@ -1242,6 +1246,10 @@ void BlockMemAssigner::ResizeMemoryBlocks() { | |||||
| if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) { | if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| if (memory_block->first_continuous_block_) { | |||||
| mem_offset_ += MEM_ALIGN_SIZE; | |||||
| } | |||||
| memory_block->Resize(); | memory_block->Resize(); | ||||
| memory_block->SetHeadOffset(mem_offset_); | memory_block->SetHeadOffset(mem_offset_); | ||||
| mem_offset_ += memory_block->Size(); | mem_offset_ += memory_block->Size(); | ||||
| @@ -64,6 +64,7 @@ class MemoryBlock { | |||||
| reuse_mem_(reuse_mem), | reuse_mem_(reuse_mem), | ||||
| input_index_(0), | input_index_(0), | ||||
| continuous_block_(false), | continuous_block_(false), | ||||
| first_continuous_block_(false), | |||||
| last_continuous_block_(false), | last_continuous_block_(false), | ||||
| is_zero_copy_(false), | is_zero_copy_(false), | ||||
| block_size_(block_size), | block_size_(block_size), | ||||
| @@ -129,6 +130,7 @@ class MemoryBlock { | |||||
| bool reuse_mem_; | bool reuse_mem_; | ||||
| uint32_t input_index_; | uint32_t input_index_; | ||||
| bool continuous_block_; | bool continuous_block_; | ||||
| bool first_continuous_block_; | |||||
| bool last_continuous_block_; | bool last_continuous_block_; | ||||
| bool is_zero_copy_; | bool is_zero_copy_; | ||||
| std::map<int64_t, size_t> depend_stream_life_; | std::map<int64_t, size_t> depend_stream_life_; | ||||
| @@ -446,6 +446,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node | |||||
| return ge::FAILED; | return ge::FAILED; | ||||
| } | } | ||||
| memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; | |||||
| for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | ||||
| output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_; | output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_; | ||||
| size_t pre_mem_offset = memory_offset_[0].mem_offset_; | size_t pre_mem_offset = memory_offset_[0].mem_offset_; | ||||
| @@ -450,11 +450,13 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<Inp | |||||
| Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | ||||
| vector<InputOutputDescInfo> &output_desc, | vector<InputOutputDescInfo> &output_desc, | ||||
| std::vector<uint32_t> &input_formats, std::vector<uint32_t> &out_formats) { | |||||
| std::vector<uint32_t> &input_formats, std::vector<uint32_t> &out_formats, | |||||
| bool new_model_desc) { | |||||
| try { | try { | ||||
| auto model_manager = ge::ModelManager::GetInstance(); | auto model_manager = ge::ModelManager::GetInstance(); | ||||
| GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
| Status ret = model_manager->GetInputOutputDescInfo(model_id, input_desc, output_desc, input_formats, out_formats); | |||||
| Status ret = model_manager->GetInputOutputDescInfo(model_id, input_desc, output_desc, input_formats, out_formats, | |||||
| new_model_desc); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetInputOutputDescInfo failed."); | GELOGE(ret, "GetInputOutputDescInfo failed."); | ||||
| CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | ||||
| @@ -71,7 +71,7 @@ class GraphExecutor { | |||||
| static Status GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | static Status GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | ||||
| vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats, | vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats, | ||||
| std::vector<uint32_t> &output_formats); | |||||
| std::vector<uint32_t> &output_formats, bool new_model_desc = false); | |||||
| static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | ||||
| @@ -21,6 +21,7 @@ | |||||
| #include <utility> | #include <utility> | ||||
| #include <vector> | #include <vector> | ||||
| #include "common/debug/log.h" | |||||
| #include "common/properties_manager.h" | #include "common/properties_manager.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/util.h" | #include "framework/common/util.h" | ||||
| @@ -28,6 +29,7 @@ | |||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "graph/load/new_model_manager/model_utils.h" | #include "graph/load/new_model_manager/model_utils.h" | ||||
| #include "graph/utils/attr_utils.h" | #include "graph/utils/attr_utils.h" | ||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "proto/ge_ir.pb.h" | #include "proto/ge_ir.pb.h" | ||||
| #include "proto/op_mapping_info.pb.h" | #include "proto/op_mapping_info.pb.h" | ||||
| #include "runtime/mem.h" | #include "runtime/mem.h" | ||||
| @@ -106,6 +108,7 @@ void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_ | |||||
| } | } | ||||
| void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) { | void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) { | ||||
| GELOGI("Start to save data %s message", node->GetName().c_str()); | |||||
| if (node != nullptr) { | if (node != nullptr) { | ||||
| auto input_op_desc = node->GetOpDesc(); | auto input_op_desc = node->GetOpDesc(); | ||||
| if (input_op_desc == nullptr) { | if (input_op_desc == nullptr) { | ||||
| @@ -126,6 +129,7 @@ void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) { | |||||
| {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); | {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("Save data message successfully"); | |||||
| } | } | ||||
| } | } | ||||
| @@ -159,30 +163,39 @@ void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::s | |||||
| return; | return; | ||||
| } | } | ||||
| GELOGI("Save input dump task %s, id: %u.", data_op->GetName().c_str(), task_id); | |||||
| int64_t data_size = 0; | |||||
| if (AttrUtils::GetInt(input_tensor, ATTR_NAME_INPUT_ORIGIN_SIZE, data_size)) { | |||||
| GELOGI("Get aipp data size according to attr is %ld", data_size); | |||||
| } else if (TensorUtils::GetTensorSizeInBytes(*input_tensor, data_size) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Get input size filed"); | |||||
| return; | |||||
| } | |||||
| GELOGI("Save input dump task %s, id: %u,stream id :%u,data size :%ld", data_op->GetName().c_str(), task_id, | |||||
| stream_id, data_size); | |||||
| op_list_.push_back({task_id, stream_id, data_op, args, false, inner_input_mapping.input_anchor_index, | op_list_.push_back({task_id, stream_id, data_op, args, false, inner_input_mapping.input_anchor_index, | ||||
| inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims()}); | |||||
| inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims(), data_size}); | |||||
| } | } | ||||
| } | } | ||||
| static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond, | static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond, | ||||
| aicpu::dump::OpMappingInfo &op_mapping_info) { | aicpu::dump::OpMappingInfo &op_mapping_info) { | ||||
| if (step_id != 0) { | if (step_id != 0) { | ||||
| GELOGI("step_id exist."); | |||||
| GELOGI("step_id exists."); | |||||
| op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id)); | op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id)); | ||||
| } else { | } else { | ||||
| GELOGI("step_id is null."); | GELOGI("step_id is null."); | ||||
| } | } | ||||
| if (loop_per_iter != 0) { | if (loop_per_iter != 0) { | ||||
| GELOGI("loop_per_iter exist."); | |||||
| GELOGI("loop_per_iter exists."); | |||||
| op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter)); | op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter)); | ||||
| } else { | } else { | ||||
| GELOGI("loop_per_iter is null."); | GELOGI("loop_per_iter is null."); | ||||
| } | } | ||||
| if (loop_cond != 0) { | if (loop_cond != 0) { | ||||
| GELOGI("loop_cond exist."); | |||||
| GELOGI("loop_cond exists."); | |||||
| op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond)); | op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond)); | ||||
| } else { | } else { | ||||
| GELOGI("loop_cond is null."); | GELOGI("loop_cond is null."); | ||||
| @@ -211,10 +224,19 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: | |||||
| output.mutable_shape()->add_dim(dim); | output.mutable_shape()->add_dim(dim); | ||||
| } | } | ||||
| int64_t output_size = 0; | |||||
| if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Get output size filed"); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| GELOGI("Get output size in dump is %ld", output_size); | |||||
| std::string origin_name; | std::string origin_name; | ||||
| int32_t origin_output_index = -1; | int32_t origin_output_index = -1; | ||||
| (void)AttrUtils::GetStr(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); | (void)AttrUtils::GetStr(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); | ||||
| (void)AttrUtils::GetInt(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); | (void)AttrUtils::GetInt(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); | ||||
| GE_IF_BOOL_EXEC(output_size <= 0, GELOGE(PARAM_INVALID, "Output size %ld is less than zero", output_size); | |||||
| return PARAM_INVALID) | |||||
| output.set_size(output_size); | |||||
| output.set_original_name(origin_name); | output.set_original_name(origin_name); | ||||
| output.set_original_output_index(origin_output_index); | output.set_original_output_index(origin_output_index); | ||||
| output.set_original_output_format(static_cast<int32_t>(output_descs.at(i).GetOriginFormat())); | output.set_original_output_format(static_cast<int32_t>(output_descs.at(i).GetOriginFormat())); | ||||
| @@ -247,6 +269,10 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: | |||||
| int32_t origin_output_index = -1; | int32_t origin_output_index = -1; | ||||
| (void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); | (void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); | ||||
| (void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); | (void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); | ||||
| GE_IF_BOOL_EXEC(inner_dump_info.data_size <= 0, | |||||
| GELOGE(PARAM_INVALID, "The size of data %ld is less than zero", inner_dump_info.data_size); | |||||
| return PARAM_INVALID) | |||||
| output.set_size(inner_dump_info.data_size); | |||||
| output.set_original_name(origin_name); | output.set_original_name(origin_name); | ||||
| output.set_original_output_index(origin_output_index); | output.set_original_output_index(origin_output_index); | ||||
| output.set_original_output_format(static_cast<int32_t>(output_tensor->GetOriginFormat())); | output.set_original_output_format(static_cast<int32_t>(output_tensor->GetOriginFormat())); | ||||
| @@ -283,6 +309,17 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump:: | |||||
| input.mutable_shape()->add_dim(dim); | input.mutable_shape()->add_dim(dim); | ||||
| } | } | ||||
| int64_t input_size = 0; | |||||
| if (AttrUtils::GetInt(&input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { | |||||
| GELOGI("Get aipp input size according to attr is %ld", input_size); | |||||
| } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Get input size filed"); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| GELOGI("Get input size in dump is %ld", input_size); | |||||
| GE_IF_BOOL_EXEC(input_size <= 0, GELOGE(PARAM_INVALID, "Input size %ld is less than zero", input_size); | |||||
| return PARAM_INVALID;) | |||||
| input.set_size(input_size); | |||||
| input.set_address(static_cast<uint64_t>(inner_dump_info.args + sizeof(void *) * i)); | input.set_address(static_cast<uint64_t>(inner_dump_info.args + sizeof(void *) * i)); | ||||
| task.mutable_input()->Add(std::move(input)); | task.mutable_input()->Add(std::move(input)); | ||||
| } | } | ||||
| @@ -323,7 +360,7 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in | |||||
| } | } | ||||
| load_flag_ = true; | load_flag_ = true; | ||||
| GELOGI("LoadDumpInfo success, proto size: %zu.", proto_size); | |||||
| GELOGI("LoadDumpInfo success, proto size is: %zu.", proto_size); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -360,11 +397,12 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_ | |||||
| return RT_FAILED; | return RT_FAILED; | ||||
| } | } | ||||
| load_flag_ = false; | load_flag_ = false; | ||||
| GELOGI("UnloadDumpInfo success, proto size: %zu.", proto_size); | |||||
| GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DataDumper::LoadDumpInfo() { | Status DataDumper::LoadDumpInfo() { | ||||
| PrintCheckLog(); | |||||
| std::string dump_list_key; | |||||
| PrintCheckLog(dump_list_key); | |||||
| if (op_list_.empty()) { | if (op_list_.empty()) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -374,12 +412,13 @@ Status DataDumper::LoadDumpInfo() { | |||||
| auto dump_path = PropertiesManager::Instance().GetDumpOutputPath(); | auto dump_path = PropertiesManager::Instance().GetDumpOutputPath(); | ||||
| op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/"); | op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/"); | ||||
| op_mapping_info.set_model_name(model_name_); | |||||
| op_mapping_info.set_model_name(dump_list_key); | |||||
| op_mapping_info.set_model_id(model_id_); | op_mapping_info.set_model_id(model_id_); | ||||
| op_mapping_info.set_flag(kAicpuLoadFlag); | op_mapping_info.set_flag(kAicpuLoadFlag); | ||||
| op_mapping_info.set_dump_step(PropertiesManager::Instance().GetDumpStep()); | op_mapping_info.set_dump_step(PropertiesManager::Instance().GetDumpStep()); | ||||
| SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | ||||
| GELOGD("Dump step in load dump info is %s", PropertiesManager::Instance().GetDumpStep().c_str()); | |||||
| GELOGI("Dump step is %s and dump path is %s in load dump info", PropertiesManager::Instance().GetDumpStep().c_str(), | |||||
| dump_path.c_str()); | |||||
| for (const auto &op_iter : op_list_) { | for (const auto &op_iter : op_list_) { | ||||
| aicpu::dump::Task task; | aicpu::dump::Task task; | ||||
| @@ -441,7 +480,7 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, | |||||
| if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput || | if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput || | ||||
| PropertiesManager::Instance().GetDumpMode() == kDumpInput || | PropertiesManager::Instance().GetDumpMode() == kDumpInput || | ||||
| PropertiesManager::Instance().GetDumpMode() == kDumpAll) { | PropertiesManager::Instance().GetDumpMode() == kDumpAll) { | ||||
| GELOGI("add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||||
| GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||||
| aicpu::dump::Task task; | aicpu::dump::Task task; | ||||
| task.set_end_graph(true); | task.set_end_graph(true); | ||||
| task.set_task_id(end_graph_task_id_); | task.set_task_id(end_graph_task_id_); | ||||
| @@ -477,7 +516,7 @@ Status DataDumper::UnloadDumpInfo() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DataDumper::PrintCheckLog() { | |||||
| void DataDumper::PrintCheckLog(string &dump_list_key) { | |||||
| std::set<std::string> model_list = PropertiesManager::Instance().GetAllDumpModel(); | std::set<std::string> model_list = PropertiesManager::Instance().GetAllDumpModel(); | ||||
| if (model_list.empty()) { | if (model_list.empty()) { | ||||
| GELOGI("No model need dump."); | GELOGI("No model need dump."); | ||||
| @@ -485,19 +524,21 @@ void DataDumper::PrintCheckLog() { | |||||
| } | } | ||||
| GELOGI("%zu op need dump in %s.", op_list_.size(), model_name_.c_str()); | GELOGI("%zu op need dump in %s.", op_list_.size(), model_name_.c_str()); | ||||
| if (model_list.find(ge::DUMP_ALL_MODEL) == model_list.end()) { | |||||
| if (model_list.find(model_name_) == model_list.end()) { | |||||
| bool not_find_by_omname = model_list.find(om_name_) == model_list.end(); | |||||
| bool not_find_by_modelname = model_list.find(model_name_) == model_list.end(); | |||||
| if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) { | |||||
| if (not_find_by_omname && not_find_by_modelname) { | |||||
| std::string model_list_str; | std::string model_list_str; | ||||
| for (auto &model : model_list) { | for (auto &model : model_list) { | ||||
| model_list_str += "[" + model + "]."; | model_list_str += "[" + model + "]."; | ||||
| } | } | ||||
| GELOGW("Model %s not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str()); | |||||
| GELOGW("Model %s will not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str()); | |||||
| return; | return; | ||||
| } | } | ||||
| } | } | ||||
| std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(model_name_); | |||||
| dump_list_key = not_find_by_omname ? model_name_ : om_name_; | |||||
| std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(dump_list_key); | |||||
| std::set<std::string> dump_op_list; | std::set<std::string> dump_op_list; | ||||
| for (auto &inner_dump_info : op_list_) { | for (auto &inner_dump_info : op_list_) { | ||||
| // oplist value OpDescPtr is not nullptr | // oplist value OpDescPtr is not nullptr | ||||
| @@ -506,7 +547,7 @@ void DataDumper::PrintCheckLog() { | |||||
| for (auto &dump_op : config_dump_op_list) { | for (auto &dump_op : config_dump_op_list) { | ||||
| if (dump_op_list.find(dump_op) == dump_op_list.end()) { | if (dump_op_list.find(dump_op) == dump_op_list.end()) { | ||||
| GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), model_name_.c_str()); | |||||
| GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), dump_list_key.c_str()); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -64,6 +64,8 @@ class DataDumper { | |||||
| void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args); | void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args); | ||||
| void SaveEndGraphId(uint32_t task_id, uint32_t stream_id); | void SaveEndGraphId(uint32_t task_id, uint32_t stream_id); | ||||
| void SetOmName(const std::string &om_name) { om_name_ = om_name; } | |||||
| Status LoadDumpInfo(); | Status LoadDumpInfo(); | ||||
| Status UnloadDumpInfo(); | Status UnloadDumpInfo(); | ||||
| @@ -71,9 +73,13 @@ class DataDumper { | |||||
| private: | private: | ||||
| void ReleaseDevMem(void **ptr) noexcept; | void ReleaseDevMem(void **ptr) noexcept; | ||||
| void PrintCheckLog(); | |||||
| void PrintCheckLog(string &dump_list_key); | |||||
| std::string model_name_; | std::string model_name_; | ||||
| // for inference data dump | |||||
| std::string om_name_; | |||||
| uint32_t model_id_; | uint32_t model_id_; | ||||
| RuntimeParam runtime_param_; | RuntimeParam runtime_param_; | ||||
| void *dev_mem_load_; | void *dev_mem_load_; | ||||
| @@ -107,6 +113,7 @@ struct DataDumper::InnerDumpInfo { | |||||
| int input_anchor_index; | int input_anchor_index; | ||||
| int output_anchor_index; | int output_anchor_index; | ||||
| std::vector<int64_t> dims; | std::vector<int64_t> dims; | ||||
| int64_t data_size; | |||||
| }; | }; | ||||
| struct DataDumper::InnerInputMapping { | struct DataDumper::InnerInputMapping { | ||||
| @@ -78,7 +78,7 @@ namespace { | |||||
| const uint32_t kDataIndex = 0; | const uint32_t kDataIndex = 0; | ||||
| const uint32_t kOutputNum = 1; | const uint32_t kOutputNum = 1; | ||||
| const uint32_t kTrueBranchStreamNum = 1; | const uint32_t kTrueBranchStreamNum = 1; | ||||
| const uint32_t kThreadNum = 16; | |||||
| const uint32_t kThreadNum = 1; | |||||
| const uint32_t kAddrLen = sizeof(void *); | const uint32_t kAddrLen = sizeof(void *); | ||||
| const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel"; | const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel"; | ||||
| const int kDecimal = 10; | const int kDecimal = 10; | ||||
| @@ -94,42 +94,9 @@ inline bool IsCallDumpInputOp(const OpDescPtr &op_desc) { | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NO_TASK_AND_DUMP_NEEDED, skip_task_generate); | (void)ge::AttrUtils::GetBool(op_desc, ATTR_NO_TASK_AND_DUMP_NEEDED, skip_task_generate); | ||||
| return skip_task_generate; | return skip_task_generate; | ||||
| } | } | ||||
| void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) { | |||||
| uint32_t n, c, h, w; | |||||
| n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N; | |||||
| c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C; | |||||
| h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H; | |||||
| w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W; | |||||
| if (!op_desc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||||
| if (op_desc->GetInputDescPtr(0)->GetShape().GetDimNum() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||||
| input.shape_info.num = op_desc->GetInputDescPtr(0)->GetShape().GetDim(n); | |||||
| input.shape_info.height = op_desc->GetInputDescPtr(0)->GetShape().GetDim(h); | |||||
| input.shape_info.width = op_desc->GetInputDescPtr(0)->GetShape().GetDim(w); | |||||
| input.shape_info.channel = op_desc->GetInputDescPtr(0)->GetShape().GetDim(c); | |||||
| } | |||||
| for (size_t k = 0; k < op_desc->GetInputDescPtr(0)->GetShape().GetDimNum(); k++) { | |||||
| input.shape_info.dims.push_back(op_desc->GetInputDescPtr(0)->GetShape().GetDim(k)); | |||||
| } | |||||
| } else { | |||||
| vector<int64_t> origin_input_dims; | |||||
| (void)AttrUtils::GetListInt(op_desc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||||
| if (origin_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||||
| input.shape_info.num = origin_input_dims[n]; | |||||
| input.shape_info.height = origin_input_dims[h]; | |||||
| input.shape_info.width = origin_input_dims[w]; | |||||
| input.shape_info.channel = origin_input_dims[c]; | |||||
| } | |||||
| for (size_t k = 0; k < origin_input_dims.size(); ++k) { | |||||
| input.shape_info.dims.push_back(origin_input_dims[k]); | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| std::mutex DavinciModel::tvm_bin_mutex_; | std::mutex DavinciModel::tvm_bin_mutex_; | ||||
| std::set<std::string> DavinciModel::tvm_bin_kernel_; | |||||
| DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener) | DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener) | ||||
| : weights_mem_base_(nullptr), | : weights_mem_base_(nullptr), | ||||
| @@ -536,7 +503,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
| compute_graph_ = GraphUtils::GetComputeGraph(graph); | compute_graph_ = GraphUtils::GetComputeGraph(graph); | ||||
| GE_CHK_BOOL_RET_STATUS(compute_graph_ != nullptr, INTERNAL_ERROR, "Get compute graph is nullptr."); | GE_CHK_BOOL_RET_STATUS(compute_graph_ != nullptr, INTERNAL_ERROR, "Get compute graph is nullptr."); | ||||
| runtime_param_.graph_id = GetGraphID(compute_graph_->GetName()); | |||||
| runtime_param_.graph_id = compute_graph_->GetGraphID(); | |||||
| GE_TIMESTAMP_START(TransAllVarData); | GE_TIMESTAMP_START(TransAllVarData); | ||||
| GE_CHK_STATUS_RET(TransAllVarData(compute_graph_, runtime_param_.graph_id), "TransAllVarData failed."); | GE_CHK_STATUS_RET(TransAllVarData(compute_graph_, runtime_param_.graph_id), "TransAllVarData failed."); | ||||
| @@ -1447,6 +1414,55 @@ Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInf | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) { | |||||
| uint32_t n, c, h, w; | |||||
| n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N; | |||||
| c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C; | |||||
| h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H; | |||||
| w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W; | |||||
| if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) { | |||||
| // When static aipp is set, need to get the model input dims which processed by aipp | |||||
| vector<int64_t> model_input_dims; | |||||
| (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims); | |||||
| if (model_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||||
| input.shape_info.num = model_input_dims[n]; | |||||
| input.shape_info.height = model_input_dims[h]; | |||||
| input.shape_info.width = model_input_dims[w]; | |||||
| input.shape_info.channel = model_input_dims[c]; | |||||
| } | |||||
| for (size_t k = 0; k < model_input_dims.size(); ++k) { | |||||
| input.shape_info.dims.push_back(model_input_dims[k]); | |||||
| } | |||||
| is_new_model_desc_ = false; | |||||
| return; | |||||
| } | |||||
| if (!op_desc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||||
| if (op_desc->GetInputDescPtr(0)->GetShape().GetDimNum() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||||
| input.shape_info.num = op_desc->GetInputDescPtr(0)->GetShape().GetDim(n); | |||||
| input.shape_info.height = op_desc->GetInputDescPtr(0)->GetShape().GetDim(h); | |||||
| input.shape_info.width = op_desc->GetInputDescPtr(0)->GetShape().GetDim(w); | |||||
| input.shape_info.channel = op_desc->GetInputDescPtr(0)->GetShape().GetDim(c); | |||||
| } | |||||
| for (size_t k = 0; k < op_desc->GetInputDescPtr(0)->GetShape().GetDimNum(); k++) { | |||||
| input.shape_info.dims.push_back(op_desc->GetInputDescPtr(0)->GetShape().GetDim(k)); | |||||
| } | |||||
| } else { | |||||
| vector<int64_t> origin_input_dims; | |||||
| (void)AttrUtils::GetListInt(op_desc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||||
| if (origin_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||||
| input.shape_info.num = origin_input_dims[n]; | |||||
| input.shape_info.height = origin_input_dims[h]; | |||||
| input.shape_info.width = origin_input_dims[w]; | |||||
| input.shape_info.channel = origin_input_dims[c]; | |||||
| } | |||||
| for (size_t k = 0; k < origin_input_dims.size(); ++k) { | |||||
| input.shape_info.dims.push_back(origin_input_dims[k]); | |||||
| } | |||||
| } | |||||
| } | |||||
| Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) { | Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) { | ||||
| for (size_t index = 0; index < data_op_list_.size(); ++index) { | for (size_t index = 0; index < data_op_list_.size(); ++index) { | ||||
| InputOutputDescInfo input; | InputOutputDescInfo input; | ||||
| @@ -1455,6 +1471,7 @@ Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, s | |||||
| Format format = data_op_list_[index]->GetInputDescPtr(0)->GetFormat(); | Format format = data_op_list_[index]->GetInputDescPtr(0)->GetFormat(); | ||||
| CreateInputDimsInfo(data_op_list_[index], format, input); | CreateInputDimsInfo(data_op_list_[index], format, input); | ||||
| input.data_type = data_op_list_[index]->GetInputDescPtr(0)->GetDataType(); | input.data_type = data_op_list_[index]->GetInputDescPtr(0)->GetDataType(); | ||||
| input.name = data_op_list_[index]->GetName(); | input.name = data_op_list_[index]->GetName(); | ||||
| int64_t input_size = 0; | int64_t input_size = 0; | ||||
| @@ -1535,7 +1552,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, | |||||
| "construct output_name failed."); | "construct output_name failed."); | ||||
| // forward compatbility, if old om has no out_node_name, need to return output follow origin way | // forward compatbility, if old om has no out_node_name, need to return output follow origin way | ||||
| if (out_size == out_node_name.size()) { | if (out_size == out_node_name.size()) { | ||||
| output_name = out_node_name[index] + ":" + std::to_string(src_index[index]); | |||||
| // neweast plan, the index will add to name during generate model. | |||||
| bool contains_colon = out_node_name[index].find(":") != std::string::npos; | |||||
| output_name = | |||||
| contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]); | |||||
| } else { | } else { | ||||
| output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + | output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + | ||||
| std::to_string(src_index[index]); | std::to_string(src_index[index]); | ||||
| @@ -1966,6 +1986,10 @@ Status DavinciModel::CopyOutputDataToUser(OpDescPtr &op_desc, std::vector<DataBu | |||||
| "Model output data size(%u) does not match required size(%u).", v_output_size[i], | "Model output data size(%u) does not match required size(%u).", v_output_size[i], | ||||
| data_buf.length); | data_buf.length); | ||||
| if (copy_only_addrs_.count(v_output_data_addr[i]) == 0) { | |||||
| GELOGI("[ZCPY] This addr[%p] has already feed by zero copy.", v_output_data_addr[i]); | |||||
| continue; // Skip: Feed by zero copy. | |||||
| } | |||||
| GELOGI( | GELOGI( | ||||
| "CopyOutputDataToUser memcpy graph_%u type[F] name[%s] output[%lu] dst[%p] src[%p] mem_size[%u] datasize[%u]", | "CopyOutputDataToUser memcpy graph_%u type[F] name[%s] output[%lu] dst[%p] src[%p] mem_size[%u] datasize[%u]", | ||||
| runtime_param_.graph_id, op_desc->GetName().c_str(), i, data_buf.data, v_output_data_addr[i], data_buf.length, | runtime_param_.graph_id, op_desc->GetName().c_str(), i, data_buf.data, v_output_data_addr[i], data_buf.length, | ||||
| @@ -2510,51 +2534,19 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||||
| } | } | ||||
| Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { | Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { | ||||
| GELOGI("InitTaskInfo in,task size %zu", model_task_def.task().size()); | |||||
| GELOGI("InitTaskInfo in,task size %d", model_task_def.task().size()); | |||||
| task_list_.resize(model_task_def.task_size()); | task_list_.resize(model_task_def.task_size()); | ||||
| std::vector<std::future<Status>> futures(model_task_def.task_size()); | |||||
| ThreadPool executor(kThreadNum); | |||||
| rtContext_t ctx = nullptr; | |||||
| rtError_t rt_ret = rtCtxGetCurrent(&ctx); | |||||
| if (rt_ret != RT_ERROR_NONE || ctx == nullptr) { | |||||
| GELOGE(RT_FAILED, "Failed to get current context from rt, error-code 0x%X.", rt_ret); | |||||
| return RT_FAILED; | |||||
| } | |||||
| for (int32_t i = 0; i < model_task_def.task_size(); ++i) { | |||||
| std::future<Status> f = executor.commit( | |||||
| [](const domi::TaskDef &task, DavinciModel *model, rtContext_t ctx, int32_t idx) -> Status { | |||||
| rtError_t rt_ret = rtCtxSetCurrent(ctx); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Failed to set context from rt, error-code 0x%X.", rt_ret); | |||||
| return RT_FAILED; | |||||
| } | |||||
| Status ret = FAILED; | |||||
| // dynamic shape will create task_list_ before | |||||
| if (model->task_list_[idx] == nullptr) { | |||||
| model->task_list_[idx] = TaskInfoFactory::Instance().Create(static_cast<rtModelTaskType_t>(task.type())); | |||||
| GE_CHECK_NOTNULL(model->task_list_[idx]); | |||||
| } | |||||
| ret = model->task_list_[idx]->Init(task, model); | |||||
| return ret; | |||||
| }, | |||||
| model_task_def.task(i), this, ctx, i); | |||||
| if (!f.valid()) { | |||||
| GELOGE(FAILED, "Future is invalid"); | |||||
| return FAILED; | |||||
| } | |||||
| futures[i] = std::move(f); | |||||
| } | |||||
| Status ret; | |||||
| for (size_t i = 0; i < futures.size(); ++i) { | |||||
| ret = futures[i].get(); | |||||
| for (int i = 0; i < model_task_def.task_size(); ++i) { | |||||
| // dynamic shape will create task_list_ before | |||||
| const domi::TaskDef &task = model_task_def.task(i); | |||||
| task_list_[i] = TaskInfoFactory::Instance().Create(static_cast<rtModelTaskType_t>(task.type())); | |||||
| GE_CHECK_NOTNULL(task_list_[i]); | |||||
| Status ret = task_list_[i]->Init(task, this); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Task index %zu init failed.", i); | |||||
| GELOGE(ret, "Task index %d init failed.", i); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("InitTaskInfo out"); | GELOGI("InitTaskInfo out"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -2623,7 +2615,7 @@ Status DavinciModel::DistributeTask() { | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| if (PropertiesManager::Instance().IsLayerNeedDump(name_, op->GetName())) { | |||||
| if (PropertiesManager::Instance().IsLayerNeedDump(name_, om_name_, op->GetName())) { | |||||
| SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); | SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); | ||||
| } | } | ||||
| } | } | ||||
| @@ -2661,8 +2653,9 @@ Status DavinciModel::DistributeTask() { | |||||
| void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { | void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { | ||||
| auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel(); | auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel(); | ||||
| if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | |||||
| all_dump_model.find(name_) != all_dump_model.end()) { | |||||
| bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); | |||||
| bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); | |||||
| if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) { | |||||
| GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id); | GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id); | ||||
| data_dumper_.SaveEndGraphId(task_id, stream_id); | data_dumper_.SaveEndGraphId(task_id, stream_id); | ||||
| } | } | ||||
| @@ -2696,7 +2689,7 @@ void DavinciModel::SetOutputOutsideAddr(const std::vector<void *> &outside_addrs | |||||
| if (output_outside_addrs_.find(addr) != output_outside_addrs_.end()) { | if (output_outside_addrs_.find(addr) != output_outside_addrs_.end()) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| DisableZeroCopy(addr); // Data to NetOutput directly. | |||||
| (void)output_outside_addrs_.emplace(std::pair<const void *, std::vector<void *>>(addr, {})); | (void)output_outside_addrs_.emplace(std::pair<const void *, std::vector<void *>>(addr, {})); | ||||
| GELOGI("SetOutputOutsideAddr success."); | GELOGI("SetOutputOutsideAddr success."); | ||||
| } | } | ||||
| @@ -2902,11 +2895,15 @@ Status DavinciModel::UpdateIoTaskArgs(const map<uint32_t, pair<int64_t, void *>> | |||||
| } | } | ||||
| // For input data, just copy for rts task. | // For input data, just copy for rts task. | ||||
| if (is_input && copy_only_addrs_.count(addr) > 0) { | |||||
| if (rtMemcpy(addr, size, buffer.data, buffer.length, RT_MEMCPY_DEVICE_TO_DEVICE) != RT_ERROR_NONE) { | |||||
| GELOGE(FAILED, "Non-zero copy data node copy failed"); | |||||
| return FAILED; | |||||
| if (copy_only_addrs_.count(addr) > 0) { | |||||
| if (is_input) { | |||||
| GELOGI("[IMAS] Find addr %p need direct copy from user malloc input %p.", addr, buffer.data); | |||||
| if (rtMemcpy(addr, size, buffer.data, buffer.length, RT_MEMCPY_DEVICE_TO_DEVICE) != RT_ERROR_NONE) { | |||||
| GELOGE(FAILED, "Non-zero copy data node copy failed"); | |||||
| return FAILED; | |||||
| } | |||||
| } | } | ||||
| GELOGI("No need to exeucte zero copy task because this addr %p need direct copy.", addr); | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -2953,7 +2950,6 @@ const char *DavinciModel::GetRegisterStub(const string &binfile, const string &s | |||||
| } else { | } else { | ||||
| binfile_key = session_graph_id + "_" + binfile; | binfile_key = session_graph_id + "_" + binfile; | ||||
| } | } | ||||
| std::lock_guard<std::mutex> lock(tvm_bin_mutex_); | |||||
| auto it = tvm_bin_kernel_.find(binfile_key); | auto it = tvm_bin_kernel_.find(binfile_key); | ||||
| if (it != tvm_bin_kernel_.end()) { | if (it != tvm_bin_kernel_.end()) { | ||||
| return it->c_str(); | return it->c_str(); | ||||
| @@ -3089,7 +3085,6 @@ void DavinciModel::StoreTbeHandle(const std::string &handle_key) { | |||||
| // Online mode FE may call rtFunctionRegister. | // Online mode FE may call rtFunctionRegister. | ||||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | ||||
| // Need protection of tvm_bin_mutex_. | |||||
| auto it = used_tbe_handle_map_.find(handle_key); | auto it = used_tbe_handle_map_.find(handle_key); | ||||
| if (it != used_tbe_handle_map_.end()) { | if (it != used_tbe_handle_map_.end()) { | ||||
| // GE registered, increase reference. | // GE registered, increase reference. | ||||
| @@ -3109,9 +3104,9 @@ void DavinciModel::StoreTbeHandle(const std::string &handle_key) { | |||||
| void DavinciModel::CleanTbeHandle() { | void DavinciModel::CleanTbeHandle() { | ||||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | ||||
| std::lock_guard<std::mutex> lock(tvm_bin_mutex_); | |||||
| kernel_store.EraseTBEHandle(used_tbe_handle_map_); | kernel_store.EraseTBEHandle(used_tbe_handle_map_); | ||||
| used_tbe_handle_map_.clear(); | used_tbe_handle_map_.clear(); | ||||
| tvm_bin_kernel_.clear(); | |||||
| } | } | ||||
| /// | /// | ||||
| @@ -3246,15 +3241,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
| bool is_dynamic_batch = input_data.is_dynamic_batch; | bool is_dynamic_batch = input_data.is_dynamic_batch; | ||||
| InitZeroCopyUtil(is_dynamic_batch, input_use_zero_copy, output_use_zero_copy); | InitZeroCopyUtil(is_dynamic_batch, input_use_zero_copy, output_use_zero_copy); | ||||
| // Empty task, Just copy input to output, need direct copy. | |||||
| if (task_list_.empty() && (input_use_zero_copy || output_use_zero_copy)) { | |||||
| GELOGE(FAILED, "Empty task, Just copy input to output, need direct copy."); | |||||
| return FAILED; | |||||
| } | |||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START)); | ||||
| Status ret = | |||||
| input_use_zero_copy ? CopyModelData(input_data, output_data, is_dynamic_batch) : CopyInputData(input_data, true); | |||||
| Status ret = CopyModelData(input_data, output_data, is_dynamic_batch); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return INTERNAL_ERROR, "Copy input data to model failed."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return INTERNAL_ERROR, "Copy input data to model failed."); | ||||
| GELOGI("current_data.index=%u", input_data.index); | GELOGI("current_data.index=%u", input_data.index); | ||||
| @@ -3271,7 +3259,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
| if (!is_async_mode_) { | if (!is_async_mode_) { | ||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_START)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_START)); | ||||
| ret = output_use_zero_copy ? SyncDataAndDump() : CopyOutputData(input_data.index, output_data); | |||||
| ret = CopyOutputData(input_data.index, output_data); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return INTERNAL_ERROR, "Copy Output data to user failed."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return INTERNAL_ERROR, "Copy Output data to user failed."); | ||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_END)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_END)); | ||||
| } | } | ||||
| @@ -3344,17 +3332,6 @@ void DavinciModel::FreeWeightsMem() { | |||||
| } | } | ||||
| } | } | ||||
| uint32_t DavinciModel::GetGraphID(const std::string &session_graph_id) { | |||||
| std::string session_id = "_"; | |||||
| auto pos = session_graph_id.find(session_id); | |||||
| if (pos != std::string::npos) { | |||||
| size_t graph_id_length = session_graph_id.length() - pos - session_id.length(); | |||||
| std::string graph_id = session_graph_id.substr(pos + session_id.length(), graph_id_length); | |||||
| return static_cast<uint32_t>(std::strtol(graph_id.c_str(), nullptr, kDecimal)); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) { | Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) { | ||||
| GELOGI("TransAllVarData start: session_id:%lu, graph_id: %u.", session_id_, graph_id); | GELOGI("TransAllVarData start: session_id:%lu, graph_id: %u.", session_id_, graph_id); | ||||
| rtContext_t ctx = nullptr; | rtContext_t ctx = nullptr; | ||||
| @@ -3387,6 +3364,7 @@ void DavinciModel::SetDataDumperArgs() { | |||||
| data_dumper_.SetModelName(name_); | data_dumper_.SetModelName(name_); | ||||
| data_dumper_.SetModelId(model_id_); | data_dumper_.SetModelId(model_id_); | ||||
| data_dumper_.SetMemory(runtime_param_); | data_dumper_.SetMemory(runtime_param_); | ||||
| data_dumper_.SetOmName(om_name_); | |||||
| int32_t device_id = 0; | int32_t device_id = 0; | ||||
| rtError_t rt_ret = rtGetDevice(&device_id); | rtError_t rt_ret = rtGetDevice(&device_id); | ||||
| @@ -187,6 +187,8 @@ class DavinciModel { | |||||
| // model name | // model name | ||||
| string Name() { return name_; } | string Name() { return name_; } | ||||
| // om_name | |||||
| string OmName() { return om_name_; } | |||||
| // version | // version | ||||
| uint32_t Version() const { return version_; } | uint32_t Version() const { return version_; } | ||||
| @@ -273,7 +275,7 @@ class DavinciModel { | |||||
| /// @brief For TVM Op, avoid Addr Reuse. | /// @brief For TVM Op, avoid Addr Reuse. | ||||
| /// @return void* | /// @return void* | ||||
| /// | /// | ||||
| static const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = ""); | |||||
| const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = ""); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -471,6 +473,9 @@ class DavinciModel { | |||||
| Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | ||||
| Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | ||||
| std::vector<InputOutputDims> &output_dims); | std::vector<InputOutputDims> &output_dims); | ||||
| void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } | |||||
| // om file name | |||||
| void SetOmName(string om_name) { om_name_ = om_name; } | |||||
| private: | private: | ||||
| // memory address of weights | // memory address of weights | ||||
| @@ -560,6 +565,8 @@ class DavinciModel { | |||||
| Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize); | Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize); | ||||
| void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); | |||||
| Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats); | Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats); | ||||
| Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | ||||
| @@ -752,8 +759,6 @@ class DavinciModel { | |||||
| void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); | void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); | ||||
| uint32_t GetGraphID(const std::string &session_graph_id); | |||||
| Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | ||||
| Status CopyVarData(ComputeGraphPtr &graph); | Status CopyVarData(ComputeGraphPtr &graph); | ||||
| @@ -771,6 +776,10 @@ class DavinciModel { | |||||
| uint32_t model_id_; | uint32_t model_id_; | ||||
| uint32_t runtime_model_id_; | uint32_t runtime_model_id_; | ||||
| string name_; | string name_; | ||||
| // used for inference data dump | |||||
| string om_name_; | |||||
| uint32_t version_; | uint32_t version_; | ||||
| GeModelPtr ge_model_; | GeModelPtr ge_model_; | ||||
| @@ -860,8 +869,8 @@ class DavinciModel { | |||||
| std::set<uint32_t> hcom_streams_; | std::set<uint32_t> hcom_streams_; | ||||
| RuntimeParam runtime_param_; | RuntimeParam runtime_param_; | ||||
| static std::mutex tvm_bin_mutex_; // lock for tvm maps. | |||||
| static std::set<std::string> tvm_bin_kernel_; | |||||
| static std::mutex tvm_bin_mutex_; | |||||
| std::set<std::string> tvm_bin_kernel_; | |||||
| std::map<std::string, uint32_t> used_tbe_handle_map_; | std::map<std::string, uint32_t> used_tbe_handle_map_; | ||||
| @@ -884,6 +893,7 @@ class DavinciModel { | |||||
| std::map<const void *, void *> knonw_output_data_info_; | std::map<const void *, void *> knonw_output_data_info_; | ||||
| vector<uint64_t> batch_size_; | vector<uint64_t> batch_size_; | ||||
| bool is_new_model_desc_{false}; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | ||||
| @@ -325,6 +325,12 @@ Status ModelManager::DeleteModel(uint32_t id) { | |||||
| auto it = model_map_.find(id); | auto it = model_map_.find(id); | ||||
| auto hybrid_model_it = hybrid_model_map_.find(id); | auto hybrid_model_it = hybrid_model_map_.find(id); | ||||
| if (it != model_map_.end()) { | if (it != model_map_.end()) { | ||||
| uint64_t session_id = it->second->GetSessionId(); | |||||
| std::string model_key = std::to_string(session_id) + "_" + std::to_string(id); | |||||
| auto iter_aicpu_kernel = model_aicpu_kernel_.find(model_key); | |||||
| if (iter_aicpu_kernel != model_aicpu_kernel_.end()) { | |||||
| (void)model_aicpu_kernel_.erase(iter_aicpu_kernel); | |||||
| } | |||||
| (void)model_map_.erase(it); | (void)model_map_.erase(it); | ||||
| } else if (hybrid_model_it != hybrid_model_map_.end()) { | } else if (hybrid_model_it != hybrid_model_map_.end()) { | ||||
| (void)hybrid_model_map_.erase(hybrid_model_it); | (void)hybrid_model_map_.erase(hybrid_model_it); | ||||
| @@ -685,11 +691,14 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||||
| Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | ||||
| vector<InputOutputDescInfo> &output_desc, | vector<InputOutputDescInfo> &output_desc, | ||||
| std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats) { | |||||
| std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats, | |||||
| bool new_model_desc) { | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | ||||
| "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); | "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); | ||||
| davinci_model->SetModelDescVersion(new_model_desc); | |||||
| return davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats); | return davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats); | ||||
| } | } | ||||
| @@ -820,6 +829,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| davinci_model->SetDeviceId(device_id); | davinci_model->SetDeviceId(device_id); | ||||
| davinci_model->SetOmName(model.om_name); | |||||
| /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. | /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. | ||||
| /// These session_ids come from the same model, so the values of session_id are the same. | /// These session_ids come from the same model, so the values of session_id are the same. | ||||
| @@ -178,7 +178,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| ge::Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | ge::Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | ||||
| std::vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &inputFormats, | std::vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &inputFormats, | ||||
| std::vector<uint32_t> &outputFormats); | |||||
| std::vector<uint32_t> &outputFormats, bool new_model_desc = false); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get dynamic batch_info | /// @brief Get dynamic batch_info | ||||
| @@ -47,7 +47,8 @@ Status EndGraphTaskInfo::Distribute() { | |||||
| GE_CHECK_NOTNULL(davinci_model_); | GE_CHECK_NOTNULL(davinci_model_); | ||||
| auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel(); | auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel(); | ||||
| if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | ||||
| all_dump_model.find(davinci_model_->Name()) != all_dump_model.end()) { | |||||
| all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || | |||||
| all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { | |||||
| GELOGI("Start to call rtEndGraphEx"); | GELOGI("Start to call rtEndGraphEx"); | ||||
| rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); | rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| @@ -153,7 +153,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | ||||
| return FAILED;) | return FAILED;) | ||||
| if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) { | |||||
| if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||||
| op_desc->GetName())) { | |||||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | dump_flag_ = RT_KERNEL_DUMPFLAG; | ||||
| dump_args_ = input_output_addr_; | dump_args_ = input_output_addr_; | ||||
| } | } | ||||
| @@ -63,7 +63,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| domi::KernelDef kernel_def = task_def.kernel(); | |||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
| block_dim_ = kernel_def.block_dim(); | block_dim_ = kernel_def.block_dim(); | ||||
| args_size_ = kernel_def.args_size(); | args_size_ = kernel_def.args_size(); | ||||
| // get opcontext stored in model | // get opcontext stored in model | ||||
| @@ -92,7 +92,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||||
| string session_graph_model_id; | string session_graph_model_id; | ||||
| davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); | davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); | ||||
| // get bin_file_key | // get bin_file_key | ||||
| const char *bin_file_key = DavinciModel::GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||||
| const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||||
| // new aicpu kernel(rtCpuKernelLaunch) no need to check function | // new aicpu kernel(rtCpuKernelLaunch) no need to check function | ||||
| if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) { | if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) { | ||||
| rtError_t rt_ret; | rtError_t rt_ret; | ||||
| @@ -494,7 +494,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
| // When inferencing, stub_func_ is different from dynamic-registration to runtime, and needs to be modified. | // When inferencing, stub_func_ is different from dynamic-registration to runtime, and needs to be modified. | ||||
| string session_graph_model_id; | string session_graph_model_id; | ||||
| davinci_model_->GetUniqueId(op_desc, session_graph_model_id); | davinci_model_->GetUniqueId(op_desc, session_graph_model_id); | ||||
| const char *bin_file_key = DavinciModel::GetRegisterStub(op_desc->GetName(), session_graph_model_id); | |||||
| const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc->GetName(), session_graph_model_id); | |||||
| rtError_t rt_ret = rtQueryFunctionRegistered(const_cast<char *>(bin_file_key)); | rtError_t rt_ret = rtQueryFunctionRegistered(const_cast<char *>(bin_file_key)); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| stub_func_ = const_cast<char *>(bin_file_key); | stub_func_ = const_cast<char *>(bin_file_key); | ||||
| @@ -549,7 +549,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) { | |||||
| if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||||
| op_desc->GetName())) { | |||||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | dump_flag_ = RT_KERNEL_DUMPFLAG; | ||||
| dump_args_ = static_cast<char *>(args_) + offset; | dump_args_ = static_cast<char *>(args_) + offset; | ||||
| } | } | ||||
| @@ -818,7 +819,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| return RT_FAILED; | return RT_FAILED; | ||||
| } | } | ||||
| if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) { | |||||
| if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||||
| op_desc->GetName())) { | |||||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | dump_flag_ = RT_KERNEL_DUMPFLAG; | ||||
| dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead); | dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead); | ||||
| } | } | ||||
| @@ -396,8 +396,6 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| if (save_ret != SUCCESS) { | if (save_ret != SUCCESS) { | ||||
| GELOGW("Fail to save cache."); | GELOGW("Fail to save cache."); | ||||
| } | } | ||||
| // release rts generate context | |||||
| RtContextUtil::GetInstance().DestroyrtContexts(); | |||||
| GEEVENT("[GEPERFTRACE] GE PreRun End"); | GEEVENT("[GEPERFTRACE] GE PreRun End"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -420,6 +418,8 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||||
| ret = IncreBuild(graph_node, ge_model); | ret = IncreBuild(graph_node, ge_model); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| ret = PreRun(graph_node, inputs, ge_root_model, session_id); | ret = PreRun(graph_node, inputs, ge_root_model, session_id); | ||||
| // release rts generate context | |||||
| RtContextUtil::GetInstance().DestroyrtContexts(); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "PreRun Failed."); | GELOGE(ret, "PreRun Failed."); | ||||
| return ret; | return ret; | ||||
| @@ -2165,6 +2165,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||||
| GeModelPtr ge_model = nullptr; | GeModelPtr ge_model = nullptr; | ||||
| if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { | if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { | ||||
| ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); | ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); | ||||
| // release rts generate context | |||||
| RtContextUtil::GetInstance().DestroyrtContexts(); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| graph_node->SetRunFlag(false); | graph_node->SetRunFlag(false); | ||||
| ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit.."); | ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit.."); | ||||
| @@ -91,7 +91,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen | |||||
| std::string var_key = VarKey(var_name, tensor_desc); | std::string var_key = VarKey(var_name, tensor_desc); | ||||
| GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); | GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); | ||||
| if (var_addr_mgr_map_.count(var_key) == 0) { | if (var_addr_mgr_map_.count(var_key) == 0) { | ||||
| uint64_t logic_address = VarManager::Instance(0)->GetVarMemLogicBase() + | |||||
| uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() + | |||||
| reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | ||||
| GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), | GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), | ||||
| TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), | TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), | ||||
| @@ -105,9 +105,8 @@ void ge::GraphPartitioner::SetMergedGraphId(ge::ComputeGraphPtr &output_merged_c | |||||
| Status ge::GraphPartitioner::RemoveNodeAndEdgeBetweenEndPld(ge::ComputeGraphPtr &output_merged_compute_graph, | Status ge::GraphPartitioner::RemoveNodeAndEdgeBetweenEndPld(ge::ComputeGraphPtr &output_merged_compute_graph, | ||||
| const std::vector<SubGraphInfoPtr> &sub_graph_list) { | const std::vector<SubGraphInfoPtr> &sub_graph_list) { | ||||
| ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>("mergedGraph"); | |||||
| output_merged_compute_graph = new_sub_graph; | |||||
| if ((new_sub_graph == nullptr) || (MergeAllSubGraph(output_merged_compute_graph, sub_graph_list) != SUCCESS)) { | |||||
| if ((output_merged_compute_graph == nullptr) || | |||||
| (MergeAllSubGraph(output_merged_compute_graph, sub_graph_list) != SUCCESS)) { | |||||
| GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MergeAllSubGraph failed."); | GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MergeAllSubGraph failed."); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -229,6 +228,9 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } | } | ||||
| ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>(original_compute_graph->GetName()); | |||||
| GE_CHECK_NOTNULL(new_sub_graph); | |||||
| output_merged_compute_graph = new_sub_graph; | |||||
| GE_TIMESTAMP_START(MergeGraphRemoveNode); | GE_TIMESTAMP_START(MergeGraphRemoveNode); | ||||
| if (RemoveNodeAndEdgeBetweenEndPld(output_merged_compute_graph, sub_graph_list) != ge::SUCCESS) { | if (RemoveNodeAndEdgeBetweenEndPld(output_merged_compute_graph, sub_graph_list) != ge::SUCCESS) { | ||||
| GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: merging sub-graphs failed"); | GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: merging sub-graphs failed"); | ||||
| @@ -70,6 +70,7 @@ OpDescPtr SameTransdataBreadthFusionPass::GetCastOp(const GeTensorDesc &in_desc, | |||||
| cast_op_name << "fusion_cast_" << fusion_cast_op_count++; | cast_op_name << "fusion_cast_" << fusion_cast_op_count++; | ||||
| auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST); | auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST); | ||||
| auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op); | auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op); | ||||
| node_op.BreakConnect(); | |||||
| if (cast_op == nullptr) { | if (cast_op == nullptr) { | ||||
| GELOGE(INTERNAL_ERROR, "new fusion cast op failed!"); | GELOGE(INTERNAL_ERROR, "new fusion cast op failed!"); | ||||
| return nullptr; | return nullptr; | ||||
| @@ -501,6 +501,7 @@ OpDescPtr TransOpWithoutReshapeFusionPass::GetCastOp(const GeTensorDesc &cast_in | |||||
| cast_op_name << "fusion_cast_op_" << fusion_cast_op_count++; | cast_op_name << "fusion_cast_op_" << fusion_cast_op_count++; | ||||
| auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST); | auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST); | ||||
| auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op); | auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op); | ||||
| node_op.BreakConnect(); | |||||
| if (cast_op == nullptr) { | if (cast_op == nullptr) { | ||||
| GELOGE(INTERNAL_ERROR, "new cast op failed!"); | GELOGE(INTERNAL_ERROR, "new cast op failed!"); | ||||
| return nullptr; | return nullptr; | ||||
| @@ -19,8 +19,6 @@ | |||||
| #include <set> | #include <set> | ||||
| #include <string> | #include <string> | ||||
| #include <utility> | #include <utility> | ||||
| #include "common/formats/format_transfers/format_transfer_fractal_nz.h" | |||||
| #include "common/formats/format_transfers/format_transfer_fractal_z.h" | |||||
| #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" | #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" | ||||
| #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" | #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" | ||||
| #include "common/formats/format_transfers/format_transfer_transpose.h" | #include "common/formats/format_transfers/format_transfer_transpose.h" | ||||
| @@ -34,6 +32,7 @@ | |||||
| #include "graph/common/transop_util.h" | #include "graph/common/transop_util.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| #include "graph/shape_refiner.h" | |||||
| #include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
| #include "graph/manager/util/rt_context_util.h" | #include "graph/manager/util/rt_context_util.h" | ||||
| #include "graph/optimize/graph_optimize.h" | #include "graph/optimize/graph_optimize.h" | ||||
| @@ -123,9 +122,6 @@ static std::map<std::string, ge::DataType> output_type_str_to_datatype = { | |||||
| {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; | {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; | ||||
| const char *const kMbatchSwitchnName = "mbatch-switch-name"; | const char *const kMbatchSwitchnName = "mbatch-switch-name"; | ||||
| const int64_t kGemmNdShapeSize = 2; | |||||
| const int64_t kGemmAlignSize32 = 32; | |||||
| const int64_t kGemmAlignSize16 = 16; | |||||
| OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) { | OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) { | ||||
| GeTensorPtr tensor = MakeShared<GeTensor>(); | GeTensorPtr tensor = MakeShared<GeTensor>(); | ||||
| @@ -1135,114 +1131,9 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status ProcessGemmFractalZ(GeShape &src_shape, std::vector<int64_t> &dst_shape_vec) { | |||||
| dst_shape_vec.clear(); | |||||
| if (src_shape.GetDims().size() != kGemmNdShapeSize) { | |||||
| GELOGE(INTERNAL_ERROR, "gemm shape size must be 2"); | |||||
| return FAILED; | |||||
| } | |||||
| dst_shape_vec.push_back(formats::Ceil(src_shape.GetDim(0), kGemmAlignSize32)); | |||||
| dst_shape_vec.push_back(formats::Ceil(src_shape.GetDim(1), kGemmAlignSize16)); | |||||
| dst_shape_vec.push_back(kGemmAlignSize16); | |||||
| dst_shape_vec.push_back(kGemmAlignSize32); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status SetInOutForGemm(GeTensorDescPtr &input, GeTensorDescPtr &output, GeShape shape, Format format) { | |||||
| input->SetShape(shape); | |||||
| input->SetFormat(format); | |||||
| output->SetShape(shape); | |||||
| output->SetFormat(format); | |||||
| int64_t input_shape_size = 0; | |||||
| int64_t output_shape_size = 0; | |||||
| ge::graphStatus input_graph_status = ge::TensorUtils::GetTensorSizeInBytes(*input, input_shape_size); | |||||
| ge::graphStatus output_graph_status = ge::TensorUtils::GetTensorMemorySizeInBytes(*output, output_shape_size); | |||||
| if ((input_graph_status != ge::GRAPH_SUCCESS) && (output_graph_status != ge::GRAPH_SUCCESS)) { | |||||
| GELOGE(GRAPH_FAILED, "GetTensorSize failed!"); | |||||
| return FAILED; | |||||
| } | |||||
| ge::TensorUtils::SetSize(*input, input_shape_size); | |||||
| ge::TensorUtils::SetSize(*output, output_shape_size); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ProcessSingleOpInput(NodePtr &node_ptr, string &single_op_input_format) { | |||||
| ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_input_format); | |||||
| auto op_desc = node_ptr->GetOpDesc(); | |||||
| auto data_input = op_desc->MutableInputDesc(0); | |||||
| auto data_output = op_desc->MutableOutputDesc(0); | |||||
| ge::Format src_format = data_input->GetFormat(); | |||||
| ge::DataType src_dt = data_input->GetDataType(); | |||||
| ge::GeShape src_shape = data_input->GetShape(); | |||||
| std::vector<int64_t> dst_shape_vec; | |||||
| if (input_format == FORMAT_FRACTAL_NZ) { | |||||
| formats::FormatTransferFractalNz transfer; | |||||
| if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Op [%s] trans FZ Shape failed.", op_desc->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| ge::GeShape dst_shape(dst_shape_vec); | |||||
| if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_NZ) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_NZ desc failed.", op_desc->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } else if (input_format == FORMAT_FRACTAL_Z) { | |||||
| if (ProcessGemmFractalZ(src_shape, dst_shape_vec) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Op [%s] trans FRACTAL_Z Shape failed.", op_desc->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| ge::GeShape dst_shape(dst_shape_vec); | |||||
| if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_Z) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_Z desc failed.", op_desc->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| // Gemm shape and format should be set at this stage, temporary solution. | |||||
| auto out_anchor = node_ptr->GetOutDataAnchor(0); | |||||
| for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { | |||||
| GE_CHECK_NOTNULL(in_anchor); | |||||
| auto index = static_cast<uint32_t>(in_anchor->GetIdx()); | |||||
| ge::NodePtr next_node = in_anchor->GetOwnerNode(); | |||||
| GE_CHECK_NOTNULL(next_node); | |||||
| auto next_op_desc = next_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(next_op_desc); | |||||
| auto input_desc = next_op_desc->MutableInputDesc(index); | |||||
| GE_CHECK_NOTNULL(input_desc); | |||||
| input_desc->SetFormat(input_format); | |||||
| input_desc->SetShape(data_output->GetShape()); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ProcessSingleOpOutput(OpDescPtr &op_desc, string &single_op_output_format) { | |||||
| ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_output_format); | |||||
| auto data_input = op_desc->MutableInputDesc(0); | |||||
| ge::Format src_format = data_input->GetFormat(); | |||||
| ge::DataType src_dt = data_input->GetDataType(); | |||||
| ge::GeShape src_shape = data_input->GetShape(); | |||||
| std::vector<int64_t> dst_shape_vec; | |||||
| if (input_format == FORMAT_FRACTAL_NZ) { | |||||
| formats::FormatTransferFractalNz transfer; | |||||
| if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Op [%s] trans FZ Shape failed.", op_desc->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| ge::GeShape dst_shape(dst_shape_vec); | |||||
| data_input->SetShape(dst_shape); | |||||
| data_input->SetFormat(FORMAT_FRACTAL_NZ); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ProcessDataNodeDynShape(NodePtr &node_ptr, bool &is_single_op) { | |||||
| Status ProcessDataNodeDynShape(NodePtr &node_ptr) { | |||||
| auto op_desc = node_ptr->GetOpDesc(); | auto op_desc = node_ptr->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| std::string single_op_input_format; | |||||
| if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_input_format", single_op_input_format))) { | |||||
| if (ProcessSingleOpInput(node_ptr, single_op_input_format) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Process single op input [%s] failed.", node_ptr->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| bool set_fp16 = false; | bool set_fp16 = false; | ||||
| if (!ge::AttrUtils::GetBool(node_ptr->GetOpDesc(), "input_fp16", set_fp16) || !set_fp16) { | if (!ge::AttrUtils::GetBool(node_ptr->GetOpDesc(), "input_fp16", set_fp16) || !set_fp16) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -1375,16 +1266,9 @@ bool NeedUpdateOutputByOutputTypeParm(std::string &output_type, NodePtr &src_nod | |||||
| return false; | return false; | ||||
| } | } | ||||
| Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type, bool &is_single_op) { | |||||
| Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type) { | |||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| std::string single_op_output_format; | |||||
| if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_output_format", single_op_output_format))) { | |||||
| if (ProcessSingleOpOutput(op_desc, single_op_output_format) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Process single op output [%s] failed.", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| ge::DataType output_data_type = ge::DT_FLOAT; | ge::DataType output_data_type = ge::DT_FLOAT; | ||||
| for (const auto &in_anchor : node->GetAllInDataAnchors()) { | for (const auto &in_anchor : node->GetAllInDataAnchors()) { | ||||
| @@ -1717,7 +1601,8 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) { | |||||
| auto format = desc.GetFormat(); | auto format = desc.GetFormat(); | ||||
| auto origin_format = desc.GetOriginFormat(); | auto origin_format = desc.GetOriginFormat(); | ||||
| bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); | bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); | ||||
| if (is_internal) { | |||||
| bool need_check_internal_format = (!options_.is_single_op) && is_internal; | |||||
| if (need_check_internal_format) { | |||||
| GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not support.", | GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not support.", | ||||
| TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::FormatToSerialString(origin_format).c_str()); | TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::FormatToSerialString(origin_format).c_str()); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -2164,6 +2049,7 @@ Status GraphPrepare::GenerateInfershapeGraph(ConstGraphPtr graph) { | |||||
| GELOGE(ret, "Run ge_passes infershape for preprocess failed, ret:%u.", ret); | GELOGE(ret, "Run ge_passes infershape for preprocess failed, ret:%u.", ret); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| ShapeRefiner::ClearContextMap(); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -2389,6 +2275,7 @@ Status GraphPrepare::InferShapeForPreprocess() { | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| ShapeRefiner::ClearContextMap(); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Run ge_passes infershape for preprocess failed, ret:%u.", ret); | GELOGE(ret, "Run ge_passes infershape for preprocess failed, ret:%u.", ret); | ||||
| return ret; | return ret; | ||||
| @@ -2821,14 +2708,14 @@ Status GraphPrepare::UpdateInputOutputByOptions() { | |||||
| } | } | ||||
| if (node_ptr->GetType() == DATA) { | if (node_ptr->GetType() == DATA) { | ||||
| if (ProcessDataNodeDynShape(node_ptr, options_.is_single_op) != SUCCESS) { | |||||
| if (ProcessDataNodeDynShape(node_ptr) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Process data node failed"); | GELOGE(INTERNAL_ERROR, "Process data node failed"); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } | } | ||||
| if (node_ptr->GetType() == ge::NETOUTPUT) { | if (node_ptr->GetType() == ge::NETOUTPUT) { | ||||
| if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype, options_.is_single_op) != SUCCESS) { | |||||
| if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Process netoutput node failed"); | GELOGE(INTERNAL_ERROR, "Process netoutput node failed"); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -389,8 +389,8 @@ Status AippOp::SetDefaultParams() { | |||||
| GELOGI("parse aipp params:input_format:%s, csc_switch:%d.", | GELOGI("parse aipp params:input_format:%s, csc_switch:%d.", | ||||
| domi::AippOpParams::InputFormat_Name(aipp_params_->input_format()).c_str(), aipp_params_->csc_switch()); | domi::AippOpParams::InputFormat_Name(aipp_params_->input_format()).c_str(), aipp_params_->csc_switch()); | ||||
| GELOGI("parse aipp params:mean_chn_0:%d, mean_chn_1:%d, mean_chn_2:%d.", aipp_params_->mean_chn_0(), | |||||
| aipp_params_->mean_chn_1(), aipp_params_->mean_chn_2()); | |||||
| GELOGI("parse aipp params:mean_chn_0:%d, mean_chn_1:%d, mean_chn_2:%d, mean_chn_3:%d.", aipp_params_->mean_chn_0(), | |||||
| aipp_params_->mean_chn_1(), aipp_params_->mean_chn_2(), aipp_params_->mean_chn_3()); | |||||
| GELOGI("parse aipp params:min_chn_0:%f, min_chn_1:%f, min_chn_2:%f.", aipp_params_->min_chn_0(), | GELOGI("parse aipp params:min_chn_0:%f, min_chn_1:%f, min_chn_2:%f.", aipp_params_->min_chn_0(), | ||||
| aipp_params_->min_chn_1(), aipp_params_->min_chn_2()); | aipp_params_->min_chn_1(), aipp_params_->min_chn_2()); | ||||
| @@ -40,6 +40,23 @@ namespace ge { | |||||
| namespace { | namespace { | ||||
| const char *const kMbatchSwitchnName = "mbatch-switch-name"; | const char *const kMbatchSwitchnName = "mbatch-switch-name"; | ||||
| } // namespace | } // namespace | ||||
| static void ConvertShape2Nhwc(Format &format, vector<int64_t> &shape_vec) { | |||||
| if ((format == FORMAT_NHWC) || (shape_vec.size() != static_cast<size_t>(NORMAL_TENSOR_SIZE))) { | |||||
| return; | |||||
| } | |||||
| if (format != FORMAT_NCHW) { | |||||
| GELOGW("The format is not NCHW, current format is %s", TypeUtils::FormatToSerialString(format).c_str()); | |||||
| return; | |||||
| } | |||||
| vector<int64_t> shape_vec_tmp; | |||||
| shape_vec.swap(shape_vec_tmp); | |||||
| shape_vec.push_back(shape_vec_tmp[NCHW_DIM_N]); | |||||
| shape_vec.push_back(shape_vec_tmp[NCHW_DIM_H]); | |||||
| shape_vec.push_back(shape_vec_tmp[NCHW_DIM_W]); | |||||
| shape_vec.push_back(shape_vec_tmp[NCHW_DIM_C]); | |||||
| return; | |||||
| } | |||||
| Status InsertNewOpUtil::Init() { | Status InsertNewOpUtil::Init() { | ||||
| insert_op_conf_.reset((new (std::nothrow) domi::InsertNewOps())); | insert_op_conf_.reset((new (std::nothrow) domi::InsertNewOps())); | ||||
| GE_CHECK_NOTNULL(insert_op_conf_); | GE_CHECK_NOTNULL(insert_op_conf_); | ||||
| @@ -223,11 +240,13 @@ Status InsertNewOpUtil::UpdatePrevNodeByAipp(NodePtr &node, std::set<NodePtr> &s | |||||
| GELOGE(FAILED, "UpdateOutputDesc fail, graph_ret:%d", graph_ret); | GELOGE(FAILED, "UpdateOutputDesc fail, graph_ret:%d", graph_ret); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GELOGI("Get size [%ld] from aipp [%s].", size, aipp_op_desc->GetName().c_str()); | |||||
| GELOGI("Get input size [%ld] from aipp [%s].", size, aipp_op_desc->GetName().c_str()); | |||||
| if (size == 0) { | if (size == 0) { | ||||
| GELOGE(FAILED, "Can not get size from aipp [%s]", aipp_op_desc->GetName().c_str()); | GELOGE(FAILED, "Can not get size from aipp [%s]", aipp_op_desc->GetName().c_str()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| // Save the input size of aipp node, which will be used in dumping aipp node or fused aipp node | |||||
| (void)AttrUtils::SetInt(aipp_input, ATTR_NAME_INPUT_ORIGIN_SIZE, size); | |||||
| auto in_data_anchor = node->GetInDataAnchor(0); | auto in_data_anchor = node->GetInDataAnchor(0); | ||||
| GE_CHECK_NOTNULL(in_data_anchor); | GE_CHECK_NOTNULL(in_data_anchor); | ||||
| @@ -305,6 +324,8 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt | |||||
| auto data_opdesc = data->GetOpDesc(); | auto data_opdesc = data->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(data_opdesc); | GE_CHECK_NOTNULL(data_opdesc); | ||||
| Format old_format = data_opdesc->MutableOutputDesc(0)->GetFormat(); | |||||
| auto ret = data_opdesc->UpdateOutputDesc(0, *input_desc); | auto ret = data_opdesc->UpdateOutputDesc(0, *input_desc); | ||||
| if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
| GELOGE(INTERNAL_ERROR, "Failed to update data %s output using switchn %s", data->GetName().c_str(), | GELOGE(INTERNAL_ERROR, "Failed to update data %s output using switchn %s", data->GetName().c_str(), | ||||
| @@ -317,9 +338,34 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt | |||||
| switchn->GetName().c_str()); | switchn->GetName().c_str()); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| // Update attr _mbatch_origin_input_dims for data when it is linked to aipp | |||||
| UpdateMultiBatchInputDims(data_opdesc, old_format); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void InsertNewOpUtil::UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format) { | |||||
| if (!data_opdesc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||||
| GELOGW("Failed to acquire _mbatch_origin_input_dims attr from node [%s]", data_opdesc->GetName().c_str()); | |||||
| return; | |||||
| } | |||||
| auto new_data_dims = data_opdesc->GetOutputDesc(0).GetShape().GetDims(); | |||||
| vector<int64_t> origin_input_dims; | |||||
| (void)AttrUtils::GetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||||
| // Convert origin_input_dims to NHWC because data format is set to NHWC when it is linked to aipp. | |||||
| ConvertShape2Nhwc(old_format, origin_input_dims); | |||||
| if (new_data_dims.size() != origin_input_dims.size()) { | |||||
| return; | |||||
| } | |||||
| for (size_t i = 0; i < origin_input_dims.size(); ++i) { | |||||
| // Need to update shape when aipp has crop function because H,W is different, ignore -1. | |||||
| if (origin_input_dims[i] > 0) { | |||||
| origin_input_dims[i] = new_data_dims[i]; | |||||
| } | |||||
| } | |||||
| (void)AttrUtils::SetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||||
| return; | |||||
| } | |||||
| Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std::set<NodePtr>> &data_next_node_map) { | Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std::set<NodePtr>> &data_next_node_map) { | ||||
| GELOGI("Start to get data and next node %s.", node->GetName().c_str()); | GELOGI("Start to get data and next node %s.", node->GetName().c_str()); | ||||
| OpDescPtr data_op = node->GetOpDesc(); | OpDescPtr data_op = node->GetOpDesc(); | ||||
| @@ -420,15 +466,18 @@ Status InsertNewOpUtil::RecordAIPPInfoToData(const ComputeGraphPtr &graph) { | |||||
| GetInputOutputInfo(data_node, aipp_it, input, output); | GetInputOutputInfo(data_node, aipp_it, input, output); | ||||
| input_dims.emplace_back(input); | input_dims.emplace_back(input); | ||||
| output_dims.emplace_back(output); | output_dims.emplace_back(output); | ||||
| // When static aipp is set, need to get the model input dims which processed by aipp | |||||
| GE_RETURN_IF_ERROR(SetModelInputDims(data_node, aipp_it)); | |||||
| } | } | ||||
| if (!AttrUtils::SetListStr(data_node->GetOpDesc(), ATTR_NAME_AIPP_INPUTS, input_dims)) { | if (!AttrUtils::SetListStr(data_node->GetOpDesc(), ATTR_NAME_AIPP_INPUTS, input_dims)) { | ||||
| GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_AIPP_INPUTS.c_str()); | |||||
| GELOGE(FAILED, "SetListStr of %s failed.", ATTR_NAME_AIPP_INPUTS.c_str()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| if (!AttrUtils::SetListStr(data_node->GetOpDesc(), ATTR_NAME_AIPP_OUTPUTS, output_dims)) { | if (!AttrUtils::SetListStr(data_node->GetOpDesc(), ATTR_NAME_AIPP_OUTPUTS, output_dims)) { | ||||
| GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_AIPP_OUTPUTS.c_str()); | |||||
| GELOGE(FAILED, "SetListStr of %s failed.", ATTR_NAME_AIPP_OUTPUTS.c_str()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } | } | ||||
| @@ -473,4 +522,41 @@ Status InsertNewOpUtil::GetInputOutputInfo(NodePtr &data_node, NodePtr &aipp_nod | |||||
| data_node->GetName().c_str(), aipp_node->GetName().c_str(), input.c_str(), output.c_str()); | data_node->GetName().c_str(), aipp_node->GetName().c_str(), input.c_str(), output.c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status InsertNewOpUtil::SetModelInputDims(NodePtr &data_node, NodePtr &aipp_node) { | |||||
| GE_CHECK_NOTNULL(data_node); | |||||
| GE_CHECK_NOTNULL(aipp_node); | |||||
| OpDescPtr data_opdesc = data_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(data_opdesc); | |||||
| OpDescPtr aipp_opdesc = aipp_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(aipp_opdesc); | |||||
| // In dynamic bacth/hw scenario, the new model input dims only need be set once | |||||
| if (data_node->GetOpDesc()->HasAttr(ATTR_NAME_INPUT_DIMS)) { | |||||
| GELOGD("Data %s already has attribute %s", data_node->GetOpDesc()->GetName().c_str(), ATTR_NAME_INPUT_DIMS.c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| vector<int64_t> model_input_dims; | |||||
| vector<int64_t> origin_input_dims; | |||||
| if (AttrUtils::GetListInt(aipp_opdesc, ATTR_NAME_INPUT_DIMS, model_input_dims) && !model_input_dims.empty()) { | |||||
| // When dynamic bacth/hw is set, N or HW need to be set to -1 | |||||
| if (AttrUtils::GetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims) && | |||||
| !origin_input_dims.empty()) { | |||||
| GELOGI("In dynamic bacth/hw scenario, N or HW need to be set to -1. model_input_dims: %s, origin_input_dims: %s", | |||||
| formats::JoinToString(model_input_dims).c_str(), formats::JoinToString(origin_input_dims).c_str()); | |||||
| for (size_t i = 0; i < origin_input_dims.size(); ++i) { | |||||
| // N or HW need to be set to -1 | |||||
| if (origin_input_dims[i] < 0) { | |||||
| model_input_dims[i] = origin_input_dims[i]; | |||||
| } | |||||
| } | |||||
| } | |||||
| GELOGD("After set H/W to -1, the model input dims: %s.", formats::JoinToString(model_input_dims).c_str()); | |||||
| if (!AttrUtils::SetListInt(data_opdesc, ATTR_NAME_INPUT_DIMS, model_input_dims)) { | |||||
| GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_INPUT_DIMS.c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -61,11 +61,13 @@ class InsertNewOpUtil { | |||||
| std::unique_ptr<domi::InsertNewOps> insert_op_conf_; | std::unique_ptr<domi::InsertNewOps> insert_op_conf_; | ||||
| void UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format); | |||||
| Status UpdatePrevNodeByAipp(NodePtr &node, std::set<NodePtr> &switchns); | Status UpdatePrevNodeByAipp(NodePtr &node, std::set<NodePtr> &switchns); | ||||
| Status UpdateDataBySwitchN(const NodePtr &switchn, const NodePtr &data); | Status UpdateDataBySwitchN(const NodePtr &switchn, const NodePtr &data); | ||||
| Status GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std::set<NodePtr>> &data_next_node_map); | Status GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std::set<NodePtr>> &data_next_node_map); | ||||
| Status GetAllAipps(const NodePtr &node, std::vector<NodePtr> &aipps); | Status GetAllAipps(const NodePtr &node, std::vector<NodePtr> &aipps); | ||||
| Status GetInputOutputInfo(NodePtr &data_node, NodePtr &aipp_node, std::string &input, std::string &output); | Status GetInputOutputInfo(NodePtr &data_node, NodePtr &aipp_node, std::string &input, std::string &output); | ||||
| Status SetModelInputDims(NodePtr &data_node, NodePtr &aipp_node); | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -44,6 +44,7 @@ const int kSwitchNPredIndex = 1; | |||||
| const int kDataOutIndex = 0; | const int kDataOutIndex = 0; | ||||
| const int kDataInIndex = 0; | const int kDataInIndex = 0; | ||||
| const int kMergeDataOutIndex = 0; | const int kMergeDataOutIndex = 0; | ||||
| const int kStaticOutput = -1; | |||||
| const size_t kMaxShapesCount = 100; | const size_t kMaxShapesCount = 100; | ||||
| const size_t kMinShapesCount = 2; | const size_t kMinShapesCount = 2; | ||||
| @@ -947,15 +948,18 @@ Status GetDynamicOutputShape(ComputeGraphPtr &graph) { | |||||
| GELOGE(PARAM_INVALID, "Graph is null ,para is invalid"); | GELOGE(PARAM_INVALID, "Graph is null ,para is invalid"); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| for (auto &node : graph->GetAllNodes()) { | |||||
| for (auto &node : graph->GetDirectNode()) { | |||||
| if (node->GetType() == NETOUTPUT) { | if (node->GetType() == NETOUTPUT) { | ||||
| auto netoutput_desc = node->GetOpDesc(); | auto netoutput_desc = node->GetOpDesc(); | ||||
| auto inputnode_to_netoutput = node->GetInAllNodes(); | auto inputnode_to_netoutput = node->GetInAllNodes(); | ||||
| std::vector<size_t> dynamic_output_index; | |||||
| for (size_t j = 0; j < inputnode_to_netoutput.size(); j++) { | for (size_t j = 0; j < inputnode_to_netoutput.size(); j++) { | ||||
| bool ret = false; | bool ret = false; | ||||
| (void)AttrUtils::GetBool(inputnode_to_netoutput.at(j)->GetOpDesc(), ATTR_INSERT_BY_MBATCH, ret); | (void)AttrUtils::GetBool(inputnode_to_netoutput.at(j)->GetOpDesc(), ATTR_INSERT_BY_MBATCH, ret); | ||||
| if (inputnode_to_netoutput.at(j)->GetType() == MERGE && ret) { | if (inputnode_to_netoutput.at(j)->GetType() == MERGE && ret) { | ||||
| GELOGI("Find the merge node %s with mbatch attr", inputnode_to_netoutput.at(j)->GetName().c_str()); | |||||
| GELOGI("Find the merge node %s with mbatch attr and the index is %zu", | |||||
| inputnode_to_netoutput.at(j)->GetName().c_str(), j); | |||||
| dynamic_output_index.emplace_back(j); | |||||
| for (size_t i = 0; i < inputnode_to_netoutput.at(j)->GetInNodes().size(); i++) { | for (size_t i = 0; i < inputnode_to_netoutput.at(j)->GetInNodes().size(); i++) { | ||||
| auto input_desc = inputnode_to_netoutput.at(j)->GetOpDesc(); | auto input_desc = inputnode_to_netoutput.at(j)->GetOpDesc(); | ||||
| auto input_tensor_desc = input_desc->GetInputDesc(i); | auto input_tensor_desc = input_desc->GetInputDesc(i); | ||||
| @@ -967,6 +971,17 @@ Status GetDynamicOutputShape(ComputeGraphPtr &graph) { | |||||
| } | } | ||||
| } | } | ||||
| if (dynamic_output_dims.size() > 0) { | if (dynamic_output_dims.size() > 0) { | ||||
| for (size_t k = 0; k < inputnode_to_netoutput.size(); k++) { | |||||
| auto it = std::find(dynamic_output_index.begin(), dynamic_output_index.end(), k); | |||||
| if (it != dynamic_output_index.end()) { | |||||
| continue; | |||||
| } | |||||
| auto tensor_desc = netoutput_desc->GetInputDesc(k); | |||||
| auto shape = tensor_desc.GetShape().ToString(); | |||||
| std::string static_output_shape = std::to_string(kStaticOutput) + "," + std::to_string(k) + "," + shape; | |||||
| GELOGI("The static output shape msg is %s", static_output_shape.c_str()); | |||||
| dynamic_output_dims.emplace_back(static_output_shape); | |||||
| } | |||||
| if (!AttrUtils::SetListStr(netoutput_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims)) { | if (!AttrUtils::SetListStr(netoutput_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims)) { | ||||
| GELOGE(FAILED, "Set dynamic output dims attr failed"); | GELOGE(FAILED, "Set dynamic output dims attr failed"); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -31,6 +31,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace { | namespace { | ||||
| const size_t kConcatV2InputNum = 3; | const size_t kConcatV2InputNum = 3; | ||||
| const int kSupportEmptyTensorRank = 1; | |||||
| const std::set<DataType> concatv2_supported_type = {DT_INT32, DT_FLOAT}; | const std::set<DataType> concatv2_supported_type = {DT_INT32, DT_FLOAT}; | ||||
| template <typename T> | template <typename T> | ||||
| @@ -39,7 +40,12 @@ void GetOutputData(std::vector<T> &y_data, int64_t loop, size_t &input_size, | |||||
| for (int64_t i = 0; i < loop; i++) { | for (int64_t i = 0; i < loop; i++) { | ||||
| for (size_t k = 0; k < input_size; k++) { | for (size_t k = 0; k < input_size; k++) { | ||||
| GeShape datak_shape = input.at(k)->GetTensorDesc().GetShape(); | GeShape datak_shape = input.at(k)->GetTensorDesc().GetShape(); | ||||
| const T *datak = reinterpret_cast<const T *>(input.at(k)->GetData().data()); | |||||
| auto buffer = input.at(k)->GetData(); | |||||
| const T *datak = reinterpret_cast<const T *>(buffer.data()); | |||||
| if (datak == nullptr || buffer.size() == 0) { | |||||
| GELOGW("input[%zu] is with no data", k); | |||||
| continue; | |||||
| } | |||||
| int64_t gapk = datak_shape.GetShapeSize() / loop; // [2,3] is 6/loop | int64_t gapk = datak_shape.GetShapeSize() / loop; // [2,3] is 6/loop | ||||
| for (int64_t j = 0; j < gapk; j++) { | for (int64_t j = 0; j < gapk; j++) { | ||||
| y_data.push_back(datak[j + gapk * i]); | y_data.push_back(datak[j + gapk * i]); | ||||
| @@ -63,7 +69,8 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge: | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| int tidx = -1; | int tidx = -1; | ||||
| Status ret = ConcatV2PreCompute(input, tidx); | |||||
| ConstGeTensorPtr tensor = nullptr; | |||||
| Status ret = ConcatV2PreCompute(input, tidx, tensor); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -71,9 +78,8 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge: | |||||
| size_t input_size = input.size(); // N + 1 | size_t input_size = input.size(); // N + 1 | ||||
| input_size--; // N | input_size--; // N | ||||
| ConstGeTensorPtr tensor0 = input.at(0); | |||||
| GE_CHECK_NOTNULL(tensor0); | |||||
| DataType data_type = tensor0->GetTensorDesc().GetDataType(); | |||||
| GE_CHECK_NOTNULL(tensor); | |||||
| DataType data_type = tensor->GetTensorDesc().GetDataType(); | |||||
| uint32_t length = 0; | uint32_t length = 0; | ||||
| if (!TypeUtils::GetDataTypeLength(data_type, length)) { | if (!TypeUtils::GetDataTypeLength(data_type, length)) { | ||||
| GELOGW("Can't GetDataTypeLength of data_type: %s", TypeUtils::DataTypeToSerialString(data_type).c_str()); | GELOGW("Can't GetDataTypeLength of data_type: %s", TypeUtils::DataTypeToSerialString(data_type).c_str()); | ||||
| @@ -91,7 +97,7 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge: | |||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| } | } | ||||
| GeShape data0_shape = tensor0->GetTensorDesc().GetShape(); | |||||
| GeShape data0_shape = tensor->GetTensorDesc().GetShape(); | |||||
| int64_t loop = 1; | int64_t loop = 1; | ||||
| for (int i = 0; i < tidx; i++) { | for (int i = 0; i < tidx; i++) { | ||||
| loop *= data0_shape.GetDim(i); | loop *= data0_shape.GetDim(i); | ||||
| @@ -110,29 +116,33 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge: | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx) { | |||||
| Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx, | |||||
| ConstGeTensorPtr &tensor) { | |||||
| size_t input_size = input.size(); | size_t input_size = input.size(); | ||||
| // N >= 2 and N + 1 >= 3 | // N >= 2 and N + 1 >= 3 | ||||
| if (input_size < kConcatV2InputNum) { | if (input_size < kConcatV2InputNum) { | ||||
| GELOGI("The number of input for ConcatV2 must not be less than %zu.", kConcatV2InputNum); | GELOGI("The number of input for ConcatV2 must not be less than %zu.", kConcatV2InputNum); | ||||
| return NOT_CHANGED; | return NOT_CHANGED; | ||||
| } | } | ||||
| bool has_empty_tensor = false; | |||||
| input_size--; | |||||
| for (size_t i = 0; i < input_size; i++) { | for (size_t i = 0; i < input_size; i++) { | ||||
| if (input[i] == nullptr) { | if (input[i] == nullptr) { | ||||
| GELOGI("Input%zu must not be null.", i); | GELOGI("Input%zu must not be null.", i); | ||||
| return NOT_CHANGED; | return NOT_CHANGED; | ||||
| } | } | ||||
| if (input.at(i)->GetData().size() == 0) { | if (input.at(i)->GetData().size() == 0) { | ||||
| GELOGI("Check data size fail. input%zu size is 0.", i); | |||||
| return NOT_CHANGED; | |||||
| GELOGW("input[%zu] is with no data.", i); | |||||
| has_empty_tensor = true; | |||||
| continue; | |||||
| } | |||||
| if (tensor == nullptr) { | |||||
| tensor = input.at(i); // get first valid tensor with data | |||||
| } | } | ||||
| } | } | ||||
| input_size--; | |||||
| ConstGeTensorPtr tensor0 = input.at(0); | |||||
| GE_CHECK_NOTNULL(tensor0); | |||||
| DataType data_type = tensor0->GetTensorDesc().GetDataType(); | |||||
| GE_CHECK_NOTNULL(tensor); | |||||
| DataType data_type = tensor->GetTensorDesc().GetDataType(); | |||||
| for (size_t i = 1; i < input_size; i++) { | for (size_t i = 1; i < input_size; i++) { | ||||
| if (data_type != input.at(i)->GetTensorDesc().GetDataType()) { | if (data_type != input.at(i)->GetTensorDesc().GetDataType()) { | ||||
| GELOGI("Data type of N inputs for ConcatV2 not the same, check input %zu failed.", i); | GELOGI("Data type of N inputs for ConcatV2 not the same, check input %zu failed.", i); | ||||
| @@ -149,13 +159,18 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &i | |||||
| ConstGeTensorPtr tensor_axis = input.at(input_size); | ConstGeTensorPtr tensor_axis = input.at(input_size); | ||||
| GE_CHECK_NOTNULL(tensor_axis); | GE_CHECK_NOTNULL(tensor_axis); | ||||
| const int *axis = reinterpret_cast<const int *>(tensor_axis->GetData().data()); | const int *axis = reinterpret_cast<const int *>(tensor_axis->GetData().data()); | ||||
| tidx = axis[0]; // [-rank(values), rank(values)) | |||||
| int dims = static_cast<int>(tensor0->GetTensorDesc().GetShape().GetDimNum()); // rank | |||||
| GE_CHECK_NOTNULL(axis); | |||||
| tidx = axis[0]; // [-rank(values), rank(values)) | |||||
| int rank = static_cast<int>(tensor->GetTensorDesc().GetShape().GetDimNum()); // rank | |||||
| if (tidx < 0) { | if (tidx < 0) { | ||||
| tidx += dims; | |||||
| tidx += rank; | |||||
| } | } | ||||
| if (tidx < 0 || tidx > dims) { | |||||
| GELOGI("ConcatV2 tidx not legal."); | |||||
| // 1. tidx should in range [0,rank) | |||||
| // 2. empty tensor only support case: [n],[m],[] | |||||
| // case: [[],[]] ,[[],[]] ,[] or other case when rank >=2 is not supported | |||||
| if (tidx < 0 || tidx >= rank || (has_empty_tensor && rank > kSupportEmptyTensorRank)) { | |||||
| GELOGW("ConcatV2 info: tidx[%d]_rank[%d]_has_empty_tensor[bool:%d] cannot be supported, skip fold.", tidx, rank, | |||||
| has_empty_tensor); | |||||
| return NOT_CHANGED; | return NOT_CHANGED; | ||||
| } | } | ||||
| @@ -28,7 +28,7 @@ class ConcatV2Kernel : public Kernel { | |||||
| std::vector<GeTensorPtr> &v_output) override; | std::vector<GeTensorPtr> &v_output) override; | ||||
| private: | private: | ||||
| Status ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx); | |||||
| Status ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx, ConstGeTensorPtr &tensor); | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -46,6 +46,8 @@ namespace ge { | |||||
| namespace { | namespace { | ||||
| const int kDecimal = 10; | const int kDecimal = 10; | ||||
| const int kSocVersionLen = 50; | const int kSocVersionLen = 50; | ||||
| const int kDefaultDeviceIdForTrain = 0; | |||||
| const int kDefaultDeviceIdForInfer = -1; | |||||
| } // namespace | } // namespace | ||||
| static std::shared_ptr<GELib> instancePtr_ = nullptr; | static std::shared_ptr<GELib> instancePtr_ = nullptr; | ||||
| @@ -194,8 +196,12 @@ Status GELib::SystemInitialize(const map<string, string> &options) { | |||||
| // In train and infer, profiling is always needed. | // In train and infer, profiling is always needed. | ||||
| InitOptions(options); | InitOptions(options); | ||||
| InitProfiling(this->options_); | InitProfiling(this->options_); | ||||
| if (is_train_mode_) { | |||||
| // 1.`is_train_mode_` means case: train | |||||
| // 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer | |||||
| // these two case need call `InitSystemWithOptions->rtGetDeviceIndexByPhyId` | |||||
| // to convert phy device id to logical device id | |||||
| // note:rtGetDeviceIndexByPhyId return `0` logical id when input phy device id is `0` | |||||
| if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { | |||||
| status = InitSystemWithOptions(this->options_); | status = InitSystemWithOptions(this->options_); | ||||
| } else { | } else { | ||||
| status = InitSystemWithoutOptions(); | status = InitSystemWithoutOptions(); | ||||
| @@ -237,7 +243,7 @@ void GELib::InitOptions(const map<string, string> &options) { | |||||
| if (iter != options.end()) { | if (iter != options.end()) { | ||||
| this->options_.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal); | this->options_.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal); | ||||
| } | } | ||||
| this->options_.device_id = 0; | |||||
| this->options_.device_id = is_train_mode_ ? kDefaultDeviceIdForTrain : kDefaultDeviceIdForInfer; | |||||
| iter = options.find(OPTION_EXEC_DEVICE_ID); | iter = options.find(OPTION_EXEC_DEVICE_ID); | ||||
| if (iter != options.end()) { | if (iter != options.end()) { | ||||
| this->options_.device_id = static_cast<int32_t>(std::strtol(iter->second.c_str(), nullptr, kDecimal)); | this->options_.device_id = static_cast<int32_t>(std::strtol(iter->second.c_str(), nullptr, kDecimal)); | ||||
| @@ -289,7 +295,8 @@ void GELib::InitOptions(const map<string, string> &options) { | |||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOptions(Options &options) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOptions(Options &options) { | ||||
| GELOGI("Training init GELib. session Id:%ld, device id :%d ", options.session_id, options.device_id); | |||||
| std::string mode = is_train_mode_ ? "Training" : "Online infer"; | |||||
| GELOGI("%s init GELib. session Id:%ld, device id :%d ", mode.c_str(), options.session_id, options.device_id); | |||||
| GEEVENT("System init with options begin, job id %s", options.job_id.c_str()); | GEEVENT("System init with options begin, job id %s", options.job_id.c_str()); | ||||
| std::lock_guard<std::mutex> lock(status_mutex_); | std::lock_guard<std::mutex> lock(status_mutex_); | ||||
| GE_IF_BOOL_EXEC(is_system_inited && !is_shutdown, | GE_IF_BOOL_EXEC(is_system_inited && !is_shutdown, | ||||
| @@ -329,13 +336,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt | |||||
| is_system_inited = true; | is_system_inited = true; | ||||
| is_shutdown = false; | is_shutdown = false; | ||||
| GELOGI("Training init GELib success."); | |||||
| GELOGI("%s init GELib success.", mode.c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GELib::SystemShutdownWithOptions(const Options &options) { | Status GELib::SystemShutdownWithOptions(const Options &options) { | ||||
| GELOGI("Training finalize GELib begin."); | |||||
| std::string mode = is_train_mode_ ? "Training" : "Online infer"; | |||||
| GELOGI("%s finalize GELib begin.", mode.c_str()); | |||||
| std::lock_guard<std::mutex> lock(status_mutex_); | std::lock_guard<std::mutex> lock(status_mutex_); | ||||
| GE_IF_BOOL_EXEC(is_shutdown || !is_system_inited, | GE_IF_BOOL_EXEC(is_shutdown || !is_system_inited, | ||||
| @@ -353,8 +361,7 @@ Status GELib::SystemShutdownWithOptions(const Options &options) { | |||||
| is_system_inited = false; | is_system_inited = false; | ||||
| is_shutdown = true; | is_shutdown = true; | ||||
| GELOGI("Training finalize GELib success."); | |||||
| GELOGI("%s finalize GELib success.", mode.c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -424,7 +431,7 @@ Status GELib::Finalize() { | |||||
| // Shut down profiling | // Shut down profiling | ||||
| ShutDownProfiling(); | ShutDownProfiling(); | ||||
| if (is_train_mode_) { | |||||
| if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { | |||||
| GELOGI("System ShutDown."); | GELOGI("System ShutDown."); | ||||
| mid_state = SystemShutdownWithOptions(this->options_); | mid_state = SystemShutdownWithOptions(this->options_); | ||||
| if (mid_state != SUCCESS) { | if (mid_state != SUCCESS) { | ||||
| @@ -39,6 +39,7 @@ | |||||
| #include "ir_build/atc_ir_common.h" | #include "ir_build/atc_ir_common.h" | ||||
| #include "omg/omg.h" | #include "omg/omg.h" | ||||
| #include "omg/parser/parser_factory.h" | #include "omg/parser/parser_factory.h" | ||||
| #include "omg/parser/parser_inner_ctx.h" | |||||
| #include "parser/common/register_tbe.h" | #include "parser/common/register_tbe.h" | ||||
| #include "register/op_registry.h" | #include "register/op_registry.h" | ||||
| #include "single_op_parser.h" | #include "single_op_parser.h" | ||||
| @@ -178,8 +179,6 @@ DEFINE_string(compress_weight_conf, "", "Optional; the config file to compress w | |||||
| DEFINE_string(enable_single_stream, "", "Optional; enable single stream. true: enable; false(default): disable"); | DEFINE_string(enable_single_stream, "", "Optional; enable single stream. true: enable; false(default): disable"); | ||||
| DEFINE_string(quant_optimize, "true", "Optional; enable quant optimize. true: enable; false(default): disable"); | |||||
| DEFINE_string(log, "default", "Optional; generate atc log. Support debug, info, warning, error, null"); | DEFINE_string(log, "default", "Optional; generate atc log. Support debug, info, warning, error, null"); | ||||
| DEFINE_string(dump_mode, "0", "Optional; generate infershape json,only support 1 , 0."); | DEFINE_string(dump_mode, "0", "Optional; generate infershape json,only support 1 , 0."); | ||||
| @@ -203,10 +202,7 @@ class GFlagUtils { | |||||
| "arguments explain:\n" | "arguments explain:\n" | ||||
| " --model Model file\n" | " --model Model file\n" | ||||
| " --singleop Single op definition file. atc will generate offline " | " --singleop Single op definition file. atc will generate offline " | ||||
| "model(s) for single op if --singleop is set. \n" | |||||
| " Note: Only output, soc_verion, core_type, aicore_num, auto_tune_mode, precision_mode, " | |||||
| "op_select_implmode, enable_small_channel, enable_compress_weight, compress_weight_conf " | |||||
| "enable_single_stream and log are valid in this mode \n" | |||||
| "model(s) for single op if --singleop is set.\n" | |||||
| " --weight Weight file. Required when framework is Caffe\n" | " --weight Weight file. Required when framework is Caffe\n" | ||||
| " --framework Framework type(0:Caffe; 1:MindSpore; 3:Tensorflow)\n" | " --framework Framework type(0:Caffe; 1:MindSpore; 3:Tensorflow)\n" | ||||
| " --output Output file path&name(needn't suffix, will add " | " --output Output file path&name(needn't suffix, will add " | ||||
| @@ -253,6 +249,9 @@ class GFlagUtils { | |||||
| " --op_select_implmode Set op select implmode. Support high_precision, high_performance." | " --op_select_implmode Set op select implmode. Support high_precision, high_performance." | ||||
| "default: high_performance\n" | "default: high_performance\n" | ||||
| "disable\n" | "disable\n" | ||||
| " --optypelist_for_implmode Appoint which op to use op_select_implmode, used with op_select_implmode ." | |||||
| "Separate multiple nodes with commas (,). Use double quotation marks (\") to enclose each argument." | |||||
| "E.g.: \"node_name1,node_name2\"\n" | |||||
| " --head_stream Add head stream. 0(default): disable; 1: enable\n" | " --head_stream Add head stream. 0(default): disable; 1: enable\n" | ||||
| " --soc_version The soc version. E.g.: \"Ascend310\"\n" | " --soc_version The soc version. E.g.: \"Ascend310\"\n" | ||||
| " --core_type Set core type AiCore or VectorCore. VectorCore: use vector core. " | " --core_type Set core type AiCore or VectorCore. VectorCore: use vector core. " | ||||
| @@ -270,8 +269,7 @@ class GFlagUtils { | |||||
| "Use double quotation marks (\") to enclose each argument." | "Use double quotation marks (\") to enclose each argument." | ||||
| "E.g: \"imagesize1_height,imagesize1_width;imagesize2_height,imagesize2_width\"\n" | "E.g: \"imagesize1_height,imagesize1_width;imagesize2_height,imagesize2_width\"\n" | ||||
| " --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" | " --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" | ||||
| " --enable_single_stream Enable single stream. true: enable; false(default): disable\n" | |||||
| " --quant_optimize Enable quant optimize. true(default): enable; false: disable\n"); | |||||
| " --enable_single_stream Enable single stream. true: enable; false(default): disable\n"); | |||||
| gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); | gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); | ||||
| // Using gflags to analyze input parameters | // Using gflags to analyze input parameters | ||||
| @@ -656,13 +654,36 @@ void LoadCustomOpLib() { | |||||
| std::vector<OpRegistrationData> registrationDatas = OpRegistry::Instance()->registrationDatas; | std::vector<OpRegistrationData> registrationDatas = OpRegistry::Instance()->registrationDatas; | ||||
| for (OpRegistrationData reg_data : registrationDatas) { | for (OpRegistrationData reg_data : registrationDatas) { | ||||
| bool ret = ge::OpRegistrationTbe::Instance()->Finalize(reg_data); | |||||
| if (ret) { | |||||
| OpRegistry::Instance()->Register(reg_data); | |||||
| if (reg_data.GetFrameworkType() == static_cast<domi::FrameworkType>(FLAGS_framework)) { | |||||
| bool ret = ge::OpRegistrationTbe::Instance()->Finalize(reg_data); | |||||
| if (ret) { | |||||
| (void)OpRegistry::Instance()->Register(reg_data); | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| void SaveCustomCaffeProtoPath() { | |||||
| GELOGI("Enter save custom caffe proto path."); | |||||
| string customop_path; | |||||
| const char *path_env = std::getenv("ASCEND_OPP_PATH"); | |||||
| if (path_env != nullptr) { | |||||
| std::string path = path_env; | |||||
| customop_path = path + "/framework/custom/caffe/"; | |||||
| GELOGI("Get custom proto path from env : %s", path_env); | |||||
| ge::GetParserContext().custom_proto_path = customop_path; | |||||
| return; | |||||
| } | |||||
| std::string path_base = ge::GELib::GetPath(); | |||||
| GELOGI("path_base is %s", path_base.c_str()); | |||||
| path_base = path_base.substr(0, path_base.rfind('/')); | |||||
| path_base = path_base.substr(0, path_base.rfind('/') + 1); | |||||
| customop_path = path_base + "ops/framework/custom/caffe/"; | |||||
| ge::GetParserContext().custom_proto_path = customop_path; | |||||
| return; | |||||
| } | |||||
| #endif | #endif | ||||
| Status CreateInputsForInference(const ge::Graph &graph, vector<ge::GeTensor> &inputs) { | Status CreateInputsForInference(const ge::Graph &graph, vector<ge::GeTensor> &inputs) { | ||||
| @@ -850,6 +871,7 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output | |||||
| atc_params.insert(std::pair<string, string>("is_output_adjust_hw_layout", FLAGS_is_output_adjust_hw_layout)); | atc_params.insert(std::pair<string, string>("is_output_adjust_hw_layout", FLAGS_is_output_adjust_hw_layout)); | ||||
| atc_params.insert(std::pair<string, string>("compress_weight_conf", FLAGS_compress_weight_conf)); | atc_params.insert(std::pair<string, string>("compress_weight_conf", FLAGS_compress_weight_conf)); | ||||
| atc_params.insert(std::pair<string, string>(string(ge::OUTPUT_DATATYPE), FLAGS_output_type)); | atc_params.insert(std::pair<string, string>(string(ge::OUTPUT_DATATYPE), FLAGS_output_type)); | ||||
| atc_params.insert(std::pair<string, string>("output", output)); | |||||
| Status ret = | Status ret = | ||||
| ParseGraph(graph, atc_params, FLAGS_model.c_str(), FLAGS_weight.c_str(), (domi::FrameworkType)FLAGS_framework, | ParseGraph(graph, atc_params, FLAGS_model.c_str(), FLAGS_weight.c_str(), (domi::FrameworkType)FLAGS_framework, | ||||
| @@ -982,6 +1004,8 @@ domi::Status GenerateOmModel() { | |||||
| // Load custom operator Library | // Load custom operator Library | ||||
| LoadCustomOpLib(); | LoadCustomOpLib(); | ||||
| SaveCustomCaffeProtoPath(); | |||||
| ret = ge::CheckCustomAiCpuOpLib(); | ret = ge::CheckCustomAiCpuOpLib(); | ||||
| GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!"); | GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!"); | ||||
| @@ -1043,8 +1067,6 @@ domi::Status GenerateOmModel() { | |||||
| options.insert(std::pair<string, string>(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream)); | options.insert(std::pair<string, string>(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream)); | ||||
| options.insert(std::pair<string, string>(string(ge::QUANT_OPTIMIZE), FLAGS_quant_optimize)); | |||||
| SetDynamicBatchSizeOrImagesizeOptions(); | SetDynamicBatchSizeOrImagesizeOptions(); | ||||
| if (!FLAGS_save_original_model.empty()) { | if (!FLAGS_save_original_model.empty()) { | ||||
| @@ -273,10 +273,6 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single | |||||
| } else { | } else { | ||||
| op_desc->AddInputDesc(desc.name, ge_tensor_desc); | op_desc->AddInputDesc(desc.name, ge_tensor_desc); | ||||
| } | } | ||||
| if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) { | |||||
| ge_tensor_desc.SetFormat(FORMAT_ND); | |||||
| ge_tensor_desc.SetOriginFormat(FORMAT_ND); | |||||
| } | |||||
| build_param.inputs.emplace_back(ge_tensor_desc); | build_param.inputs.emplace_back(ge_tensor_desc); | ||||
| } | } | ||||
| @@ -292,10 +288,6 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single | |||||
| TensorUtils::SetInputTensor(ge_tensor_desc, false); | TensorUtils::SetInputTensor(ge_tensor_desc, false); | ||||
| TensorUtils::SetOutputTensor(ge_tensor_desc, true); | TensorUtils::SetOutputTensor(ge_tensor_desc, true); | ||||
| op_desc->AddOutputDesc(ge_tensor_desc); | op_desc->AddOutputDesc(ge_tensor_desc); | ||||
| if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) { | |||||
| ge_tensor_desc.SetFormat(FORMAT_ND); | |||||
| ge_tensor_desc.SetOriginFormat(FORMAT_ND); | |||||
| } | |||||
| build_param.outputs.emplace_back(ge_tensor_desc); | build_param.outputs.emplace_back(ge_tensor_desc); | ||||
| } | } | ||||
| @@ -29,6 +29,8 @@ | |||||
| #include "common/types.h" | #include "common/types.h" | ||||
| #include "common/util.h" | #include "common/util.h" | ||||
| #include "common/util/error_manager/error_manager.h" | #include "common/util/error_manager/error_manager.h" | ||||
| #include "common/helper/model_helper.h" | |||||
| #include "common/ge/ge_util.h" | |||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/omg/parser/parser_inner_ctx.h" | #include "framework/omg/parser/parser_inner_ctx.h" | ||||
| #include "google/protobuf/io/zero_copy_stream_impl.h" | #include "google/protobuf/io/zero_copy_stream_impl.h" | ||||
| @@ -419,10 +421,6 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const | |||||
| GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str()); | GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str()); | ||||
| return domi::FAILED; | return domi::FAILED; | ||||
| } | } | ||||
| if (out_node->GetType() == DATA) { | |||||
| GELOGE(domi::FAILED, "out_nodes [%s] can not be set input data, please check", user_out_nodes[i].first.c_str()); | |||||
| return domi::FAILED; | |||||
| } | |||||
| auto op_desc = out_node->GetOpDesc(); | auto op_desc = out_node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| if (i < output_formats.size()) { | if (i < output_formats.size()) { | ||||
| @@ -441,24 +439,49 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const | |||||
| (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second); | (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second); | ||||
| } | } | ||||
| output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second)); | output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second)); | ||||
| output_nodes_name.push_back(out_node->GetName()); | |||||
| output_nodes_name.push_back(out_node->GetName() + ":" + std::to_string(user_out_nodes[i].second)); | |||||
| } | } | ||||
| // default output node (leaf) | // default output node (leaf) | ||||
| if (user_out_nodes.empty()) { | if (user_out_nodes.empty()) { | ||||
| for (ge::NodePtr node : compute_graph->GetDirectNode()) { | for (ge::NodePtr node : compute_graph->GetDirectNode()) { | ||||
| if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) { | if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) { | ||||
| Status ret = GetOutputLeaf(node, output_nodes_info, output_nodes_name); | |||||
| Status ret = GetOutputLeaf(node, output_nodes_info); | |||||
| GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); | GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| GetOutputNodesNameAndIndex(output_nodes_info, output_nodes_name); | |||||
| compute_graph->SetGraphOutNodesInfo(output_nodes_info); | compute_graph->SetGraphOutNodesInfo(output_nodes_info); | ||||
| domi::GetContext().net_out_nodes = output_nodes_name; | domi::GetContext().net_out_nodes = output_nodes_name; | ||||
| return domi::SUCCESS; | return domi::SUCCESS; | ||||
| } | } | ||||
| Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||||
| std::vector<std::string> &output_nodes_name) { | |||||
| void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||||
| std::vector<std::string> &output_nodes_name) { | |||||
| output_nodes_name.clear(); | |||||
| if (domi::GetContext().out_top_names.empty()) { | |||||
| // tf process, no top name. | |||||
| for (const auto output_node_info : output_nodes_info) { | |||||
| std::string node_name = output_node_info.first->GetName(); | |||||
| int32_t index = output_node_info.second; | |||||
| output_nodes_name.push_back(node_name + ":" + std::to_string(index)); | |||||
| } | |||||
| return; | |||||
| } | |||||
| // caffe process, need add top name after node_name:index | |||||
| for (size_t i = 0; i < output_nodes_info.size(); ++i) { | |||||
| std::string node_name = output_nodes_info[i].first->GetName(); | |||||
| int32_t index = output_nodes_info[i].second; | |||||
| if (i < domi::GetContext().out_top_names.size()) { | |||||
| output_nodes_name.push_back(node_name + ":" + std::to_string(index) + ":" + domi::GetContext().out_top_names[i]); | |||||
| } else { | |||||
| GELOGW("Get top name of node [%s] fail.", node_name.c_str()); | |||||
| output_nodes_name.push_back(node_name + ":" + std::to_string(index)); | |||||
| } | |||||
| } | |||||
| } | |||||
| Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info) { | |||||
| ge::OpDescPtr tmpDescPtr = node->GetOpDesc(); | ge::OpDescPtr tmpDescPtr = node->GetOpDesc(); | ||||
| if (tmpDescPtr == nullptr) { | if (tmpDescPtr == nullptr) { | ||||
| GELOGE(domi::FAILED, "Get outnode op desc fail."); | GELOGE(domi::FAILED, "Get outnode op desc fail."); | ||||
| @@ -468,7 +491,6 @@ Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> | |||||
| if (node->GetType() != NETOUTPUT) { | if (node->GetType() != NETOUTPUT) { | ||||
| for (size_t index = 0; index < size; ++index) { | for (size_t index = 0; index < size; ++index) { | ||||
| output_nodes_info.push_back(std::make_pair(node, index)); | output_nodes_info.push_back(std::make_pair(node, index)); | ||||
| output_nodes_name.push_back(node->GetName()); | |||||
| } | } | ||||
| } else { | } else { | ||||
| const auto in_anchors = node->GetAllInDataAnchors(); | const auto in_anchors = node->GetAllInDataAnchors(); | ||||
| @@ -480,7 +502,6 @@ Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> | |||||
| } | } | ||||
| auto out_node = out_anchor->GetOwnerNode(); | auto out_node = out_anchor->GetOwnerNode(); | ||||
| output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx())); | output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx())); | ||||
| output_nodes_name.push_back(out_node->GetName()); | |||||
| } | } | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -612,9 +633,16 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri | |||||
| Params::Instance()->SetTarget(target); | Params::Instance()->SetTarget(target); | ||||
| // Create an empty computegraph | // Create an empty computegraph | ||||
| ComputeGraphPtr compute_graph = nullptr; | |||||
| GE_MAKE_SHARED(compute_graph = std::make_shared<ComputeGraph>(kGraphDefaultName + "_" + CurrentTimeInStr()), | |||||
| return FAILED); | |||||
| std::string om_name; | |||||
| ParseAtcParms(atc_params, "output", om_name); | |||||
| ModelHelper model_helper; | |||||
| string graph_name = ""; | |||||
| Status name_ret = model_helper.GetBaseNameFromFileName(om_name, graph_name); | |||||
| if (name_ret != SUCCESS) { | |||||
| graph_name = kGraphDefaultName + "_" + CurrentTimeInStr(); | |||||
| } | |||||
| ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(graph_name); | |||||
| GE_CHECK_NOTNULL(compute_graph); | |||||
| graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); | graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); | ||||
| // initialize omgContext | // initialize omgContext | ||||
| @@ -664,8 +692,6 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri | |||||
| GELOGI("The pre-checking report has been saved to %s.", check_report.c_str()); | GELOGI("The pre-checking report has been saved to %s.", check_report.c_str()); | ||||
| } | } | ||||
| // Prevent data residue in multiple calls | |||||
| PreChecker::Instance().Clear(); | |||||
| GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC model parse ret fail."); | GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC model parse ret fail."); | ||||
| std::string input_fp16_nodes; | std::string input_fp16_nodes; | ||||
| @@ -693,12 +719,19 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri | |||||
| graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); | graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); | ||||
| auto weights_parser = WeightsParserFactory::Instance()->CreateWeightsParser(type); | auto weights_parser = WeightsParserFactory::Instance()->CreateWeightsParser(type); | ||||
| ret = weights_parser->Parse(weights_file, graph); | ret = weights_parser->Parse(weights_file, graph); | ||||
| GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail."); | |||||
| // IN ONLY_PRE_CHECK mode, generate pre inspection report only. | // IN ONLY_PRE_CHECK mode, generate pre inspection report only. | ||||
| if (run_mode == ONLY_PRE_CHECK) { | |||||
| if (PreChecker::Instance().HasError() || run_mode == ONLY_PRE_CHECK) { | |||||
| std::string check_report; | |||||
| ParseAtcParms(atc_params, "check_report", check_report); | |||||
| GE_RETURN_WITH_LOG_IF_ERROR(PreChecker::Instance().Save(check_report), "Generate pre-checking report failed."); | |||||
| GEEVENT("The pre-checking report has been saved to %s.", check_report.c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| // Prevent data residue in multiple calls | |||||
| PreChecker::Instance().Clear(); | |||||
| GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail."); | |||||
| GELOGI("ATC parser success."); | GELOGI("ATC parser success."); | ||||
| @@ -41,17 +41,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFr | |||||
| uintptr_t resource_id; | uintptr_t resource_id; | ||||
| // runtime uses NULL to denote a default stream for each device | // runtime uses NULL to denote a default stream for each device | ||||
| if (stream == nullptr) { | if (stream == nullptr) { | ||||
| // use device id as resource key instead | |||||
| int32_t dev_id = 0; | |||||
| auto rt_err = rtGetDevice(&dev_id); | |||||
| // get current context | |||||
| rtContext_t rt_cur_ctx = nullptr; | |||||
| auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); | |||||
| if (rt_err != RT_ERROR_NONE) { | if (rt_err != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Get current device id failed. ret = %d", static_cast<int>(rt_err)); | |||||
| GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err)); | |||||
| return RT_FAILED; | return RT_FAILED; | ||||
| } | } | ||||
| GELOGI("GetOpFromModel with default stream. device id = %d", dev_id); | |||||
| resource_id = static_cast<uintptr_t>(dev_id); | |||||
| // use current context as resource key instead | |||||
| GELOGI("use context as resource key instead when default stream"); | |||||
| resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx); | |||||
| } else { | } else { | ||||
| GELOGI("use stream as resource key instead when create stream"); | |||||
| resource_id = reinterpret_cast<uintptr_t>(stream); | resource_id = reinterpret_cast<uintptr_t>(stream); | ||||
| } | } | ||||
| @@ -0,0 +1,6 @@ | |||||
| inc_path := $(shell pwd)/inc/external/ | |||||
| out_path := $(shell pwd)/out/atc/lib64/stub/ | |||||
| stub_path := $(shell pwd)/framework/domi/stub/ | |||||
| mkdir_stub := $(shell mkdir -p $(out_path)) | |||||
| local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path)) | |||||
| @@ -0,0 +1,4 @@ | |||||
| ################################################################################### | |||||
| the directory (stub) saves the stub file | |||||
| gen_stubapi.py is using for retrieving API and generating stub functions | |||||
| ################################################################################### | |||||
| @@ -0,0 +1,573 @@ | |||||
| import os | |||||
| import re | |||||
| import sys | |||||
| import logging | |||||
| logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s', | |||||
| level=logging.INFO) | |||||
| """ | |||||
| this attr is used for symbol table visible | |||||
| """ | |||||
| GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' | |||||
| """ | |||||
| generate stub func body by return type | |||||
| """ | |||||
| RETURN_STATEMENTS = { | |||||
| 'graphStatus': ' return GRAPH_SUCCESS;', | |||||
| 'Status': ' return SUCCESS;', | |||||
| 'Graph': ' return Graph();', | |||||
| 'Graph&': ' return *this;', | |||||
| 'Format': ' return Format();', | |||||
| 'Format&': ' return *this;', | |||||
| 'Shape': ' return Shape();', | |||||
| 'Shape&': ' return *this;', | |||||
| 'TensorDesc': ' return TensorDesc();', | |||||
| 'TensorDesc&': ' return *this;', | |||||
| 'Tensor': ' return Tensor();', | |||||
| 'Tensor&': ' return *this;', | |||||
| 'Operator': ' return Operator();', | |||||
| 'Operator&': ' return *this;', | |||||
| 'Ptr': ' return nullptr;', | |||||
| 'std::string': ' return "";', | |||||
| 'std::string&': ' return "";', | |||||
| 'string': ' return "";', | |||||
| 'int': ' return 0;', | |||||
| 'DataType': ' return DT_FLOAT;', | |||||
| 'InferenceContextPtr': ' return nullptr;', | |||||
| 'SubgraphBuilder': ' return nullptr;', | |||||
| 'OperatorImplPtr': ' return nullptr;', | |||||
| 'OutHandler': ' return nullptr;', | |||||
| 'std::vector<std::string>': ' return {};', | |||||
| 'std::vector<int64_t>': ' return {};', | |||||
| 'std::map': ' return {};', | |||||
| 'uint32_t': ' return 0;', | |||||
| 'int64_t': ' return 0;', | |||||
| 'uint64_t': ' return 0;', | |||||
| 'size_t': ' return 0;', | |||||
| 'float': ' return 0.0f;', | |||||
| 'bool': ' return false;', | |||||
| } | |||||
| """ | |||||
| max code len per line in hua_wei software programming specifications | |||||
| """ | |||||
| max_code_len_per_line = 100 | |||||
| """ | |||||
| white_list_for_debug, include_dir_key_words is to | |||||
| determines which header files to generate cc files from | |||||
| when DEBUG on | |||||
| """ | |||||
| white_list_for_debug = ["operator.h", "tensor.h", | |||||
| "graph.h", "operator_factory.h", | |||||
| "ge_ir_build.h"] | |||||
| include_dir_key_words = ["ge", "graph"] | |||||
| DEBUG = True | |||||
| def need_generate_func(func_line): | |||||
| """ | |||||
| :param func_line: | |||||
| :return: | |||||
| """ | |||||
| if func_line.strip().endswith("default") or func_line.strip().endswith("delete") \ | |||||
| or func_line.strip().startswith("typedef") or func_line.strip().startswith("using"): | |||||
| return False | |||||
| return True | |||||
| def file_endswith_white_list_suffix(file): | |||||
| """ | |||||
| :param file: | |||||
| :return: | |||||
| """ | |||||
| if DEBUG: | |||||
| for suffix in white_list_for_debug: | |||||
| if file.endswith(suffix): | |||||
| return True | |||||
| return False | |||||
| else: | |||||
| return True | |||||
| """ | |||||
| belows are patterns used for analyse .h file | |||||
| """ | |||||
| # pattern function | |||||
| pattern_func = re.compile(r"""(^[\s]*) #leading with space,we will find and delete after | |||||
| ([a-zA-Z~_] # void int likely | |||||
| .* | |||||
| [)] #we find ) | |||||
| (?!.*{) # we do not want the case int abc() const { return 1;} | |||||
| .*) | |||||
| (;.*) #we want to find ; and after for we will replace these later | |||||
| \n$ | |||||
| """, re.VERBOSE | re.MULTILINE | re.DOTALL) | |||||
| # pattern comment | |||||
| pattern_comment = re.compile(r'^\s*//') | |||||
| pattern_comment_2_start = re.compile(r'^\s*/[*]') | |||||
| pattern_comment_2_end = re.compile(r'[*]/\s*$') | |||||
| # pattern define | |||||
| pattern_define = re.compile(r'^\s*#define') | |||||
| pattern_define_return = re.compile(r'\\\s*$') | |||||
| # blank line | |||||
| pattern_blank_line = re.compile(r'^\s*$') | |||||
| # virtual,explicit,friend,static | |||||
| pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)') | |||||
| # lead space | |||||
| pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]') | |||||
| # functions will have patterns such as func ( or func( | |||||
| # but operator is an exception; the class name is preceded by an operator, and the above mode does not exist | |||||
| # format like :"operator = ()" | |||||
| pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]') | |||||
| # template | |||||
| pattern_template = re.compile(r'^\s*template') | |||||
| pattern_template_end = re.compile(r'>\s*$') | |||||
| # namespace | |||||
| pattern_namespace = re.compile(r'namespace.*{') | |||||
| # class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with | |||||
| pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+<?)(?!.*;)' % GE_ATTR) | |||||
| # {} | |||||
| pattern_start = re.compile('{') | |||||
| pattern_end = re.compile('}') | |||||
| line_index = 0 | |||||
| class H2CC(object): | |||||
| def __init__(self, input_file, output_file, shared_includes_content): | |||||
| """ | |||||
| :param input_file: | |||||
| :param output_file: | |||||
| :param shared_includes_content: | |||||
| """ | |||||
| self.input_file = input_file | |||||
| self.output_file = output_file | |||||
| self.shared_includes_content = shared_includes_content | |||||
| self.line_index = 0 | |||||
| self.input_fd = open(self.input_file, 'r') | |||||
| self.input_content = self.input_fd.readlines() | |||||
| self.output_fd = open(self.output_file, 'w') | |||||
| # The state may be normal_now(in the middle of {}),class_now,namespace_now | |||||
| self.stack = [] | |||||
| self.stack_class = [] | |||||
| self.stack_template = [] | |||||
| # record funcs generated by h2cc func | |||||
| self.func_list_exist = [] | |||||
| def __del__(self): | |||||
| self.input_fd.close() | |||||
| self.output_fd.close() | |||||
| del self.stack | |||||
| del self.stack_class | |||||
| del self.stack_template | |||||
| del self.func_list_exist | |||||
| def just_skip(self): | |||||
| # skip blank line or comment | |||||
| if pattern_blank_line.search(self.input_content[self.line_index]) or pattern_comment.search( | |||||
| self.input_content[self.line_index]): # /n or comment using // | |||||
| self.line_index += 1 | |||||
| if pattern_comment_2_start.search(self.input_content[self.line_index]): # comment using /* | |||||
| while not pattern_comment_2_end.search(self.input_content[self.line_index]): # */ | |||||
| self.line_index += 1 | |||||
| self.line_index += 1 | |||||
| # skip define | |||||
| if pattern_define.search(self.input_content[self.line_index]): | |||||
| while pattern_blank_line.search(self.input_content[self.line_index]) or pattern_define_return.search( | |||||
| self.input_content[self.line_index]): | |||||
| self.line_index += 1 | |||||
| self.line_index += 1 | |||||
| def write_inc_content(self): | |||||
| for shared_include_content in self.shared_includes_content: | |||||
| self.output_fd.write(shared_include_content) | |||||
| def h2cc(self): | |||||
| """ | |||||
| :return: | |||||
| """ | |||||
| logging.info("start generate cc_file[%s] from h_file[%s]", self.output_file, self.input_file) | |||||
| global pattern_comment | |||||
| global pattern_comment_2_start | |||||
| global pattern_comment_2_end | |||||
| global pattern_blank_line | |||||
| global pattern_func | |||||
| global pattern_keyword | |||||
| global pattern_leading_space | |||||
| global pattern_func_name | |||||
| global pattern_template | |||||
| global pattern_template_end | |||||
| global pattern_namespace | |||||
| global pattern_class | |||||
| global pattern_start | |||||
| global pattern_end | |||||
| global line_index | |||||
| # write inc content | |||||
| self.write_inc_content() | |||||
| # core processing cycle, process the input .h file by line | |||||
| while self.line_index < len(self.input_content): | |||||
| # handle comment and blank line | |||||
| self.just_skip() | |||||
| # match namespace | |||||
| self.handle_namespace() | |||||
| # match template | |||||
| template_string = self.handle_template() | |||||
| # match class | |||||
| line = self.input_content[self.line_index] | |||||
| match_class = pattern_class.search(line) | |||||
| match_start = pattern_start.search(line) | |||||
| handle_class_result = self.handle_class(template_string, line, match_start, match_class) | |||||
| if handle_class_result == "continue": | |||||
| continue | |||||
| # match "}" | |||||
| handle_stack_result = self.handle_stack(match_start) | |||||
| if handle_stack_result == "continue": | |||||
| continue | |||||
| # handle func | |||||
| handle_func1_result, line, start_i = self.handle_func1(line) | |||||
| if handle_func1_result == "continue": | |||||
| continue | |||||
| # here means func is found | |||||
| # delete key word | |||||
| line = pattern_keyword.sub('', line) | |||||
| logging.info("line[%s]", line) | |||||
| # Class member function | |||||
| # if friend we will not add class name | |||||
| friend_match = re.search('friend ', line) | |||||
| if len(self.stack_class) > 0 and not friend_match: | |||||
| line, func_name = self.handle_class_member_func(line, template_string) | |||||
| # Normal functions | |||||
| else: | |||||
| line, func_name = self.handle_normal_func(line, template_string) | |||||
| need_generate = need_generate_func(line) | |||||
| # func body | |||||
| line += self.implement_function(line) | |||||
| # comment | |||||
| line = self.gen_comment(start_i) + line | |||||
| # write to out file | |||||
| self.write_func_content(line, func_name, need_generate) | |||||
| # next loop | |||||
| self.line_index += 1 | |||||
| logging.info('Added %s functions', len(self.func_list_exist)) | |||||
| logging.info('Successfully converted,please see ' + self.output_file) | |||||
| def handle_func1(self, line): | |||||
| """ | |||||
| :param line: | |||||
| :return: | |||||
| """ | |||||
| find1 = re.search('[(]', line) | |||||
| if not find1: | |||||
| self.line_index += 1 | |||||
| return "continue", line, None | |||||
| find2 = re.search('[)]', line) | |||||
| start_i = self.line_index | |||||
| space_match = pattern_leading_space.search(line) | |||||
| # deal with | |||||
| # int abc(int a, | |||||
| # int b) | |||||
| if find1 and (not find2): | |||||
| self.line_index += 1 | |||||
| line2 = self.input_content[self.line_index] | |||||
| if space_match: | |||||
| line2 = re.sub('^' + space_match.group(1), '', line2) | |||||
| line += line2 | |||||
| while self.line_index < len(self.input_content) and (not re.search('[)]', line2)): | |||||
| self.line_index += 1 | |||||
| line2 = self.input_content[self.line_index] | |||||
| line2 = re.sub('^' + space_match.group(1), '', line2) | |||||
| line += line2 | |||||
| match_start = pattern_start.search(self.input_content[self.line_index]) | |||||
| match_end = pattern_end.search(self.input_content[self.line_index]) | |||||
| if match_start: # like ) { or ) {} int the last line | |||||
| if not match_end: | |||||
| self.stack.append('normal_now') | |||||
| ii = start_i | |||||
| while ii <= self.line_index: | |||||
| ii += 1 | |||||
| self.line_index += 1 | |||||
| return "continue", line, start_i | |||||
| logging.info("line[%s]", line) | |||||
| # ' int abc();'->'int abc()' | |||||
| (line, match) = pattern_func.subn(r'\2\n', line) | |||||
| logging.info("line[%s]", line) | |||||
| # deal with case: | |||||
| # 'int \n abc(int a, int b)' | |||||
| if re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]): | |||||
| line = self.input_content[start_i - 1] + line | |||||
| line = line.lstrip() | |||||
| if not match: | |||||
| self.line_index += 1 | |||||
| return "continue", line, start_i | |||||
| return "pass", line, start_i | |||||
| def handle_stack(self, match_start): | |||||
| """ | |||||
| :param match_start: | |||||
| :return: | |||||
| """ | |||||
| line = self.input_content[self.line_index] | |||||
| match_end = pattern_end.search(line) | |||||
| if match_start: | |||||
| self.stack.append('normal_now') | |||||
| if match_end: | |||||
| top_status = self.stack.pop() | |||||
| if top_status == 'namespace_now': | |||||
| self.output_fd.write(line + '\n') | |||||
| elif top_status == 'class_now': | |||||
| self.stack_class.pop() | |||||
| self.stack_template.pop() | |||||
| if match_start or match_end: | |||||
| self.line_index += 1 | |||||
| return "continue" | |||||
| if len(self.stack) > 0 and self.stack[-1] == 'normal_now': | |||||
| self.line_index += 1 | |||||
| return "continue" | |||||
| return "pass" | |||||
| def handle_class(self, template_string, line, match_start, match_class): | |||||
| """ | |||||
| :param template_string: | |||||
| :param line: | |||||
| :param match_start: | |||||
| :param match_class: | |||||
| :return: | |||||
| """ | |||||
| if match_class: # we face a class | |||||
| self.stack_template.append(template_string) | |||||
| self.stack.append('class_now') | |||||
| class_name = match_class.group(3) | |||||
| # class template specializations: class A<u,Node<u> > | |||||
| if '<' in class_name: | |||||
| k = line.index('<') | |||||
| fit = 1 | |||||
| for ii in range(k + 1, len(line)): | |||||
| if line[ii] == '<': | |||||
| fit += 1 | |||||
| if line[ii] == '>': | |||||
| fit -= 1 | |||||
| if fit == 0: | |||||
| break | |||||
| class_name += line[k + 1:ii + 1] | |||||
| logging.info('class_name[%s]', class_name) | |||||
| self.stack_class.append(class_name) | |||||
| while not match_start: | |||||
| self.line_index += 1 | |||||
| line = self.input_content[self.line_index] | |||||
| match_start = pattern_start.search(line) | |||||
| self.line_index += 1 | |||||
| return "continue" | |||||
| return "pass" | |||||
| def handle_template(self): | |||||
| line = self.input_content[self.line_index] | |||||
| match_template = pattern_template.search(line) | |||||
| template_string = '' | |||||
| if match_template: | |||||
| match_template_end = pattern_template_end.search(line) | |||||
| template_string = line | |||||
| while not match_template_end: | |||||
| self.line_index += 1 | |||||
| line = self.input_content[self.line_index] | |||||
| template_string += line | |||||
| match_template_end = pattern_template_end.search(line) | |||||
| self.line_index += 1 | |||||
| return template_string | |||||
| def handle_namespace(self): | |||||
| line = self.input_content[self.line_index] | |||||
| match_namespace = pattern_namespace.search(line) | |||||
| if match_namespace: # we face namespace | |||||
| self.output_fd.write(line + '\n') | |||||
| self.stack.append('namespace_now') | |||||
| self.line_index += 1 | |||||
| def handle_normal_func(self, line, template_string): | |||||
| template_line = '' | |||||
| self.stack_template.append(template_string) | |||||
| if self.stack_template[-1] != '': | |||||
| template_line = re.sub(r'\s*template', 'template', self.stack_template[-1]) | |||||
| # change '< class T = a, class U = A(3)>' to '<class T, class U>' | |||||
| template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line) | |||||
| template_line = re.sub(r'\s*=.*,', ',', template_line) | |||||
| template_line = re.sub(r'\s*=.*', '', template_line) | |||||
| line = re.sub(r'\s*=.*,', ',', line) | |||||
| line = re.sub(r'\s*=.*\)', ')', line) | |||||
| line = template_line + line | |||||
| self.stack_template.pop() | |||||
| func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group() | |||||
| logging.info("line[%s]", line) | |||||
| logging.info("func_name[%s]", func_name) | |||||
| return line, func_name | |||||
| def handle_class_member_func(self, line, template_string): | |||||
| template_line = '' | |||||
| x = '' | |||||
| if template_string != '': | |||||
| template_string = re.sub(r'\s*template', 'template', template_string) | |||||
| template_string = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_string) | |||||
| template_string = re.sub(r'\s*=.*,', ',', template_string) | |||||
| template_string = re.sub(r'\s*=.*', '', template_string) | |||||
| if self.stack_template[-1] != '': | |||||
| if not (re.search(r'<\s*>', stack_template[-1])): | |||||
| template_line = re.sub(r'^\s*template', 'template', stack_template[-1]) | |||||
| if not (re.search(r'<.*>', self.stack_class[-1])): | |||||
| # for x we get like template<class T, typename U> -> <T,U> | |||||
| x = re.sub(r'template\s*<', '<', template_line) # remove template -> <class T, typename U> | |||||
| x = re.sub(r'\n', '', x) | |||||
| x = re.sub(r'\s*=.*,', ',', x) | |||||
| x = re.sub(r'\s*=.*\>', '>', x) | |||||
| x = x.rstrip() # remove \n | |||||
| x = re.sub(r'(class|typename)\s+|(<class>|<typename>\s*class)', '', | |||||
| x) # remove class,typename -> <T, U> | |||||
| x = re.sub(r'<\s+', '<', x) | |||||
| x = re.sub(r'\s+>', '>', x) | |||||
| x = re.sub(r'\s+,', ',', x) | |||||
| x = re.sub(r',\s+', ', ', x) | |||||
| line = re.sub(r'\s*=\s+0', '', line) | |||||
| line = re.sub(r'\s*=\s+.*,', ',', line) | |||||
| line = re.sub(r'\s*=\s+.*\)', ')', line) | |||||
| logging.info("x[%s]\nline[%s]", x, line) | |||||
| # if the function is long, void ABC::foo() | |||||
| # breaks into two lines void ABC::\n foo() | |||||
| temp_line = pattern_func_name.sub(self.stack_class[-1] + x + '::' + r'\1(', line, count=1) | |||||
| if len(temp_line) > max_code_len_per_line: | |||||
| line = pattern_func_name.sub(self.stack_class[-1] + x + '::\n' + r'\1(', line, count=1) | |||||
| else: | |||||
| line = temp_line | |||||
| logging.info("line[%s]", line) | |||||
| # add template as the above if there is one | |||||
| template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line) | |||||
| template_line = re.sub(r'\s*=.*,', ',', template_line) | |||||
| template_line = re.sub(r'\s*=.*', '', template_line) | |||||
| line = template_line + template_string + line | |||||
| func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group() | |||||
| logging.info("line[%s]", line) | |||||
| logging.info("func_name[%s]", func_name) | |||||
| return line, func_name | |||||
| def write_func_content(self, content, func_name, need_generate): | |||||
| if not (func_name in self.func_list_exist) and need_generate: | |||||
| self.output_fd.write(content) | |||||
| self.func_list_exist.append(func_name) | |||||
| logging.info('add func:[%s]', func_name) | |||||
| def gen_comment(self, start_i): | |||||
| comment_line = '' | |||||
| # Function comments are on top of function declarations, copy them over | |||||
| k = start_i - 1 # one line before this func start | |||||
| if pattern_template.search(self.input_content[k]): | |||||
| k -= 1 | |||||
| if pattern_comment_2_end.search(self.input_content[k]): | |||||
| comment_line = self.input_content[k].lstrip() | |||||
| while not pattern_comment_2_start.search(self.input_content[k]): | |||||
| k -= 1 | |||||
| comment_line = self.input_content[k].lstrip() + comment_line | |||||
| else: | |||||
| for j in range(k, 0, -1): | |||||
| c_line = self.input_content[j] | |||||
| if pattern_comment.search(c_line): | |||||
| c_line = re.sub(r'\s*//', '//', c_line) | |||||
| comment_line = c_line + comment_line | |||||
| else: | |||||
| break | |||||
| return comment_line | |||||
| @staticmethod | |||||
| def implement_function(func): | |||||
| function_def = '' | |||||
| function_def += '{\n' | |||||
| all_items = func.split() | |||||
| start = 0 | |||||
| return_type = all_items[start] | |||||
| if return_type == "const": | |||||
| start += 1 | |||||
| return_type = all_items[start] | |||||
| if return_type.startswith(('std::map', 'std::set', 'std::vector')): | |||||
| return_type = "std::map" | |||||
| if return_type.endswith('*') or (len(all_items) > start + 1 and all_items[start + 1].startswith('*')): | |||||
| return_type = "Ptr" | |||||
| if len(all_items) > start + 1 and all_items[start + 1].startswith('&'): | |||||
| return_type += "&" | |||||
| if RETURN_STATEMENTS.__contains__(return_type): | |||||
| function_def += RETURN_STATEMENTS[return_type] | |||||
| else: | |||||
| logging.warning("Unhandled return type[%s]", return_type) | |||||
| function_def += '\n' | |||||
| function_def += '}\n' | |||||
| function_def += '\n' | |||||
| return function_def | |||||
| def collect_header_files(path): | |||||
| """ | |||||
| :param path: | |||||
| :return: | |||||
| """ | |||||
| header_files = [] | |||||
| shared_includes_content = [] | |||||
| for root, dirs, files in os.walk(path): | |||||
| files.sort() | |||||
| for file in files: | |||||
| if file.find("git") >= 0: | |||||
| continue | |||||
| if not file.endswith('.h'): | |||||
| continue | |||||
| file_path = os.path.join(root, file) | |||||
| file_path = file_path.replace('\\', '/') | |||||
| header_files.append(file_path) | |||||
| include_str = '#include "{}"\n'.format(file_path[path.rindex('/') + 1:]) | |||||
| shared_includes_content.append(include_str) | |||||
| return header_files, shared_includes_content | |||||
| def generate_stub_file(inc_dir, out_cc_dir): | |||||
| """ | |||||
| :param inc_dir: | |||||
| :param out_cc_dir: | |||||
| :return: | |||||
| """ | |||||
| target_header_files, shared_includes_content = collect_header_files(inc_dir) | |||||
| for header_file in target_header_files: | |||||
| if not file_endswith_white_list_suffix(header_file): | |||||
| continue | |||||
| cc_file = re.sub('.h*$', '.cc', header_file) | |||||
| h_2_cc = H2CC(header_file, out_cc_dir + cc_file[cc_file.rindex('/') + 1:], shared_includes_content) | |||||
| h_2_cc.h2cc() | |||||
| def gen_code(inc_dir, out_cc_dir): | |||||
| """ | |||||
| :param inc_dir: | |||||
| :param out_cc_dir: | |||||
| :return: | |||||
| """ | |||||
| if not inc_dir.endswith('/'): | |||||
| inc_dir += '/' | |||||
| if not out_cc_dir.endswith('/'): | |||||
| out_cc_dir += '/' | |||||
| for include_dir_key_word in include_dir_key_words: | |||||
| generate_stub_file(inc_dir + include_dir_key_word, out_cc_dir) | |||||
| if __name__ == '__main__': | |||||
| inc_dir = sys.argv[1] | |||||
| out_cc_dir = sys.argv[2] | |||||
| gen_code(inc_dir, out_cc_dir) | |||||
| @@ -17,9 +17,10 @@ | |||||
| syntax = "proto3"; | syntax = "proto3"; | ||||
| import "om.proto"; | import "om.proto"; | ||||
| package domi; | package domi; | ||||
| message FusionModelDef { | message FusionModelDef { | ||||
| string version = 1; | string version = 1; | ||||
| repeated OpDef fusion_op = 2; | repeated OpDef fusion_op = 2; | ||||
| } | |||||
| } | |||||
| @@ -1029,9 +1029,9 @@ REG_OP(BesselI1e) | |||||
| * y: A Tensor of type UnaryDataType. | * y: A Tensor of type UnaryDataType. | ||||
| * @attention Constraints: | * @attention Constraints: | ||||
| * @li "base" is supposed to be greater than 0. Retaining the default \n | |||||
| * @li "base" is supposed to be greater than 0. Retaining the default | |||||
| * value "-1" sets "base" to "e". | * value "-1" sets "base" to "e". | ||||
| * @li If the input value of operator Log is within the range (0, 0.01] or \n | |||||
| * @li If the input value of operator Log is within the range (0, 0.01] or | |||||
| * [0.95, 1.05], the output accuracy is subject to change. | * [0.95, 1.05], the output accuracy is subject to change. | ||||
| * @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
| @@ -1047,11 +1047,11 @@ REG_OP(Log) | |||||
| .OP_END_FACTORY_REG(Log) | .OP_END_FACTORY_REG(Log) | ||||
| /** | /** | ||||
| * @brief Returns x1 * x2 element-wise.\n | |||||
| * @brief Returns x1 * x2 element-wise. | |||||
| * y = x1 * x2 | * y = x1 * x2 | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * @li x1: A Tensor. Must be one of the following types: float16, float32,\n | |||||
| * @li x1: A Tensor. Must be one of the following types: float16, float32, | |||||
| * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. | * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. | ||||
| * @li x2: A Tensor. Must be one of the following types: float16, float32, | * @li x2: A Tensor. Must be one of the following types: float16, float32, | ||||
| * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. | * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. | ||||
| @@ -1079,7 +1079,7 @@ REG_OP(Mul) | |||||
| .OP_END_FACTORY_REG(Mul) | .OP_END_FACTORY_REG(Mul) | ||||
| /** | /** | ||||
| * @brief Computes the gradient of the square root of "x" with regard to its\n | |||||
| * @brief Computes the gradient of the square root of "x" with regard to its | |||||
| * input. grad = dy * 0.5/y, where y = sqrt(x), and "dy" is the corresponding | * input. grad = dy * 0.5/y, where y = sqrt(x), and "dy" is the corresponding | ||||
| * input gradient. | * input gradient. | ||||
| @@ -3022,6 +3022,7 @@ REG_OP(CosineEmbeddingLoss) | |||||
| *@brief Kullback-Leibler divergence. | *@brief Kullback-Leibler divergence. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| * Two inputs, including: | |||||
| *@li x: Tensor of arbitrary shape. | *@li x: Tensor of arbitrary shape. | ||||
| *@li target: Tensor of the same shape and dtype as x. | *@li target: Tensor of the same shape and dtype as x. | ||||
| @@ -934,7 +934,6 @@ REG_OP(EncodeJpeg) | |||||
| /** | /** | ||||
| *@brief PNG-encode an image. | *@brief PNG-encode an image. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input image must be unit8 or uint16 type. Inputs include: \n | *Input image must be unit8 or uint16 type. Inputs include: \n | ||||
| *image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] \n | *image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] \n | ||||
| @@ -1224,6 +1223,16 @@ REG_OP(CombinedNonMaxSuppression) | |||||
| .ATTR(clip_boxes, Bool, true) | .ATTR(clip_boxes, Bool, true) | ||||
| .OP_END_FACTORY_REG(CombinedNonMaxSuppression) | .OP_END_FACTORY_REG(CombinedNonMaxSuppression) | ||||
| REG_OP(SpatialTransformerD) | |||||
| .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) | |||||
| .OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) | |||||
| .ATTR(output_size, ListInt, {-1, -1}) | |||||
| .ATTR(default_theta, ListFloat, {}) | |||||
| .ATTR(align_corners, Bool, false) | |||||
| .ATTR(use_default_theta, ListBool, {}) | |||||
| .OP_END_FACTORY_REG(SpatialTransformerD) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_OP_MAGE_OPS_H_ | #endif // GE_OP_MAGE_OPS_H_ | ||||
| @@ -93,31 +93,49 @@ REG_OP(MatMulV2) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Five inputs, including: | *Five inputs, including: | ||||
| *@li a: A matrix Tensor. 4D. Must be one of the following types:\n float16, int8. Has format [FRACTAL_NZ]. | |||||
| *@li b: A matrix Tensor. 4D. Must be one of the following types:\n float16, int8. When type is int8, has format [FRACTAL_Z], \n otherwise has format [FRACTAL_NZ]. | |||||
| *@li c: A matrix Tensor. 2D or higher. Must be one of the following types: \n float16, int32, float32. When type is int32, has format [ND], \n otherwise has format [FRACTAL_NZ]. | |||||
| *@li alpha: A 1D Tensor. The shape of alpha is [1].\n Must be one of the following types: float16, int32, float32. Has format [ND]. | |||||
| *@li beta: A 1D Tensor. The shape of beta is [1].\n Must be one of the following types: float16, int32, float32. Has format [ND]. | |||||
| *@li a: A matrix Tensor. Must be one of the following types: float16, int8. | |||||
| * Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). | |||||
| *@li b: A matrix Tensor. Must be one of the following types: float16, int8. | |||||
| * Has format [ND, FRACTAL_NZ, FRACTAL_Z]. 2D(ND) or 4D(FRACTAL_NZ, FRACTAL_Z). | |||||
| *@li c: A matrix Tensor. Must be one of the following types: float16, int32, | |||||
| * float32. has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). | |||||
| *@li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the following | |||||
| * types: float16, int32, float32. Has format [ND]. | |||||
| *@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following | |||||
| * types: float16, int32, float32. Has format [ND]. | |||||
| * The format of a, b, c has restriction:\n | |||||
| * When type of a is int8 and type of c is int32, the format of a, b, c should | |||||
| * all be ND, or a is FRACTAL_NZ and b is FRACTAL_Z and c is ND.\n | |||||
| * When type of a is int8 and type of c is float32, the format of a, b, c should | |||||
| * all be ND or a is FRACTAL_NZ and b is FRACTAL_Z and c is FRACTAL_NZ.\n | |||||
| * When type of a is float16 and type of c is float16, the format of a, b, c | |||||
| * should all be ND or FRACTAL_NZ.\n | |||||
| * When type of a is float16 and type of c is float32, the format of a, b, c | |||||
| * should all be ND or FRACTAL_NZ. | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *Two attributes, including: | *Two attributes, including: | ||||
| *@li transpose_a: Optional. A bool.\n If True, changes the shape of "a" from [M, K] to [K, M].\n Reserved parameters, not used for now. | |||||
| *@li transpose_b: Optional. A bool.\n If True, changes the shape of "b" from [M, K] to [K, M].\n Reserved parameters, not used for now. | |||||
| *@li transpose_a: Optional. A bool. If True, changes the shape of "a" from | |||||
| * [M, K] to [K, M]. | |||||
| *@li transpose_b: Optional. A bool. If True, changes the shape of "b" from | |||||
| * [K, N] to [N, K]. | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *@out: The result matrix Tensor. 4D. Must be one of the following types:\n float16, float32, int32. Has format [FRACTAL_NZ]. | |||||
| *y: The result matrix Tensor. Must be one of the following types: float16, | |||||
| * float32, int32. Has format [ND, FRACTAL_NZ], the format should be equal to a. | |||||
| * 2D(ND) or 4D(FRACTAL_NZ). | |||||
| */ | */ | ||||
| REG_OP(Gemm) | |||||
| REG_OP(GEMM) | |||||
| .INPUT(a, TensorType({DT_FLOAT16, DT_INT8})) | .INPUT(a, TensorType({DT_FLOAT16, DT_INT8})) | ||||
| .INPUT(b, TensorType({DT_FLOAT16, DT_INT8})) | .INPUT(b, TensorType({DT_FLOAT16, DT_INT8})) | ||||
| .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | ||||
| .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | ||||
| .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | ||||
| .OUTPUT(out, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .ATTR(transpose_a, Bool, false) | .ATTR(transpose_a, Bool, false) | ||||
| .ATTR(transpose_b, Bool, false) | .ATTR(transpose_b, Bool, false) | ||||
| .OP_END_FACTORY_REG(Gemm) | |||||
| .OP_END_FACTORY_REG(GEMM) | |||||
| /** | /** | ||||
| *@brief Multiplies matrix "a" by matrix "b", producing "a * b". | *@brief Multiplies matrix "a" by matrix "b", producing "a * b". | ||||
| @@ -361,14 +361,14 @@ REG_OP(BatchNormGradExt2) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | ||||
| *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | ||||
| *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||||
| *@li momentum: An optional string, input x's Scale factor | |||||
| *@li variance: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the variance used for inference. | |||||
| *@li momentum: A Tensor of type float32 or float16, represents the mean and the variance's scale factor | |||||
| *@li scale: An optional tensor of type float16 or float32, no use | *@li scale: An optional tensor of type float16 or float32, no use | ||||
| *@li offset: An optional tensor of type float16 or float32, no use | *@li offset: An optional tensor of type float16 or float32, no use | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | ||||
| *@li use_global_stats: mean inference mode , only can be "True". | *@li use_global_stats: mean inference mode , only can be "True". | ||||
| *@li mode: An optional input, not use | |||||
| *@li mode: An optional attr, not use | |||||
| *@par Outputs:\n | *@par Outputs:\n | ||||
| *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" | *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" | ||||
| */ | */ | ||||
| @@ -391,7 +391,7 @@ REG_OP(BNInference) | |||||
| *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | ||||
| *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | ||||
| *@li momentum: An optional float, input x's Scale factor | |||||
| *@li momentum: A Tensor of type float32 or float16, the mean and the variance's Scale factor | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | ||||
| *@li use_global_stats: mean inference mode , only can be "True". | *@li use_global_stats: mean inference mode , only can be "True". | ||||
| @@ -420,13 +420,13 @@ REG_OP(BnHost) | |||||
| *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | ||||
| *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | ||||
| *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | ||||
| *@li momentum: An optional float, input x's Scale factor | |||||
| *@li scale: An optional tensor of type float16 or float32, no use | *@li scale: An optional tensor of type float16 or float32, no use | ||||
| *@li offset: An optional tensor of type float16 or float32, no use | *@li offset: An optional tensor of type float16 or float32, no use | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li momentum: An optional float32 num, represents the mean and the variance's scale factor | |||||
| *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | ||||
| *@li use_global_stats: mean inference mode , only can be "True". | *@li use_global_stats: mean inference mode , only can be "True". | ||||
| *@li mode: An optional inpout, not use | |||||
| *@li mode: An optional attr, not use | |||||
| *@par Outputs:\n | *@par Outputs:\n | ||||
| *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" | *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" | ||||
| */ | */ | ||||
| @@ -62,7 +62,7 @@ namespace ge { | |||||
| * data is 5D with shape [N, C1, Ho, Wo, C0], | * data is 5D with shape [N, C1, Ho, Wo, C0], | ||||
| * where C is the same as that of the feature map and C0 is 16.\n | * where C is the same as that of the feature map and C0 is 16.\n | ||||
| * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * | * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * | ||||
| * stride_h + 32 * filter_h) * ceil(Wi, 16) �?l1_size and Hf*Wf �?l0b_size/512.\n | |||||
| * stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512. | |||||
| * @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
| * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. | * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. | ||||
| @@ -119,7 +119,7 @@ REG_OP(DepthwiseConv2DBackpropFilter) | |||||
| * data is 5D with shape [N, C1, Ho, Wo, C0], | * data is 5D with shape [N, C1, Ho, Wo, C0], | ||||
| * where C is the same as that of the feature map and C0 is 16.\n | * where C is the same as that of the feature map and C0 is 16.\n | ||||
| * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * | * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * | ||||
| * stride_h + 32 * filter_h) * ceil(Wi, 16) �?l1_size and Hf*Wf �?l0b_size/512.\n | |||||
| * stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512. | |||||
| * @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
| * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. | * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. | ||||
| @@ -178,7 +178,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD) | |||||
| * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the | * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the | ||||
| * data is 5D with shape [N, C1, Ho, Wo, C0], | * data is 5D with shape [N, C1, Ho, Wo, C0], | ||||
| * where C is the same as that of the feature map and C0 is 16.\n | * where C is the same as that of the feature map and C0 is 16.\n | ||||
| * Limited by Tiling: max_h_in_l1 �?C0, where max_h_in_l1 = (l1_size - Hf * | |||||
| * Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * | |||||
| * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n | * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n | ||||
| * @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
| @@ -235,7 +235,7 @@ REG_OP(DepthwiseConv2DBackpropInput) | |||||
| * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the | * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the | ||||
| * data is 5D with shape [N, C1, Ho, Wo, C0], | * data is 5D with shape [N, C1, Ho, Wo, C0], | ||||
| * where C is the same as that of the feature map and C0 is 16.\n | * where C is the same as that of the feature map and C0 is 16.\n | ||||
| * Limited by Tiling: max_h_in_l1 �?C0, where max_h_in_l1 = (l1_size - Hf * | |||||
| * Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * | |||||
| * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n | * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n | ||||
| * @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
| @@ -459,45 +459,44 @@ REG_OP(Conv2DBackpropInputD) | |||||
| *@brief Computes the Deconvolution with respect to the input. | *@brief Computes the Deconvolution with respect to the input. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| * Three inputs: | * Three inputs: | ||||
| * @li x: A Tensor. Must have the same type as "filter". 4D with shape | |||||
| * [batch, out_height, out_width, out_channels] | |||||
| * or [batch, out_channels, out_height, out_width]. Gradients with respect | |||||
| * @li x: A Tensor of type float16 or int8. 4D with shape | |||||
| * [batch, out_channels, out_height, out_width]. Gradients with respect | |||||
| * to the output of the convolution. | * to the output of the convolution. | ||||
| * @li filter: A Tensor of type float16. | |||||
| * 4D with shape [filter_height, filter_width, in_channels, out_channels], | |||||
| * or [out_channels, filter_height, filter_width, in_channels], | |||||
| * or [out_channels, in_channel, filter_height, filter_width]. | |||||
| * @li filter: A Tensor. Must have the same type as "x". | |||||
| * 4D with shape [out_channels, in_channel, filter_height, filter_width].\n | |||||
| * Two optional inputs: | * Two optional inputs: | ||||
| * @li bias: An optional tensor of type float16 | |||||
| * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved.\n | |||||
| * @li bias: An optional tensor. Must have the same type as "y". | |||||
| * @li offset_w: An optional 1D tensor for quantized deconvolution. | |||||
| * Type is int8. Reserved.\n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Six attributes: | * Six attributes: | ||||
| * @li strides: A tuple or list of 2 integers. The stride of the sliding window | * @li strides: A tuple or list of 2 integers. The stride of the sliding window | ||||
| * for H/W dimension. | * for H/W dimension. | ||||
| * @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] | * @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] | ||||
| * padding on the feature map | |||||
| * padding on the feature map. | |||||
| * @li dilations: A tuple or list of 4 integers. The dilation factor for each | * @li dilations: A tuple or list of 4 integers. The dilation factor for each | ||||
| * dimension of input. Must be [1, 1, 1, 1]. | * dimension of input. Must be [1, 1, 1, 1]. | ||||
| * @li groups: Number of blocked connections from input channels to \n | |||||
| output channels. | |||||
| * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC".\n | |||||
| * @li groups: Number of blocked connections from input channels to | |||||
| output channels. Defaults to "1". | |||||
| * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n | |||||
| Specify the data format of the input and output data. | Specify the data format of the input and output data. | ||||
| * @li offset_x: An optional integer for quantized deconvolution. | |||||
| * @li offset_x: An optional integer for quantized deconvolution. Defaults to "0". | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type as "filter". 4D tensor with shape | |||||
| * [batch, height, width, channels] or [batch, channels, height, width]. | |||||
| * y: A Tensor. 4D tensor with shape [batch, channels, height, width]. | |||||
| * When type of x is float16, the type of y must be float16. | |||||
| * When type of x is int8, the type of y must be int32. | |||||
| */ | */ | ||||
| REG_OP(Deconvolution) | REG_OP(Deconvolution) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||||
| .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||||
| .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | |||||
| .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) | |||||
| .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) | |||||
| .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | ||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||||
| .ATTR(strides, ListInt, {1, 1, 1, 1}) | |||||
| .ATTR(pads, ListInt, {0, 0, 0, 0}) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(dilations, ListInt, {1, 1, 1, 1}) | .ATTR(dilations, ListInt, {1, 1, 1, 1}) | ||||
| .ATTR(groups, Int, 1) | .ATTR(groups, Int, 1) | ||||
| .ATTR(data_format, String, "NHWC") | |||||
| .ATTR(data_format, String, "NCHW") | |||||
| .ATTR(offset_x, Int, 0) | .ATTR(offset_x, Int, 0) | ||||
| .OP_END_FACTORY_REG(Deconvolution) | .OP_END_FACTORY_REG(Deconvolution) | ||||
| /** | /** | ||||
| @@ -554,7 +553,7 @@ REG_OP(Conv2DBackpropFilter) | |||||
| * @li groups: Number of blocked connections from input channels to output channels. | * @li groups: Number of blocked connections from input channels to output channels. | ||||
| * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". Specify the data format of the input and output data. | * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". Specify the data format of the input and output data. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type as x | |||||
| * y: A Tensor. Type is float32 | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv2d_backprop_filter | * Compatible with Tensorflow's conv2d_backprop_filter | ||||
| */ | */ | ||||
| @@ -586,8 +585,6 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| |---------|---------|---------|----------|-------- | |---------|---------|---------|----------|-------- | ||||
| | float32 | float32 | float32 | _ | float32 | | float32 | float32 | float32 | _ | float32 | ||||
| |---------|---------|---------|----------|-------- | |---------|---------|---------|----------|-------- | ||||
| | float64 | float64 | float64 | _ | float64 | |||||
| |---------|---------|---------|----------|-------- | |||||
| | int8 | int8 | int32 | int8 | int32 | | int8 | int8 | int32 | int8 | int32 | ||||
| -----------|---------|---------|---------|----------|-------- | -----------|---------|---------|---------|----------|-------- | ||||
| Format | NCHW | NCHW | ND | ND | NCHW | Format | NCHW | NCHW | ND | ND | NCHW | ||||
| @@ -607,7 +604,7 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| * for dilated convolution. Has the same dimension order and value as "strides". | * for dilated convolution. Has the same dimension order and value as "strides". | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Input channels and output channels must both be divisible by | * channels. Input channels and output channels must both be divisible by | ||||
| * "groups". Must be set to 1. | |||||
| * "groups". | |||||
| * @li offset_x: An optional integer for quantized convolution. | * @li offset_x: An optional integer for quantized convolution. | ||||
| * @li data_format: An optional string from: "NHWC", "NCHW". Specifying the | * @li data_format: An optional string from: "NHWC", "NCHW". Specifying the | ||||
| * data format of the input and output images. Reserved. | * data format of the input and output images. Reserved. | ||||
| @@ -642,7 +639,7 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| * @verbatim | * @verbatim | ||||
| Output | Restrictions | Output | Restrictions | ||||
| ------------------|---------------------------------------------- | ------------------|---------------------------------------------- | ||||
| W dimension == 1 | HxW(input) == HxW(filter) == 1x1,2x2...11x11. | |||||
| W dimension == 1 | HxW(input) == HxW(filter) | |||||
| H dimension == 1 | | H dimension == 1 | | ||||
| ------------------|---------------------------------------------- | ------------------|---------------------------------------------- | ||||
| W dimension == 1 | Not supported | W dimension == 1 | Not supported | ||||
| @@ -659,11 +656,11 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| *@li Compatible with the Caffe operator 2D "Convolution". | *@li Compatible with the Caffe operator 2D "Convolution". | ||||
| */ | */ | ||||
| REG_OP(Conv2D) | REG_OP(Conv2D) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||||
| .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||||
| .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) | |||||
| .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) | |||||
| .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | ||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
| .REQUIRED_ATTR(pads, ListInt) | .REQUIRED_ATTR(pads, ListInt) | ||||
| .ATTR(dilations, ListInt, {1, 1, 1, 1}) | .ATTR(dilations, ListInt, {1, 1, 1, 1}) | ||||
| @@ -186,7 +186,7 @@ REG_OP(ROIAlignGrad) | |||||
| * Three inputs, including: \n | * Three inputs, including: \n | ||||
| *@li features: A 5HD Tensor of type float32 or float16. | *@li features: A 5HD Tensor of type float32 or float16. | ||||
| *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, | *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, | ||||
| * "x0", "x1", "y0", and "y1". | |||||
| * "x0", "y0", "x1", and "y1". | |||||
| *@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved. | *@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved. | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -219,7 +219,7 @@ REG_OP(MaxPool3D) | |||||
| * @attention Constraints: | * @attention Constraints: | ||||
| * @li Computing gradients of global pooling is not supported, which means | * @li Computing gradients of global pooling is not supported, which means | ||||
| * "ksize < x1". | * "ksize < x1". | ||||
| * @li "ksiez" is in the range [1, 255]. "strides" is in the range [1, 63] | |||||
| * @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||||
| * @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
| * Compatible with the TensorFlow operator MaxPoolGrad. | * Compatible with the TensorFlow operator MaxPoolGrad. | ||||
| @@ -239,10 +239,9 @@ REG_OP(MaxPoolGrad) | |||||
| * @brief Computes second-order gradients of the maxpooling function. | * @brief Computes second-order gradients of the maxpooling function. | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * @li x1: Original forward input tensor. Supported type:float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||||
| * @li x2: Has the same type and format as input "x1". | |||||
| * @li grad:Has the same type and format as input "x1". | |||||
| * @li x1: Original forward input tensor of type RealNumberType | |||||
| * @li x2: Original forward output tensor of type RealNumberType | |||||
| * @li grad: Gradient tensor of type RealNumberType | |||||
| * @par Attributes: | * @par Attributes: | ||||
| * @li ksize: A required list or tuple, | * @li ksize: A required list or tuple, | ||||
| @@ -258,9 +257,12 @@ REG_OP(MaxPoolGrad) | |||||
| * @li "x1" and "grads" must have the same shape. | * @li "x1" and "grads" must have the same shape. | ||||
| * @li "x2" and "y" must have the same shape. Otherwise, an error is reported. | * @li "x2" and "y" must have the same shape. Otherwise, an error is reported. | ||||
| * @li "x1", "x2", "grads", and "y" must be 5D tensors. | * @li "x1", "x2", "grads", and "y" must be 5D tensors. | ||||
| * @li ksize[H] and ksize[W] is in the range [1, 255]. | |||||
| * @li strides[H] and strides[W] is in the range [1, 63]. | |||||
| * @li Other dimensions of ksize and strides is 1. | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * @li y: Has the same type and format as input "x1". | |||||
| * @li y: Result tensor of type RealNumberType | |||||
| * @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
| * @li Compatible with the TensorFlow operator MaxPoolGradGrad. | * @li Compatible with the TensorFlow operator MaxPoolGradGrad. | ||||
| @@ -399,18 +401,15 @@ REG_OP(MaxPoolGradWithArgmax) | |||||
| * @brief Computes second-order gradients of the maxpooling function. | * @brief Computes second-order gradients of the maxpooling function. | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * @li x: Original forward input tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||||
| * @li grad: Gradient tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||||
| * @li argmax: An tensor of type int32 or int64. | |||||
| * @li x: Original forward input tensor of type RealNumberType | |||||
| * @li grad: Gradient tensor of type RealNumberType | |||||
| * @li argmax: An tensor of type IndexNumberType | |||||
| * @par Attributes: | * @par Attributes: | ||||
| * @li ksize: A required list, specifying the size of the sliding window. | * @li ksize: A required list, specifying the size of the sliding window. | ||||
| * @li strides: A required list, specifying the stride of the sliding window. | * @li strides: A required list, specifying the stride of the sliding window. | ||||
| * @li padding: A required string, window sliding mode. Either SAME or VALID. | * @li padding: A required string, window sliding mode. Either SAME or VALID. | ||||
| * @par Outputs: | * @par Outputs: | ||||
| * @li y:Result tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64 | |||||
| * @li y:Result tensor of type RealNumberType | |||||
| * @attention Constraints: | * @attention Constraints: | ||||
| * @li Only the cloud platform is supported. | * @li Only the cloud platform is supported. | ||||
| @@ -41,7 +41,7 @@ namespace ge { | |||||
| *@li beta1: A scalar. Has the same type as "var". | *@li beta1: A scalar. Has the same type as "var". | ||||
| *@li beta2: A scalar. Has the same type as "var". | *@li beta2: A scalar. Has the same type as "var". | ||||
| *@li epsilon: A scalar. Has the same type as "var". | *@li epsilon: A scalar. Has the same type as "var". | ||||
| *@li grad: A tensor for the gradient. Has the same type as "var". | |||||
| *@li grad: A tensor for the gradient. Has the same type as "var". | |||||
| * | * | ||||
| *@par Attributes: | *@par Attributes: | ||||
| * use_locking: An optional bool. Defaults to "False". | * use_locking: An optional bool. Defaults to "False". | ||||
| @@ -465,7 +465,7 @@ REG_OP(ApplyKerasMomentumD) | |||||
| /** | /** | ||||
| *@brief Updates '*var' according to the Adam algorithm.. | |||||
| *@brief Updates '*var' according to the Adam algorithm. | |||||
| * lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t) | * lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t) | ||||
| * m_t := beta_1 * m_{t-1} + (1 - beta_1) * g | * m_t := beta_1 * m_{t-1} + (1 - beta_1) * g | ||||
| * v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g | * v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g | ||||
| @@ -866,7 +866,7 @@ REG_OP(ApplyCenteredRMSProp) | |||||
| .OUTPUT(var, TensorType::NumberType()) | .OUTPUT(var, TensorType::NumberType()) | ||||
| .ATTR(use_locking, Bool, false) | .ATTR(use_locking, Bool, false) | ||||
| .OP_END_FACTORY_REG(ApplyCenteredRMSProp) | .OP_END_FACTORY_REG(ApplyCenteredRMSProp) | ||||
| /** | /** | ||||
| *@brief Updates "var" according to the centered RMSProp algorithm. | *@brief Updates "var" according to the centered RMSProp algorithm. | ||||
| * The centered RMSProp algorithm uses an estimate of the centered second moment | * The centered RMSProp algorithm uses an estimate of the centered second moment | ||||
| @@ -1262,7 +1262,7 @@ REG_OP(DataFormatDimMap) | |||||
| .OP_END_FACTORY_REG(DataFormatDimMap) | .OP_END_FACTORY_REG(DataFormatDimMap) | ||||
| /** | /** | ||||
| * @brief Implements stochastic gradient descent (optionally with momentum).\n | |||||
| * @brief Implements stochastic gradient descent (optionally with momentum). | |||||
| * Nesterov momentum is based on the formula from | * Nesterov momentum is based on the formula from | ||||
| * On the importance of initialization and momentum in deep learning.\n | * On the importance of initialization and momentum in deep learning.\n | ||||
| @@ -1508,7 +1508,7 @@ REG_OP(ApplyProximalAdagradD) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *use_locking: An optional bool. Defaults to "False".\n | *use_locking: An optional bool. Defaults to "False".\n | ||||
| * If "True", updating of the var and accum tensors will be protected by a lock; \n | * If "True", updating of the var and accum tensors will be protected by a lock; \n | ||||
| * If "False", the behavior is undefined, but may exhibit less contention. | |||||
| * If "False", the behavior is undefined, but may exhibit less contention. | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *var: A mutable Tensor. Has the same type as "var". | *var: A mutable Tensor. Has the same type as "var". | ||||
| @@ -2172,13 +2172,13 @@ REG_OP(SparseApplyFtrl) | |||||
| * Should be a Variable Tensor. | * Should be a Variable Tensor. | ||||
| * @li grad: A Tensor of the same type as "var", for the gradient. | * @li grad: A Tensor of the same type as "var", for the gradient. | ||||
| * @li indices: A vector of indices into the first dimension of var and accum. | * @li indices: A vector of indices into the first dimension of var and accum. | ||||
| * @par Attributes: | |||||
| * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. | * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. | ||||
| * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. | * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. | ||||
| * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. | * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. | ||||
| * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. | * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. | ||||
| * @par Attributes: | |||||
| * use_locking: An optional bool. Defaults to "False". | |||||
| * @li use_locking: An optional bool. Defaults to "False". | |||||
| * If "True", updating of the "var" and "accum" tensors will be | * If "True", updating of the "var" and "accum" tensors will be | ||||
| * protected by a lock; otherwise the behavior is undefined, | * protected by a lock; otherwise the behavior is undefined, | ||||
| * but may exhibit less contention. | * but may exhibit less contention. | ||||
| @@ -2314,6 +2314,7 @@ REG_OP(SparseApplyFtrlV2D) | |||||
| * var <- var - mom\n | * var <- var - mom\n | ||||
| * | * | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * Nine inputs, including: | |||||
| * @li var: A mutable tensor. Must be one of the data types defined in\n | * @li var: A mutable tensor. Must be one of the data types defined in\n | ||||
| * TensorType::NumberType(). Should be from a Variable(). | * TensorType::NumberType(). Should be from a Variable(). | ||||
| * @li ms: A mutable tensor. Must have the same type as "var". Should be from a | * @li ms: A mutable tensor. Must have the same type as "var". Should be from a | ||||
| @@ -2367,6 +2368,7 @@ REG_OP(SparseApplyRMSProp) | |||||
| * var <- var - mom | * var <- var - mom | ||||
| * | * | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * Six inputs, including: | |||||
| * @li var: A mutable tensor. Must be one of the data types defined in | * @li var: A mutable tensor. Must be one of the data types defined in | ||||
| * TensorType::NumberType(). Should be from a Variable(). | * TensorType::NumberType(). Should be from a Variable(). | ||||
| * @li ms: A mutable tensor. Must have the same type as "var". Should be from a | * @li ms: A mutable tensor. Must have the same type as "var". Should be from a | ||||
| @@ -2418,6 +2420,7 @@ REG_OP(SparseApplyRMSPropD) | |||||
| * accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n | * accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n | ||||
| * | * | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * Eight inputs, including: | |||||
| * @li var: A mutable tensor. Must be one of the data types defined in\n | * @li var: A mutable tensor. Must be one of the data types defined in\n | ||||
| * TensorType::NumberType(). Should be from a Variable(). | * TensorType::NumberType(). Should be from a Variable(). | ||||
| * @li accum: A mutable tensor. Must have the same type as "var". Should be from a | * @li accum: A mutable tensor. Must have the same type as "var". Should be from a | ||||
| @@ -2468,6 +2471,7 @@ REG_OP(SparseApplyAdadelta) | |||||
| * accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n | * accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n | ||||
| * | * | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * Seven inputs, including: | |||||
| * @li var: A mutable tensor. Must be one of the data types defined in | * @li var: A mutable tensor. Must be one of the data types defined in | ||||
| * TensorType::NumberType(). Should be from a Variable(). | * TensorType::NumberType(). Should be from a Variable(). | ||||
| * @li accum: A mutable tensor. Must have the same type as "var". Should be from a | * @li accum: A mutable tensor. Must have the same type as "var". Should be from a | ||||
| @@ -203,11 +203,11 @@ REG_OP(Sigmoid) | |||||
| * @brief Computes z = (y - y*y)*dy. | * @brief Computes z = (y - y*y)*dy. | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * @li y: the input is tensor , dtype is UnaryDataType. | |||||
| * @li dy the input is tensor , dtype is UnaryDataType. | |||||
| * @li y: The input is Tensor, dtype is UnaryDataType. | |||||
| * @li dy: The input is Tensor, dtype is UnaryDataType. | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * z: the shape of output, dtype is UnaryDataType. | |||||
| * z: The shape of output, dtype is UnaryDataType. | |||||
| */ | */ | ||||
| REG_OP(SigmoidGrad) | REG_OP(SigmoidGrad) | ||||
| .INPUT(y, TensorType(UnaryDataType)) | .INPUT(y, TensorType(UnaryDataType)) | ||||
| @@ -21,17 +21,17 @@ | |||||
| namespace ge { | namespace ge { | ||||
| /** | /** | ||||
| * @brief Dequantizes the input tensor into a float tensor.\n | |||||
| * [input_min_range, input_max_range] are scalar floats that specify the range | |||||
| * for "output_data". \n | |||||
| * @brief Dequantizes the input tensor into a float tensor. | |||||
| * [min_range, max_range] are float32 tensors that specify the range | |||||
| * for "y". \n | |||||
| * The "mode" attribute controls exactly which calculations are used to convert\n | * The "mode" attribute controls exactly which calculations are used to convert\n | ||||
| * the float values to their quantized equivalents. | * the float values to their quantized equivalents. | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * @li input_data: A Tensor. Must be one of the following types: int8, uint8, | |||||
| * @li x: A Tensor. Must be one of the following types: int8, uint8, | |||||
| * int32. | * int32. | ||||
| * @li input_min_range: A Tensor of type float32. | |||||
| * @li min_range: A Tensor of type float32. | |||||
| * Specifies the minimum scalar value possibly produced for the input. | * Specifies the minimum scalar value possibly produced for the input. | ||||
| * @li input_max_range: A Tensor of type float32. | |||||
| * @li max_range: A Tensor of type float32. | |||||
| * Specifies the maximum scalar value possibly produced for the input. | * Specifies the maximum scalar value possibly produced for the input. | ||||
| * @par Attributes: | * @par Attributes: | ||||
| @@ -39,11 +39,11 @@ namespace ge { | |||||
| * Defaults to "MIN_COMBINED". | * Defaults to "MIN_COMBINED". | ||||
| * @par Outputs: | * @par Outputs: | ||||
| * output_data: A dictionary of type float32. | |||||
| * y: A dictionary of type float32. | |||||
| * @attention Constraints: | * @attention Constraints: | ||||
| * @li "input_min_range" and "input_max_range" have the same shapes. | |||||
| * @li "input_data" and "output_data" have the same shapes. | |||||
| * @li "min_range" and "max_range" have the same shapes. | |||||
| * @li "x" and "y" have the same shapes. | |||||
| * @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
| * Compatible with the TensorFlow operator Dequantize. | * Compatible with the TensorFlow operator Dequantize. | ||||
| @@ -149,7 +149,7 @@ REG_OP(TileD) | |||||
| * @li indices: A Tensor of type IndexNumberType. | * @li indices: A Tensor of type IndexNumberType. | ||||
| * @par Outputs: | * @par Outputs: | ||||
| * output: A Tensor of type BasicType. | |||||
| * y: A Tensor of type BasicType. | |||||
| * @see GatherNd() | * @see GatherNd() | ||||
| * @attention Constraints: | * @attention Constraints: | ||||
| @@ -767,6 +767,7 @@ REG_OP(SliceD) | |||||
| * dimension. | * dimension. | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * Two inputs, including: | |||||
| * @li x: A 1D or higher tensor of type float16, with the last dimension at | * @li x: A 1D or higher tensor of type float16, with the last dimension at | ||||
| * least "k". | * least "k". | ||||
| * Specifies the data to sort. | * Specifies the data to sort. | ||||
| @@ -789,7 +790,7 @@ REG_OP(SliceD) | |||||
| * @li indices: A Tensor of type int32, specifying the indices of sorted data. | * @li indices: A Tensor of type int32, specifying the indices of sorted data. | ||||
| * @attention Constraints: | * @attention Constraints: | ||||
| * @li k =< 4096 | |||||
| * @li k =< 5120 | |||||
| * @li Size of the last dimension =< 65500 | * @li Size of the last dimension =< 65500 | ||||
| * @li sorted = true | * @li sorted = true | ||||
| * @li Don't support to get score on the platform of Ascend310 | * @li Don't support to get score on the platform of Ascend310 | ||||
| @@ -813,6 +814,7 @@ REG_OP(TopKD) | |||||
| * dimension. | * dimension. | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * Two inputs, including: | |||||
| * @li x: A 1D or higher tensor of type BasicType, with the last dimension | * @li x: A 1D or higher tensor of type BasicType, with the last dimension | ||||
| * at least "k". | * at least "k". | ||||
| * @li k: A 0D Tensor of type int32.\n | * @li k: A 0D Tensor of type int32.\n | ||||
| @@ -902,8 +904,8 @@ REG_OP(ScatterNdD) | |||||
| * @li x2: A 1D Tensor of type int32. A batch_size tensor of class ids. | * @li x2: A 1D Tensor of type int32. A batch_size tensor of class ids. | ||||
| * @par Attributes: | * @par Attributes: | ||||
| * @li k: A required int32, specifying the number of top elements to look at for | |||||
| * computing precision. | |||||
| * @li k: A required IndexNumberType, specifying the number of top elements to | |||||
| * look at for computing precision. | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * y: A Tensor of type bool. | * y: A Tensor of type bool. | ||||
| @@ -1000,6 +1002,7 @@ REG_OP(StridedSliceAssign) | |||||
| * "strides", etc. work exactly as in "StridedSlice". | * "strides", etc. work exactly as in "StridedSlice". | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * Two inputs, including: | |||||
| * @li var: A mutable ND Tensor of type BasicType. | * @li var: A mutable ND Tensor of type BasicType. | ||||
| * @li input_value: A mutable ND "Tensor" of type BasicType. | * @li input_value: A mutable ND "Tensor" of type BasicType. | ||||
| @@ -1335,7 +1338,7 @@ REG_OP(InplaceSubD) | |||||
| .OP_END_FACTORY_REG(InplaceSubD) | .OP_END_FACTORY_REG(InplaceSubD) | ||||
| /** | /** | ||||
| * @brief Applies sparse addition to input "x" using individual values or slices\n | |||||
| * @brief Applies sparse addition to input "x" using individual values or slices | |||||
| * from "updates" according to "indices". The updates are non-aliasing: "x" is\n | * from "updates" according to "indices". The updates are non-aliasing: "x" is\n | ||||
| * only modified in-place if no other operations will use it. Otherwise, a copy\n | * only modified in-place if no other operations will use it. Otherwise, a copy\n | ||||
| * of "x" is made. This operation has a gradient with respect to both "x" and | * of "x" is made. This operation has a gradient with respect to both "x" and | ||||
| @@ -1372,7 +1375,7 @@ REG_OP(ScatterNonAliasingAdd) | |||||
| * @li x: A Tensor of type RealNumberType. | * @li x: A Tensor of type RealNumberType. | ||||
| * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix | * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix | ||||
| * of "x.shape". | * of "x.shape". | ||||
| * @li k: A Tensor. | |||||
| * @li num_segments: A Tensor of type IndexNumberType. | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * y: A Tensor of type RealNumberType. | * y: A Tensor of type RealNumberType. | ||||
| @@ -1419,13 +1422,13 @@ REG_OP(UnsortedSegmentMinD) | |||||
| * @par Inputs: | * @par Inputs: | ||||
| * Three inputs, including: | * Three inputs, including: | ||||
| * @li x: A Tensor of type RealNumberType. | |||||
| * @li x: A Tensor of type NumberType. | |||||
| * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix | * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix | ||||
| * of "x.shape". | * of "x.shape". | ||||
| * @li k: A Tensor. | |||||
| * @li num_segments: A Tensor of type IndexNumberType. | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * y: A Tensor of type RealNumberType. | |||||
| * y: A Tensor of type NumberType. | |||||
| * @see UnsortedSegmentSum(), UnsortedSegmentMin(), | * @see UnsortedSegmentSum(), UnsortedSegmentMin(), | ||||
| @@ -20,19 +20,38 @@ | |||||
| #include "graph/operator_reg.h" | #include "graph/operator_reg.h" | ||||
| namespace ge { | namespace ge { | ||||
| /** | |||||
| *@brief Convert tensor format from HWCN to C1HWNCoC0. | |||||
| *@par Inputs: | |||||
| *x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN. | |||||
| *@par Outputs: | |||||
| *y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0. | |||||
| */ | |||||
| REG_OP(DepthwiseWeight4DTo6D) | REG_OP(DepthwiseWeight4DTo6D) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | ||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | ||||
| .OP_END_FACTORY_REG(DepthwiseWeight4DTo6D) | .OP_END_FACTORY_REG(DepthwiseWeight4DTo6D) | ||||
| /** | |||||
| *@brief Convert tensor format from C1HWNCoC0 to HWCN. | |||||
| *@par Inputs: | |||||
| *x: A Tensor. Must be 6D Tensor of type float16, float32, int32, uint16, with format C1HWNCoC0. | |||||
| *@par Attributes: | |||||
| *channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN. | |||||
| *@par Outputs: | |||||
| *y: A 4D Tensor. Has the same type as "x", with format HWCN. | |||||
| */ | |||||
| REG_OP(DepthwiseWeight6DTo4D) | REG_OP(DepthwiseWeight6DTo4D) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | ||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | ||||
| .ATTR(channel_size, Int, 16) | .ATTR(channel_size, Int, 16) | ||||
| .OP_END_FACTORY_REG(DepthwiseWeight6DTo4D) | .OP_END_FACTORY_REG(DepthwiseWeight6DTo4D) | ||||
| /** | /** | ||||
| *@brief Permutes the dimensions according to perm.\n | *@brief Permutes the dimensions according to perm.\n | ||||
| The returned tensor's dimension i will correspond to the input dimension perm[i]. | The returned tensor's dimension i will correspond to the input dimension perm[i]. | ||||
| @@ -390,20 +409,20 @@ REG_OP(SpaceToBatchD) | |||||
| .OP_END_FACTORY_REG(SpaceToBatchD) | .OP_END_FACTORY_REG(SpaceToBatchD) | ||||
| /** | /** | ||||
| * @brief Unpacks the given dimension of a rank-R tensor "x" into rank-(R-1) | |||||
| * @brief Unpacks the given dimension of a rank-R Tensor "x" into rank-(R-1) | |||||
| * tensors. | * tensors. | ||||
| * @par Inputs: | * @par Inputs: | ||||
| * x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0. | * x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0. | ||||
| * @par Attributes: | * @par Attributes: | ||||
| * @li num: An optional int, specifying the number of tensors to be unpacked to. | |||||
| * @li num: A required int, specifying the number of tensors to be unpacked to. | |||||
| * Defaults to "None". | * Defaults to "None". | ||||
| * @li axis: A required int, specifying the axis to unpack along. The value range | |||||
| * @li axis: An optional int, specifying the axis to unpack along. The value range | |||||
| * is [-R, R). | * is [-R, R). | ||||
| * @par Outputs: | * @par Outputs: | ||||
| * y: The list of Tensor objects unpacked from "x", of type BasicType. | |||||
| * y: Dynamic output. The list of Tensor objects unpacked from "x", of type BasicType. | |||||
| * @attention Constraints: | * @attention Constraints: | ||||
| * @li If "num" is not specified, it is inferred from the shape of "x". | * @li If "num" is not specified, it is inferred from the shape of "x". | ||||
| @@ -434,11 +453,11 @@ REG_OP(Unpack) | |||||
| * dimension of images. | * dimension of images. | ||||
| * @li strides: A required list or tuple. How far the centers of two consecutive | * @li strides: A required list or tuple. How far the centers of two consecutive | ||||
| * patches are in the images. Must be: [1, stride_rows, stride_cols, 1]. | * patches are in the images. Must be: [1, stride_rows, stride_cols, 1]. | ||||
| * @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1]. \n | |||||
| * This is the input stride, specifying how far two consecutive patch \n | |||||
| * @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1].\n | |||||
| * This is the input stride, specifying how far two consecutive patch\n | |||||
| * samples are in the input. Equivalent to extracting patches | * samples are in the input. Equivalent to extracting patches | ||||
| * with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *\n | * with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *\n | ||||
| * (rates - 1), followed by subsampling them spatially by a factor of rates. \n | |||||
| * (rates - 1), followed by subsampling them spatially by a factor of rates.\n | |||||
| * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. | * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. | ||||
| * @li padding: A required string. The type of padding algorithm to use. | * @li padding: A required string. The type of padding algorithm to use. | ||||
| @@ -59,6 +59,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { | |||||
| domi::ParseParamFunc GetParseParamFunc(const std::string &op_type); | domi::ParseParamFunc GetParseParamFunc(const std::string &op_type); | ||||
| domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &op_type); | |||||
| domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type); | domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type); | ||||
| domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); | domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); | ||||
| @@ -73,6 +75,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { | |||||
| std::unordered_map<std::string, std::set<std::string>> op_ori_optype_map_; | std::unordered_map<std::string, std::set<std::string>> op_ori_optype_map_; | ||||
| std::unordered_map<std::string, domi::ImplyType> op_run_mode_map_; | std::unordered_map<std::string, domi::ImplyType> op_run_mode_map_; | ||||
| std::unordered_map<std::string, ParseParamFunc> opParseParamsFnMap_; | std::unordered_map<std::string, ParseParamFunc> opParseParamsFnMap_; | ||||
| std::unordered_map<std::string, ParseParamByOpFunc> parse_params_by_op_func_map_; | |||||
| std::unordered_map<std::string, FusionParseParamFunc> fusionOpParseParamsFnMap_; | std::unordered_map<std::string, FusionParseParamFunc> fusionOpParseParamsFnMap_; | ||||
| std::unordered_map<std::string, ParseSubgraphFunc> op_types_to_parse_subgraph_post_func_; | std::unordered_map<std::string, ParseSubgraphFunc> op_types_to_parse_subgraph_post_func_; | ||||
| std::unordered_map<std::string, std::vector<RemoveInputConfigure>> remove_input_configure_map_; | std::unordered_map<std::string, std::vector<RemoveInputConfigure>> remove_input_configure_map_; | ||||
| @@ -98,6 +98,14 @@ RTS_API rtError_t rtCtxSynchronize(void); | |||||
| */ | */ | ||||
| RTS_API rtError_t rtCtxGetCurrent(rtContext_t *ctx); | RTS_API rtError_t rtCtxGetCurrent(rtContext_t *ctx); | ||||
| /** | |||||
| * @ingroup rt_context | |||||
| * @brief returns the primary context of device. | |||||
| * @param [out] ctx returned context | |||||
| * @return RT_ERROR_NONE for ok | |||||
| */ | |||||
| RTS_API rtError_t rtGetPriCtxByDeviceId(int32_t device, rtContext_t *ctx); | |||||
| /** | /** | ||||
| * @ingroup rt_context | * @ingroup rt_context | ||||
| * @brief returns the device ID for the current context | * @brief returns the device ID for the current context | ||||
| @@ -277,6 +277,7 @@ extern int dlog_setlevel(int moduleId, int level, int enableEvent); | |||||
| /** | /** | ||||
| * @ingroup slog | * @ingroup slog | ||||
| * @brief CheckLogLevel: check module level enable or not | * @brief CheckLogLevel: check module level enable or not | ||||
| * users no need to call it because all dlog interface(include inner interface) has already called | |||||
| * | * | ||||
| * @param [in]moduleId: module id, eg: CCE | * @param [in]moduleId: module id, eg: CCE | ||||
| * @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG | * @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG | ||||
| @@ -291,46 +292,76 @@ extern int CheckLogLevel(int moduleId, int logLevel); | |||||
| * @param [in]moduleId: module id, eg: CCE | * @param [in]moduleId: module id, eg: CCE | ||||
| * @param [in]fmt: log content | * @param [in]fmt: log content | ||||
| */ | */ | ||||
| #define dlog_error(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| #define dlog_error(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } while (0) | } while (0) | ||||
| /** | /** | ||||
| * @ingroup slog | * @ingroup slog | ||||
| * @brief dlog_warn: print warning log | * @brief dlog_warn: print warning log | ||||
| * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time | |||||
| * | * | ||||
| * @param [in]moduleId: module id, eg: CCE | * @param [in]moduleId: module id, eg: CCE | ||||
| * @param [in]fmt: log content | * @param [in]fmt: log content | ||||
| */ | */ | ||||
| #define dlog_warn(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| #ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||||
| #define dlog_warn(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } while (0) | } while (0) | ||||
| #else | |||||
| #define dlog_warn(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| if(CheckLogLevel(moduleId, DLOG_WARN) == 1) { \ | |||||
| DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } \ | |||||
| } while (0) | |||||
| #endif | |||||
| /** | /** | ||||
| * @ingroup slog | * @ingroup slog | ||||
| * @brief dlog_info: print info log | * @brief dlog_info: print info log | ||||
| * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time | |||||
| * | * | ||||
| * @param [in]moduleId: module id, eg: CCE | * @param [in]moduleId: module id, eg: CCE | ||||
| * @param [in]fmt: log content | * @param [in]fmt: log content | ||||
| */ | */ | ||||
| #define dlog_info(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| #ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||||
| #define dlog_info(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } while (0) | |||||
| #else | |||||
| #define dlog_info(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| if(CheckLogLevel(moduleId, DLOG_INFO) == 1) { \ | |||||
| DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } \ | |||||
| } while (0) | } while (0) | ||||
| #endif | |||||
| /** | /** | ||||
| * @ingroup slog | * @ingroup slog | ||||
| * @brief dlog_debug: print debug log | * @brief dlog_debug: print debug log | ||||
| * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time | |||||
| * | * | ||||
| * @param [in]moduleId: module id, eg: CCE | * @param [in]moduleId: module id, eg: CCE | ||||
| * @param [in]fmt: log content | * @param [in]fmt: log content | ||||
| */ | */ | ||||
| #define dlog_debug(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| #ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||||
| #define dlog_debug(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } while (0) | } while (0) | ||||
| #else | |||||
| #define dlog_debug(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \ | |||||
| DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } \ | |||||
| } while (0) | |||||
| #endif | |||||
| /** | /** | ||||
| * @ingroup slog | * @ingroup slog | ||||
| @@ -339,9 +370,9 @@ extern int CheckLogLevel(int moduleId, int logLevel); | |||||
| * @param [in]moduleId: module id, eg: CCE | * @param [in]moduleId: module id, eg: CCE | ||||
| * @param [in]fmt: log content | * @param [in]fmt: log content | ||||
| */ | */ | ||||
| #define dlog_event(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| #define dlog_event(moduleId, fmt, ...) \ | |||||
| do { \ | |||||
| DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } while (0) | } while (0) | ||||
| /** | /** | ||||
| @@ -352,10 +383,19 @@ extern int CheckLogLevel(int moduleId, int logLevel); | |||||
| * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) | * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) | ||||
| * @param [in]fmt: log content | * @param [in]fmt: log content | ||||
| */ | */ | ||||
| #define Dlog(moduleId, level, fmt, ...) \ | |||||
| do { \ | |||||
| DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| #ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||||
| #define Dlog(moduleId, level, fmt, ...) \ | |||||
| do { \ | |||||
| DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } while (0) | } while (0) | ||||
| #else | |||||
| #define Dlog(moduleId, level, fmt, ...) \ | |||||
| do { \ | |||||
| if(CheckLogLevel(moduleId, level) == 1) { \ | |||||
| DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } \ | |||||
| } while (0) | |||||
| #endif | |||||
| /** | /** | ||||
| * @ingroup slog | * @ingroup slog | ||||
| @@ -366,10 +406,19 @@ extern int CheckLogLevel(int moduleId, int logLevel); | |||||
| * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) | * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) | ||||
| * @param [in]fmt: log content | * @param [in]fmt: log content | ||||
| */ | */ | ||||
| #define DlogSub(moduleId, submodule, level, fmt, ...) \ | |||||
| do { \ | |||||
| DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ | |||||
| #ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||||
| #define DlogSub(moduleId, submodule, level, fmt, ...) \ | |||||
| do { \ | |||||
| DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ | |||||
| } while (0) | |||||
| #else | |||||
| #define DlogSub(moduleId, submodule, level, fmt, ...) \ | |||||
| do { \ | |||||
| if(CheckLogLevel(moduleId, level) == 1) { \ | |||||
| DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ | |||||
| } \ | |||||
| } while (0) | } while (0) | ||||
| #endif | |||||
| /** | /** | ||||
| * @ingroup slog | * @ingroup slog | ||||
| @@ -381,11 +430,19 @@ extern int CheckLogLevel(int moduleId, int logLevel); | |||||
| * @param [in]kvNum: key-value element num in array | * @param [in]kvNum: key-value element num in array | ||||
| * @param [in]fmt: log content | * @param [in]fmt: log content | ||||
| */ | */ | ||||
| #define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...) \ | |||||
| do { \ | |||||
| DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| #ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||||
| #define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...) \ | |||||
| do { \ | |||||
| DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } while (0) | } while (0) | ||||
| #else | |||||
| #define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...) \ | |||||
| do { \ | |||||
| if(CheckLogLevel(moduleId, level) == 1) { \ | |||||
| DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||||
| } \ | |||||
| } while (0) | |||||
| #endif | |||||
| /** | /** | ||||
| * @ingroup slog | * @ingroup slog | ||||