Merge pull request !60 from HW_KK/r0.7tags/v0.7.0-beta
| @@ -42,6 +42,9 @@ class GraphOptimizer { | |||||
| // optimize original graph for FE quant optimize | // optimize original graph for FE quant optimize | ||||
| virtual Status OptimizeGraphPrepare(ComputeGraph &graph) { return SUCCESS; } | virtual Status OptimizeGraphPrepare(ComputeGraph &graph) { return SUCCESS; } | ||||
| // optimize graph before build for RTS | |||||
| // The default implementation does not touch the graph and returns SUCCESS. | |||||
| virtual Status OptimizeGraphBeforeBuild(ComputeGraph &graph) { return SUCCESS; } | |||||
| // optimize original graph, using in graph preparation stage | // optimize original graph, using in graph preparation stage | ||||
| virtual Status OptimizeOriginalGraph(ComputeGraph &graph) = 0; | virtual Status OptimizeOriginalGraph(ComputeGraph &graph) = 0; | ||||
| @@ -0,0 +1,39 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_COMMON_UTILS_AI_CORE_COMMON_ATTR_DEFINE_H_ | |||||
| #define INC_COMMON_UTILS_AI_CORE_COMMON_ATTR_DEFINE_H_ | |||||
| #include <string> | |||||
| // String constants shared through this header. They are `static const` at namespace | |||||
| // scope, so every translation unit that includes the header gets its own copy. | |||||
| // NOTE(review): the names suggest these are attribute keys set on ops/nodes - confirm against callers. | |||||
| namespace fe { | |||||
| static const std::string SCOPE_ID_ATTR = "fusion_scope"; | |||||
| static const std::string FE_IMPLY_TYPE = "_fe_imply_type"; | |||||
| static const std::string PARENT_OP_TYPE = "parentOpType"; | |||||
| static const std::string ATTR_NAME_TASK_L2_FUSION_INFO_EXTEND_PTR = "task_l2_fusion_info_extend_content"; | |||||
| static const std::string ATTR_DATA_DUMP_REF = "_datadump_ref"; | |||||
| static const std::string ATTR_NAME_L2_FUSION_EXTEND_PTR = "l2_fusion_extend_content"; | |||||
| static const std::string L1_OPTIMIZED = "l1_optimized"; | |||||
| static const std::string L2_OPTIMIZED = "l2_optimized"; | |||||
| } // namespace fe | |||||
| #endif | |||||
| @@ -0,0 +1,118 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_COMMON_UTILS_AI_CORE_COMMON_TYPES_H_ | |||||
| #define INC_COMMON_UTILS_AI_CORE_COMMON_TYPES_H_ | |||||
| #include "graph/anchor.h" | |||||
| #include "graph/types.h" | |||||
| #include "runtime/kernel.h" | |||||
| #include <cstdint> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| namespace fe { | |||||
| // Plain data record: a source op id, an anchor pointer, and two fusion indices. | |||||
| // No member has a default initializer, so callers must set every field. | |||||
| struct FusionOpSrc { | |||||
| uint32_t src_op_id; | |||||
| ge::AnchorPtr src_anchor; | |||||
| int32_t fusion_src_index; | |||||
| int32_t fusion_dst_index; | |||||
| }; | |||||
| // Plain data record pairing a destination op id with an anchor pointer. | |||||
| // dst_op_id has no default initializer. | |||||
| struct FusionOpDst { | |||||
| uint32_t dst_op_id; | |||||
| ge::AnchorPtr dst_anchor; | |||||
| }; | |||||
| // Holds a pair of anchors (`edge`) together with a (string, anchor) pair. | |||||
| // NOTE(review): `edge` is presumably (source anchor, destination anchor) and the string a node name - confirm. | |||||
| struct FusionDataFlow { | |||||
| std::pair<ge::AnchorPtr, ge::AnchorPtr> edge; | |||||
| std::pair<std::string, ge::AnchorPtr> node_dataindex_pair; | |||||
| }; | |||||
| // L2 fusion record made of l2Index, l2Addr and l2PageNum. Fields have no default initializers. | |||||
| typedef struct tagL2FusionData { | |||||
| uint32_t l2Index; | |||||
| uint64_t l2Addr; | |||||
| uint64_t l2PageNum; | |||||
| } L2FusionData_t; | |||||
| // Ordered map from a 64-bit key to its L2 fusion record. | |||||
| // NOTE(review): the meaning of the key is not visible in this header - confirm. | |||||
| typedef std::map<uint64_t, L2FusionData_t> L2FusionDataMap_t; | |||||
| // Bundles a runtime rtL2Ctrl_t block with two fixed 8-entry arrays (node names and output indices). | |||||
| // NOTE(review): the literal 8 is repeated and presumably has to match a slot count in rtL2Ctrl_t - confirm. | |||||
| typedef struct tagFeSmDesc { | |||||
| rtL2Ctrl_t l2ctrl; | |||||
| std::string nodeName[8]; | |||||
| uint8_t outputIndex[8]; | |||||
| } feSmDesc_t; | |||||
| // L2 fusion information for one node: its name, an feSmDesc_t block, separate input and | |||||
| // output L2FusionDataMap_t maps, and an isUsed field (uint32_t, no default initializer). | |||||
| typedef struct TagTaskL2FusionInfo { | |||||
| std::string nodeName; | |||||
| feSmDesc_t l2Info; | |||||
| L2FusionDataMap_t input; | |||||
| L2FusionDataMap_t output; | |||||
| uint32_t isUsed; | |||||
| } TaskL2FusionInfo_t; | |||||
| // Shared-ownership handle to a TaskL2FusionInfo_t. | |||||
| using L2FusionInfoPtr = std::shared_ptr<TaskL2FusionInfo_t>; | |||||
| // Per-op L1 information block (space, workspace, valid shapes, slice offsets, split index). | |||||
| // NOTE: the constructor below overwrites the in-class `= 0` defaults of opL1Space and | |||||
| // opL1WorkspaceSize, so a default-constructed object holds -1 in both fields. | |||||
| typedef struct ToOpStruct { | |||||
| int64_t opL1Space = 0; | |||||
| std::vector<int64_t> opL1FusionType; | |||||
| int64_t opL1WorkspaceFlag = 0; // for workspace flag | |||||
| int64_t opL1WorkspaceSize = 0; | |||||
| std::vector<std::vector<int64_t>> validInputShape; | |||||
| std::vector<std::vector<int64_t>> validOutputShape; | |||||
| std::vector<std::vector<int64_t>> sliceInputOffset; // conv & pooling & ReadSelect | |||||
| std::vector<std::vector<int64_t>> sliceOutputOffset; // WriteSelect | |||||
| std::vector<uint32_t> totalShape; | |||||
| uint32_t splitIndex = 0; | |||||
| ToOpStruct() { | |||||
| // set invalid value for essential variable | |||||
| opL1Space = -1; | |||||
| opL1WorkspaceSize = -1; | |||||
| } | |||||
| } ToOpStruct_t; | |||||
| // Op implementation kinds. Only the first enumerator has an explicit value (0); the rest are | |||||
| // consecutive, so inserting or reordering enumerators changes every later value. | |||||
| enum OpImplType { | |||||
| EN_IMPL_CUSTOM_CONSTANT_CCE = 0, // custom constant op | |||||
| EN_IMPL_CUSTOM_TIK, // custom tik op | |||||
| EN_IMPL_CUSTOM_TBE, // custom tbe op | |||||
| EN_IMPL_HW_CONSTANT_CCE, // Huawei built-in constant op | |||||
| EN_IMPL_HW_GENERAL_CCE, // Huawei built-in cce op | |||||
| EN_IMPL_HW_TIK, // Huawei built-in tik op | |||||
| EN_IMPL_HW_TBE, // Huawei built-in tbe op | |||||
| EN_IMPL_RL, // RL op | |||||
| EN_IMPL_PLUGIN_TBE, // Huawei built-in tbe plugin op | |||||
| EN_IMPL_VECTOR_CORE_HW_TBE, // Huawei built-in tbe op (vector core) | |||||
| EN_IMPL_VECTOR_CORE_CUSTOM_TBE, // custom tbe op (vector core) | |||||
| EN_IMPL_NON_PERSISTENT_CUSTOM_TBE, // non-persistent custom tbe op | |||||
| EN_RESERVED // reserved value | |||||
| }; | |||||
| // Size in bytes associated with each listed ge::DataType. | |||||
| // DT_FLOAT16 is sized with sizeof(int16_t); DT_DUAL is sizeof(float) + sizeof(int8_t). | |||||
| // The map is const, so operator[] is unavailable: use find()/at() and handle data types | |||||
| // that are not listed here. Being `static`, each including translation unit gets its own copy. | |||||
| static const std::map<ge::DataType, uint32_t> DATATYPE_SIZE_MAP{{ge::DT_FLOAT, sizeof(float)}, | |||||
| {ge::DT_FLOAT16, sizeof(int16_t)}, | |||||
| {ge::DT_INT8, sizeof(int8_t)}, | |||||
| {ge::DT_INT32, sizeof(int32_t)}, | |||||
| {ge::DT_UINT8, sizeof(uint8_t)}, | |||||
| {ge::DT_UINT32, sizeof(uint32_t)}, | |||||
| {ge::DT_INT16, sizeof(int16_t)}, | |||||
| {ge::DT_UINT16, sizeof(uint16_t)}, | |||||
| {ge::DT_INT64, sizeof(int64_t)}, | |||||
| {ge::DT_UINT64, sizeof(uint64_t)}, | |||||
| {ge::DT_DOUBLE, sizeof(double)}, | |||||
| {ge::DT_BOOL, sizeof(bool)}, | |||||
| {ge::DT_DUAL, sizeof(float) + sizeof(int8_t)}, | |||||
| {ge::DT_DUAL_SUB_UINT8, sizeof(int8_t)}, | |||||
| {ge::DT_DUAL_SUB_INT8, sizeof(int8_t)}}; | |||||
| } // namespace fe | |||||
| #endif | |||||
| @@ -0,0 +1,107 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_COMMON_UTILS_AI_CORE_COMMON_GRAPH_COMMON_H_ | |||||
| #define INC_COMMON_UTILS_AI_CORE_COMMON_GRAPH_COMMON_H_ | |||||
| #include "graph/compute_graph.h" | |||||
| #include "common/aicore_util_types.h" | |||||
| #include "register/graph_optimizer/graph_optimize_register_error_codes.h" | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| namespace fe { | |||||
| // Ordered map from an int64_t key to the list of nodes stored under it. | |||||
| // NOTE(review): the key is presumably a fusion scope id - confirm. | |||||
| using kScopeNodeMap_t = std::map<int64_t, std::vector<ge::NodePtr>>; | |||||
| // (key, node list) pair. This is not kScopeNodeMap_t::value_type, whose key is const. | |||||
| using kScopeNodePair_t = std::pair<int64_t, std::vector<ge::NodePtr>>; | |||||
| // GraphCommImpl is only forward-declared in this header. | |||||
| class GraphCommImpl; | |||||
| using GraphCommImplPtr = std::unique_ptr<GraphCommImpl>; | |||||
| // Graph helper for one engine: declares scope-map lookup, fusion-node copy and | |||||
| // fusion edge-list collection/merge operations. Definitions live outside this header. | |||||
| // All standard types are spelled with std:: so the header does not rely on | |||||
| // using-declarations leaked by other headers. | |||||
| class GraphComm { | |||||
| public: | |||||
| GraphComm(const std::string &engineName); | |||||
| virtual ~GraphComm(); | |||||
| // Not copyable (the object holds a unique_ptr to its implementation). | |||||
| GraphComm(const GraphComm &in) = delete; | |||||
| GraphComm &operator=(const GraphComm &in) = delete; | |||||
| // Fills fusionMap from graph. | |||||
| Status GetscopeNodeMap(ge::ComputeGraph &graph, kScopeNodeMap_t &fusionMap); | |||||
| Status CopyFusionOpNodes(std::vector<FusionDataFlow> &fusInputEdgeList, | |||||
| std::vector<FusionDataFlow> &fusOutputEdgeList, | |||||
| std::vector<ge::NodePtr> &fusNodelist, ge::OpDescPtr fusionOpDesc, | |||||
| ge::ComputeGraphPtr fusionGraph); | |||||
| Status CopyFusionOpEdges(ge::OpDescPtr fusionOpDesc, ge::ComputeGraph &origGraph, ge::ComputeGraphPtr fusionGraph); | |||||
| Status GetNodeDataFlowMap(const ge::NodePtr &fusNode, | |||||
| std::map<ge::NodePtr, std::map<ge::AnchorPtr, ge::AnchorPtr>> &fusionOpAnchorsMap, | |||||
| ge::kFusionDataFlowVec_t &fusDataflowList, const int &mapType); | |||||
| Status GetFusionNodeEdgeList(std::vector<ge::NodePtr> &fusNodelist, std::vector<FusionDataFlow> &fusInputEdgeList, | |||||
| std::vector<FusionDataFlow> &fusOutputEdgeList); | |||||
| // Bookkeeping for the fusion source/destination records held by this object. | |||||
| void ClearFusionSrc(); | |||||
| void ClearFusionDst(); | |||||
| void AddFusionOutputSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, const int32_t &fusion_src_index, | |||||
| std::pair<std::string, ge::AnchorPtr> &node_dataindex_pair); | |||||
| void AddFusionInputSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, const int32_t &fusion_dst_index, | |||||
| std::pair<std::string, ge::AnchorPtr> &node_dataindex_pair); | |||||
| void SaveFusionDst(const uint32_t &dst_op_id, ge::AnchorPtr dst_anchor); | |||||
| bool IsFusionDstExist(const uint32_t &dst_op_id, const ge::AnchorPtr &dst_anchor); | |||||
| // fusion_src_index and fusion_dst_index are output parameters. | |||||
| bool GetFusionSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, int32_t &fusion_src_index, | |||||
| int32_t &fusion_dst_index); | |||||
| Status GetFusionNodeCtrlEdgeList(std::vector<ge::NodePtr> &fusNodelist, | |||||
| std::vector<FusionDataFlow> &fusInputCtrlEdgeList, | |||||
| std::vector<FusionDataFlow> &fusOutputCtrlEdgeList); | |||||
| Status MergeFusionNodeEdgeList(ge::NodePtr &fusNode, std::vector<ge::NodePtr> &fusNodelist, | |||||
| std::vector<FusionDataFlow> &fusInputEdgeList, | |||||
| std::vector<FusionDataFlow> &fusOutputEdgeList); | |||||
| Status MergeFusionNodeCtrlEdgeList(ge::NodePtr &fusNode, std::vector<ge::NodePtr> &fusNodelist, | |||||
| std::vector<FusionDataFlow> &fusInputEdgeList, | |||||
| std::vector<FusionDataFlow> &fusOutputEdgeList); | |||||
| std::string GetEngineName(); | |||||
| private: | |||||
| Status MergeFusionNodeInputEdgeList(ge::NodePtr fusNode, std::vector<ge::NodePtr> &fusNodelist, | |||||
| std::vector<FusionDataFlow> &fusInputEdgeList); | |||||
| Status MergeFusionNodeOutputEdgeList(ge::NodePtr fusNode, std::vector<ge::NodePtr> &fusNodelist, | |||||
| std::vector<FusionDataFlow> &fusOutputEdgeList); | |||||
| std::string engineName_; | |||||
| std::vector<FusionOpSrc> exist_fusion_src_list_; | |||||
| std::vector<FusionOpDst> exist_fusion_dst_list_; | |||||
| // std::vector<std::multimap<std::string, uint32_t>> | |||||
| ge::kFusionDataFlowVec_t fusion_input_dataflow_list_; | |||||
| // std::vector<std::multimap<std::string, ge::AnchorPtr>> | |||||
| ge::kFusionDataFlowVec_t fusion_output_dataflow_list_; | |||||
| GraphCommImplPtr graphCommImplPtr_; | |||||
| }; | |||||
| } // namespace fe | |||||
| #endif | |||||
| @@ -0,0 +1,42 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_COMMON_UTILS_AI_CORE_COMMON_SCOPE_ALLOCATOR_H_ | |||||
| #define INC_COMMON_UTILS_AI_CORE_COMMON_SCOPE_ALLOCATOR_H_ | |||||
| #include "graph/op_desc.h" | |||||
| namespace fe { | |||||
| // Declares scope-id bookkeeping (Init / GetCurrentScopeId / AllocateScopeId) and | |||||
| // scope-attribute accessors on op descriptors. Definitions live outside this header. | |||||
| class ScopeAllocator { | |||||
| public: | |||||
| ScopeAllocator(); | |||||
| virtual ~ScopeAllocator(); | |||||
| // Copying is disabled. | |||||
| ScopeAllocator(const ScopeAllocator &in) = delete; | |||||
| ScopeAllocator &operator=(const ScopeAllocator &in) = delete; | |||||
| void Init(); | |||||
| int64_t GetCurrentScopeId(); | |||||
| int64_t AllocateScopeId(); | |||||
| bool HasScopeAttr(ge::ConstOpDescPtr opdef); | |||||
| // scopeId is an output parameter. | |||||
| bool GetScopeAttr(ge::ConstOpDescPtr opdef, int64_t &scopeId); | |||||
| bool SetScopeAttr(ge::OpDescPtr opdef, int64_t scopeId); | |||||
| private: | |||||
| int64_t scopeId; | |||||
| }; | |||||
| } // namespace fe | |||||
| #endif | |||||
| @@ -14,15 +14,20 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef GE_GRAPH_PASSES_SWITCH_SPLIT_PASS_H_ | |||||
| #define GE_GRAPH_PASSES_SWITCH_SPLIT_PASS_H_ | |||||
| #ifndef AICORE_PARAM_CALCULATOR | |||||
| #define AICORE_PARAM_CALCULATOR | |||||
| #include <set> | |||||
| #include "graph/passes/base_pass.h" | |||||
| namespace ge { | |||||
| class SwitchSplitPass : public BaseNodePass { | |||||
| #include "graph/node.h" | |||||
| #include "graph_optimizer/graph_optimize_register_error_codes.h" | |||||
| namespace fe { | |||||
| class AICoreParamCalculator { | |||||
| public: | public: | ||||
| Status Run(NodePtr &node) override; | |||||
| AICoreParamCalculator(); | |||||
| ~AICoreParamCalculator(); | |||||
| Status CalcOpRunningParam(ge::Node &node); | |||||
| }; | }; | ||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_PASSES_SWITCH_SPLIT_PASS_H_ | |||||
| } // namespace fe | |||||
| #endif // AICORE_PARAM_CALCULATOR | |||||
| @@ -0,0 +1,45 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef TENSORSIZE_CALCULATOR_H | |||||
| #define TENSORSIZE_CALCULATOR_H | |||||
| #include "graph_optimizer/graph_optimize_register_error_codes.h" | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include "graph/compute_graph.h" | |||||
| #include "graph/op_desc.h" | |||||
| namespace fe { | |||||
| // Static helpers that compute tensor sizes for an op descriptor. | |||||
| class TensorSizeCalculator { | |||||
| public: | |||||
| /** | |||||
| * Calculate the tensor size of input and output of each opdesc | |||||
| * @param opDesc opdesc object | |||||
| * @return status SUCCESS or FAILED | |||||
| */ | |||||
| static Status CalculateOpTensorSize(ge::OpDesc &opDesc); | |||||
| private: | |||||
| // Input-side and output-side helpers. Both take outputRealCalcFlag by non-const reference. | |||||
| // NOTE(review): what the flag encodes is not visible in this header - confirm in the implementation. | |||||
| static Status CalcInputOpTensorSize(ge::OpDesc &opDesc, int32_t &outputRealCalcFlag); | |||||
| static Status CalcOutputOpTensorSize(ge::OpDesc &opDesc, int32_t &outputRealCalcFlag); | |||||
| }; | |||||
| } // namespace fe | |||||
| #endif // TENSORSIZE_CALCULATOR_H | |||||
| @@ -98,6 +98,15 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { | |||||
| /// | /// | ||||
| Status RunGraphAsync(uint32_t graphId, const std::vector<ge::InputTensorInfo> &inputs, RunAsyncCallback callback); | Status RunGraphAsync(uint32_t graphId, const std::vector<ge::InputTensorInfo> &inputs, RunAsyncCallback callback); | ||||
| /// | |||||
| /// @ingroup ge_graph | |||||
| /// @brief get the values of the named variables in this session | |||||
| /// @note no session id is passed; the call applies to the Session object it is invoked on | |||||
| /// @param [in] var_names: variable names | |||||
| /// @param [out] var_values: variable values | |||||
| /// @return Status result of function | |||||
| /// | |||||
| Status GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values); | |||||
| /// | /// | ||||
| /// @ingroup ge_graph | /// @ingroup ge_graph | ||||
| /// @brief register callback func with specific summary or checkpoint by users | /// @brief register callback func with specific summary or checkpoint by users | ||||
| @@ -23,6 +23,12 @@ | |||||
| #include "graph/graph.h" | #include "graph/graph.h" | ||||
| #include "graph/ge_error_codes.h" | #include "graph/ge_error_codes.h" | ||||
| // IR interface version numbers (major.minor.patch = 1.0.0). | |||||
| // NOTE(review): these are preprocessor macros, so the unnamed namespace around them does not | |||||
| // scope them; they are visible in every file that includes this header. | |||||
| namespace { | |||||
| #define IR_MAJOR_VERSION (int(1)) | |||||
| #define IR_MINOR_VERSION (int(0)) | |||||
| #define IR_PATCH_VERSION (int(0)) | |||||
| } // namespace | |||||
| namespace ge { | namespace ge { | ||||
| struct ModelBufferData { | struct ModelBufferData { | ||||
| @@ -71,5 +77,17 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string | |||||
| */ | */ | ||||
| graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model); | graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model); | ||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief query IR interface version | |||||
| * | |||||
| * @param major_version[OUT] IR interface major version | |||||
| * @param minor_version[OUT] IR interface minor version | |||||
| * @param patch_version[OUT] IR interface patch version | |||||
| * @retval GRAPH_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * @note The outputs are raw pointers; how null pointers are handled is decided by the | |||||
| * implementation and is not visible in this header. | |||||
| */ | |||||
| graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version); | |||||
| }; // namespace ge | }; // namespace ge | ||||
| #endif | #endif | ||||
| @@ -45,9 +45,11 @@ | |||||
| namespace ge { | namespace ge { | ||||
| class Operator; | class Operator; | ||||
| class OperatorImpl; | class OperatorImpl; | ||||
| class NodeUtils; | |||||
| class NamedAttrs; | class NamedAttrs; | ||||
| class Graph; | class Graph; | ||||
| class AttrValue; | class AttrValue; | ||||
| class Node; | |||||
| using SubgraphBuilder = std::function<Graph()>; | using SubgraphBuilder = std::function<Graph()>; | ||||
| using OperatorImplPtr = std::shared_ptr<OperatorImpl>; | using OperatorImplPtr = std::shared_ptr<OperatorImpl>; | ||||
| @@ -65,8 +67,8 @@ using std::string; | |||||
| class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator { | class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator { | ||||
| public: | public: | ||||
| friend class OperatorImpl; | friend class OperatorImpl; | ||||
| friend class GraphBuilderImpl; | friend class GraphBuilderImpl; | ||||
| friend class NodeUtils; | |||||
| using OpInt = int64_t; | using OpInt = int64_t; | ||||
| using OpFloat = float; | using OpFloat = float; | ||||
| @@ -104,6 +106,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator { | |||||
| Operator &SetInput(const string &dst_name, const Operator &src_oprt, const string &name); // lint !e148 | Operator &SetInput(const string &dst_name, const Operator &src_oprt, const string &name); // lint !e148 | ||||
| // Overload of SetInput whose third argument is a numeric index instead of a name. | |||||
| Operator &SetInput(const string &dst_name, const Operator &src_oprt, uint32_t index); | |||||
| Operator &AddControlInput(const Operator &src_oprt); | Operator &AddControlInput(const Operator &src_oprt); | ||||
| graphStatus GetInputConstData(const string &dst_name, Tensor &data) const; | graphStatus GetInputConstData(const string &dst_name, Tensor &data) const; | ||||
| @@ -269,11 +273,15 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator { | |||||
| OutHandler GetOutput(const string &name) const; | OutHandler GetOutput(const string &name) const; | ||||
| // Overload of GetOutput that selects by numeric index rather than by name. | |||||
| OutHandler GetOutput(uint32_t index) const; | |||||
| OperatorImplPtr GetOperatorImplPtr() const; | OperatorImplPtr GetOperatorImplPtr() const; | ||||
| OperatorImplPtr operator_impl_{nullptr}; | OperatorImplPtr operator_impl_{nullptr}; | ||||
| graphStatus GetInputConstDataOut(const string &dst_name, Tensor &data) const; | graphStatus GetInputConstDataOut(const string &dst_name, Tensor &data) const; | ||||
| // Returns a shared pointer to a const Node. | |||||
| // NOTE(review): how the Node relates to this Operator is not visible in this header. | |||||
| std::shared_ptr<const Node> GetNode() const; | |||||
| }; | }; | ||||
| /*lint +e148*/ | /*lint +e148*/ | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -130,6 +130,10 @@ class OpReg { | |||||
| Operator::SetInput(#x, v, srcName); \ | Operator::SetInput(#x, v, srcName); \ | ||||
| return *this; \ | return *this; \ | ||||
| } \ | } \ | ||||
| _THIS_TYPE &set_input_##x(Operator &v, uint32_t index) { \ | |||||
| Operator::SetInput(#x, v, index); \ | |||||
| return *this; \ | |||||
| } \ | |||||
| _THIS_TYPE &set_input_##x(Operator &v) { \ | _THIS_TYPE &set_input_##x(Operator &v) { \ | ||||
| Operator::SetInput(#x, v); \ | Operator::SetInput(#x, v); \ | ||||
| return *this; \ | return *this; \ | ||||
| @@ -159,6 +163,10 @@ class OpReg { | |||||
| Operator::SetInput(#x, v, srcName); \ | Operator::SetInput(#x, v, srcName); \ | ||||
| return *this; \ | return *this; \ | ||||
| } \ | } \ | ||||
| _THIS_TYPE &set_input_##x(Operator &v, uint32_t index) { \ | |||||
| Operator::SetInput(#x, v, index); \ | |||||
| return *this; \ | |||||
| } \ | |||||
| TensorDesc get_input_desc_##x() const { return Operator::GetInputDesc(#x); } \ | TensorDesc get_input_desc_##x() const { return Operator::GetInputDesc(#x); } \ | ||||
| graphStatus update_input_desc_##x(const TensorDesc &tensorDesc) { \ | graphStatus update_input_desc_##x(const TensorDesc &tensorDesc) { \ | ||||
| return Operator::UpdateInputDesc(#x, tensorDesc); \ | return Operator::UpdateInputDesc(#x, tensorDesc); \ | ||||
| @@ -0,0 +1,331 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef EXTERNAL_REGISTER_SCOPE_SCOPE_FUSION_PASS_REGISTER_H_ | |||||
| #define EXTERNAL_REGISTER_SCOPE_SCOPE_FUSION_PASS_REGISTER_H_ | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <map> | |||||
| #include "ge/ge_api_error_codes.h" | |||||
| #include "register/register_error_codes.h" | |||||
| #include "register/register_types.h" | |||||
| #include "graph/operator.h" | |||||
| // Guard for functions returning void: when `cond` is false, sets the type of `fusion_rlt` | |||||
| // (if it is non-null) to ge::kScopeInvalidType via SetType() and returns from the caller. | |||||
| // The do { } while (0) wrapper makes the expansion behave as a single statement. | |||||
| #define CHECK_INNER_NODE_CONDITION(cond, fusion_rlt) \ | |||||
| do { \ | |||||
| if (!(cond)) { \ | |||||
| if ((fusion_rlt) != nullptr) { \ | |||||
| (fusion_rlt)->SetType(ge::kScopeInvalidType); \ | |||||
| } \ | |||||
| return; \ | |||||
| } \ | |||||
| } while (0) | |||||
| namespace domi { | |||||
| class TensorFlowModelParser; | |||||
| } // namespace domi | |||||
| namespace ge { | |||||
| // Namespace-scope constants. Being `const`, they have internal linkage, so each | |||||
| // translation unit including this header gets its own copy. | |||||
| // NOTE(review): kFusionDisableIndex looks like a sentinel index value - confirm against callers. | |||||
| const int32_t kFusionDisableIndex = 99999; | |||||
| const char *const kScopeToMultiNodes = "ScopeToMultiNodes"; | |||||
| const char *const kScopeInvalidType = "ScopeInvalidType"; | |||||
| const char *const kInputFromFusionScope = "InputFromFusionScope"; | |||||
| const char *const kOutputToFusionScope = "OutputToFusionScope"; | |||||
| class ScopePattern; | |||||
| // Two-level vector of raw ScopePattern pointers; ownership is not expressed by the type. | |||||
| using ScopeFusionPatterns = std::vector<std::vector<ScopePattern *>>; | |||||
| class ScopePassManager; | |||||
| // A named scope with an optional sub type and an optional father scope. All state is kept | |||||
| // behind the private ScopeImpl pointer; the listed friend classes can reach it. | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY Scope { | |||||
| public: | |||||
| explicit Scope(const std::string &name, const std::string &sub_type = "", Scope *father_scope = nullptr); | |||||
| ~Scope(); | |||||
| // The string and container accessors below return by value, i.e. a copy per call. | |||||
| std::string Name() const; | |||||
| std::string SubType() const; | |||||
| std::map<std::string, ge::OperatorPtr> AllNodesMap() const; | |||||
| // Raw pointers are returned below; ownership is not expressed by the signatures. | |||||
| Scope *GetSubScope(const std::string &scope_name) const; | |||||
| std::string LastName() const; | |||||
| std::vector<Scope *> GetAllSubScopes() const; | |||||
| const Scope *GetFatherScope() const; | |||||
| private: | |||||
| class ScopeImpl; | |||||
| std::unique_ptr<ScopeImpl> impl_; | |||||
| friend class ScopeBasePass; | |||||
| friend class ScopeTree; | |||||
| friend class NodeOpTypeFeature; | |||||
| friend class NodeAttrFeature; | |||||
| friend class ScopeFeature; | |||||
| }; | |||||
| // Result object of a scope fusion: carries a name, type and description, input/output index | |||||
| // maps keyed by inner op name, and a list of InnerNodeInfo entries. State lives behind impl_. | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY FusionScopesResult { | |||||
| public: | |||||
| FusionScopesResult(); | |||||
| ~FusionScopesResult(); | |||||
| void SetName(const std::string &name); | |||||
| void SetType(const std::string &type); | |||||
| void SetDescription(const std::string &description); | |||||
| std::string Name() const; | |||||
| std::vector<ge::OperatorPtr> Nodes() const; | |||||
| void InsertInputs(const std::string &inner_op_name, const std::vector<int32_t> &index_map); | |||||
| void InsertOutputs(const std::string &inner_op_name, const std::vector<int32_t> &index_map); | |||||
| // Description of one node inside the fusion result. Movable but not copyable. | |||||
| class InnerNodeInfo { | |||||
| public: | |||||
| explicit InnerNodeInfo(const std::string &fusion_node_name); | |||||
| InnerNodeInfo(const std::string &fusion_node_name, const std::string &name, const std::string &type); | |||||
| InnerNodeInfo(InnerNodeInfo &&other) noexcept; | |||||
| InnerNodeInfo &operator=(InnerNodeInfo &&other) noexcept; | |||||
| InnerNodeInfo(const InnerNodeInfo &) = delete; | |||||
| InnerNodeInfo &operator=(const InnerNodeInfo &) = delete; | |||||
| ~InnerNodeInfo(); | |||||
| // The setters/inserters return InnerNodeInfo&. | |||||
| // NOTE(review): presumably *this, to allow call chaining - confirm in the implementation. | |||||
| InnerNodeInfo &SetName(const std::string &name); | |||||
| InnerNodeInfo &SetType(const std::string &type); | |||||
| InnerNodeInfo &InsertInput(const std::string &input_node, int32_t peer_out_idx); | |||||
| InnerNodeInfo &InsertOutput(const std::string &output_node, int32_t peer_in_idx); | |||||
| ge::graphStatus BuildInnerNode(); | |||||
| ge::graphStatus SetInputFormat(const std::string &input_name, const std::string &format); | |||||
| ge::graphStatus SetOutputFormat(const std::string &output_name, const std::string &format); | |||||
| ge::graphStatus SetDynamicInputFormat(const std::string &input_name, uint32_t index, const std::string &format); | |||||
| ge::graphStatus SetDynamicOutputFormat(const std::string &output_name, uint32_t index, const std::string &format); | |||||
| ge::Operator *MutableOperator(); | |||||
| std::string GetName() const; | |||||
| std::string GetType() const; | |||||
| std::vector<std::pair<std::string, int32_t>> GetInputs() const; | |||||
| std::vector<std::pair<std::string, int32_t>> GetOutputs() const; | |||||
| private: | |||||
| class InnerNodeInfoImpl; | |||||
| std::unique_ptr<InnerNodeInfoImpl> impl_; | |||||
| }; | |||||
| // The accessors below return raw InnerNodeInfo pointers; ownership is not expressed by the signatures. | |||||
| InnerNodeInfo *AddInnerNode(const std::string &name, const std::string &type); | |||||
| InnerNodeInfo *MutableRecentInnerNode(); | |||||
| InnerNodeInfo *MutableInnerNode(uint32_t index); | |||||
| ge::graphStatus CheckInnerNodesInfo(); | |||||
| private: | |||||
| class FusionScopesResultImpl; | |||||
| std::unique_ptr<FusionScopesResultImpl> impl_; | |||||
| friend class ScopeGraph; | |||||
| friend class ScopeBasePass; | |||||
| // NOTE(review): this unqualified friend names a class in namespace ge, while this header | |||||
| // forward-declares domi::TensorFlowModelParser - confirm which class is meant. | |||||
| friend class TensorFlowModelParser; | |||||
| }; | |||||
| // Container of Scope objects with a separate Init() step after construction. Not copyable. | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeTree { | |||||
| public: | |||||
| ScopeTree(); | |||||
| // Returns a Status; callers should check it before using the object. | |||||
| Status Init(); | |||||
| ScopeTree(const ScopeTree &scopetree) = delete; | |||||
| ScopeTree &operator=(const ScopeTree &scopetree) = delete; | |||||
| ~ScopeTree(); | |||||
| // Returns raw Scope pointers; ownership is not expressed by the signature. | |||||
| std::vector<Scope *> GetAllScopes() const; | |||||
| private: | |||||
| class ScopeTreeImpl; | |||||
| std::unique_ptr<ScopeTreeImpl> impl_; | |||||
| friend class ScopeGraph; | |||||
| friend class ScopeBasePass; | |||||
| }; | |||||
| // Exposes a ScopeTree and a name-to-operator map, with a separate Init() step after | |||||
| // construction. Not copyable. | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeGraph { | |||||
| public: | |||||
| ScopeGraph(); | |||||
| // Returns a Status; callers should check it before using the object. | |||||
| Status Init(); | |||||
| ScopeGraph(const ScopeGraph &scope_graph) = delete; | |||||
| ScopeGraph &operator=(const ScopeGraph &scope_graph) = delete; | |||||
| ~ScopeGraph(); | |||||
| const ScopeTree *GetScopeTree() const; | |||||
| // Returns the map by value, i.e. a copy per call. | |||||
| std::map<std::string, ge::OperatorPtr> GetNodesMap() const; | |||||
| private: | |||||
| class ScopeGraphImpl; | |||||
| std::unique_ptr<ScopeGraphImpl> impl_; | |||||
| friend class ScopePassManager; | |||||
| friend class ScopeBasePass; | |||||
| // NOTE(review): this unqualified friend names a class in namespace ge, while this header | |||||
| // forward-declares domi::TensorFlowModelParser - confirm which class is meant. | |||||
| friend class TensorFlowModelParser; | |||||
| }; | |||||
| // Copyable holder for an attribute value that can be set as int64, float, string or bool. | |||||
| // Only setters are public; NodeAttrFeature is a friend and can reach the private impl_. | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeAttrValue { | |||||
| public: | |||||
| ScopeAttrValue(); | |||||
| ScopeAttrValue(ScopeAttrValue const &attr_value); | |||||
| ScopeAttrValue &operator=(ScopeAttrValue const &attr_value); | |||||
| ~ScopeAttrValue(); | |||||
| void SetIntValue(int64_t value); | |||||
| void SetFloatValue(float value); | |||||
| // Takes the string by value. | |||||
| void SetStringValue(std::string value); | |||||
| void SetBoolValue(bool value); | |||||
| private: | |||||
| class ScopeAttrValueImpl; | |||||
| std::unique_ptr<ScopeAttrValueImpl> impl_; | |||||
| friend class NodeAttrFeature; | |||||
| }; | |||||
| // Abstract interface for a feature that can be tested against a Scope. | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeBaseFeature { | |||||
| public: | |||||
| // Returns whether `scope` matches this feature; implemented by derived classes. | |||||
| virtual bool Match(const Scope *scope) = 0; | |||||
| // Virtual so that derived features are destroyed correctly through a base pointer. | |||||
| // Declaration order is unchanged so the virtual table layout stays the same. | |||||
| virtual ~ScopeBaseFeature() = default; | |||||
| }; | |||||
| // Copyable feature built from a node type string, a count and an optional step (default 0). | |||||
| // NOTE(review): `class X : Base` without an access specifier is private inheritance, so code | |||||
| // outside this class cannot convert it to ScopeBaseFeature - confirm this is intended. | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY NodeOpTypeFeature : ScopeBaseFeature { | |||||
| public: | |||||
| NodeOpTypeFeature(std::string nodeType, int num, int step = 0); | |||||
| NodeOpTypeFeature(NodeOpTypeFeature const &feature); | |||||
| NodeOpTypeFeature &operator=(NodeOpTypeFeature const &feature); | |||||
| ~NodeOpTypeFeature(); | |||||
| bool Match(const Scope *scope) override; | |||||
| private: | |||||
| class NodeOpTypeFeatureImpl; | |||||
| std::unique_ptr<NodeOpTypeFeatureImpl> impl_; | |||||
| }; | |||||
| // Copyable feature built from a node type, an attribute name, a data type and a ScopeAttrValue. | |||||
| // NOTE(review): `class X : Base` without an access specifier is private inheritance, so code | |||||
| // outside this class cannot convert it to ScopeBaseFeature - confirm this is intended. | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY NodeAttrFeature : ScopeBaseFeature { | |||||
| public: | |||||
| NodeAttrFeature(std::string nodeType, std::string attr_name, ge::DataType datatype, ScopeAttrValue attr_value); | |||||
| NodeAttrFeature(NodeAttrFeature const &feature); | |||||
| NodeAttrFeature &operator=(NodeAttrFeature const &feature); | |||||
| ~NodeAttrFeature(); | |||||
| bool Match(const Scope *scope) override; | |||||
| private: | |||||
| class NodeAttrFeatureImpl; | |||||
| std::unique_ptr<NodeAttrFeatureImpl> impl_; | |||||
| }; | |||||
| // Copyable feature built from a sub type and a count, with optional suffix, sub-scope mask | |||||
| // (both default to "") and step (default 0). | |||||
| // NOTE(review): `class X : Base` without an access specifier is private inheritance, so code | |||||
| // outside this class cannot convert it to ScopeBaseFeature - confirm this is intended. | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeFeature : ScopeBaseFeature { | |||||
| public: | |||||
| ScopeFeature(std::string sub_type, int32_t num, std::string suffix = "", std::string sub_scope_mask = "", | |||||
| int step = 0); | |||||
| ScopeFeature(ScopeFeature const &feature); | |||||
| ScopeFeature &operator=(ScopeFeature const &feature); | |||||
| ~ScopeFeature(); | |||||
| bool Match(const Scope *scope) override; | |||||
| private: | |||||
| class ScopeFeatureImpl; | |||||
| std::unique_ptr<ScopeFeatureImpl> impl_; | |||||
| }; | |||||
| // A pattern assembled from a sub type plus node-op-type, node-attr and scope features. | |||||
| // Every mutator returns ScopePattern&, and features are taken by value. | |||||
| // NOTE(review): the returned reference is presumably *this, to allow chaining - confirm. | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopePattern { | |||||
| public: | |||||
| ScopePattern(); | |||||
| ~ScopePattern(); | |||||
| ScopePattern &SetSubType(const std::string &sub_type); | |||||
| ScopePattern &AddNodeOpTypeFeature(NodeOpTypeFeature feature); | |||||
| ScopePattern &AddNodeAttrFeature(NodeAttrFeature feature); | |||||
| ScopePattern &AddScopeFeature(ScopeFeature feature); | |||||
| private: | |||||
| class ScopePatternImpl; | |||||
| std::unique_ptr<ScopePatternImpl> impl_; | |||||
| friend class ScopeBasePass; | |||||
| }; | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopesResult { | |||||
| public: | |||||
| ScopesResult(); | |||||
| ScopesResult(ScopesResult const &result); | |||||
| ScopesResult &operator=(ScopesResult const &result); | |||||
| ~ScopesResult(); | |||||
| void SetScopes(std::vector<Scope *> &scopes); | |||||
| void SetNodes(std::vector<ge::OperatorPtr> &nodes); | |||||
| private: | |||||
| class ScopesResultImpl; | |||||
| std::unique_ptr<ScopesResultImpl> impl_; | |||||
| friend class ScopeBasePass; | |||||
| }; | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeBasePass { | |||||
| public: | |||||
| ScopeBasePass(); | |||||
| virtual ~ScopeBasePass(); | |||||
| protected: | |||||
| // Subclasses implement respective fusion strategies and build the Patterns | |||||
| virtual std::vector<ScopeFusionPatterns> DefinePatterns() = 0; | |||||
| // Define the name of the scope pass | |||||
| virtual std::string PassName() = 0; | |||||
| // Subclasses implement respective multi-scope or operator fusion methods across scopes | |||||
| virtual Status LastMatchScopesAndOPs(std::shared_ptr<ScopeGraph> &scope_graph, | |||||
| std::vector<ScopesResult> &results) = 0; | |||||
| // Subclasses implement their own results and set the input and output of the final fusion operator | |||||
| virtual void GenerateFusionResult(const std::vector<Scope *> &scopes, FusionScopesResult *fusion_rlt) = 0; | |||||
| private: | |||||
| class ScopeBasePassImpl; | |||||
| std::unique_ptr<ScopeBasePassImpl> impl_; | |||||
| friend class ge::ScopePassManager; | |||||
| friend class ScopeBasePassImpl; | |||||
| }; | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeFusionPassRegistry { | |||||
| public: | |||||
| using CreateFn = ScopeBasePass *(*)(); | |||||
| ~ScopeFusionPassRegistry(); | |||||
| static ScopeFusionPassRegistry &GetInstance() { | |||||
| static ScopeFusionPassRegistry instance; | |||||
| return instance; | |||||
| } | |||||
| void RegisterScopeFusionPass(const std::string &pass_name, CreateFn create_fn, bool is_general); | |||||
| private: | |||||
| ScopeFusionPassRegistry(); | |||||
| class ScopeFusionPassRegistryImpl; | |||||
| /*lint -e148*/ | |||||
| std::unique_ptr<ScopeFusionPassRegistryImpl> impl_; | |||||
| friend class TensorFlowModelParser; | |||||
| }; | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeUtil { | |||||
| public: | |||||
| static std::string StringReplaceAll(std::string str, const std::string &old_value, const std::string &new_value); | |||||
| static void FreeScopePatterns(ScopeFusionPatterns &patterns); | |||||
| static void FreeOneBatchPattern(std::vector<ScopePattern *> &one_batch_pattern); | |||||
| }; | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeFusionPassRegistrar { | |||||
| public: | |||||
| ScopeFusionPassRegistrar(const char *pass_name, ScopeBasePass *(*create_fn)(), bool is_general); | |||||
| ~ScopeFusionPassRegistrar() {} | |||||
| }; | |||||
///
/// @brief Register `scope_pass` under `pass_name`.
///        Expands to a static ::ge::ScopeFusionPassRegistrar object named
///        register_scope_fusion_pass<N>, with N taken from __COUNTER__. Its factory is a
///        lambda returning `new (std::nothrow) scope_pass()`, so the factory yields
///        nullptr when allocation fails.
///
#define REGISTER_SCOPE_FUSION_PASS(pass_name, scope_pass, is_general) \
  REGISTER_SCOPE_FUSION_PASS_UNIQ_HELPER(__COUNTER__, pass_name, scope_pass, is_general)

// Extra expansion level so that __COUNTER__ is expanded before it is pasted with ##.
#define REGISTER_SCOPE_FUSION_PASS_UNIQ_HELPER(uniq_id, pass_name, scope_pass, is_general) \
  REGISTER_SCOPE_FUSION_PASS_UNIQ(uniq_id, pass_name, scope_pass, is_general)

#define REGISTER_SCOPE_FUSION_PASS_UNIQ(uniq_id, pass_name, scope_pass, is_general)                   \
  static ::ge::ScopeFusionPassRegistrar register_scope_fusion_pass##uniq_id __attribute__((unused)) = \
    ::ge::ScopeFusionPassRegistrar(                                                                   \
      pass_name, []() -> ::ge::ScopeBasePass * { return new (std::nothrow) scope_pass(); }, is_general)
| } // namespace ge | |||||
| #endif // EXTERNAL_REGISTER_SCOPE_SCOPE_FUSION_PASS_REGISTER_H_ | |||||
| @@ -22,7 +22,7 @@ | |||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| #include "common/fmk_error_codes.h" | |||||
| #include "framework/common/fmk_error_codes.h" | |||||
| #include "ge/ge_api_error_codes.h" | #include "ge/ge_api_error_codes.h" | ||||
| #include "external/graph/types.h" | #include "external/graph/types.h" | ||||
| #include "external/ge/ge_api_types.h" | #include "external/ge/ge_api_types.h" | ||||
| @@ -49,6 +49,7 @@ enum OpEngineType { | |||||
| }; | }; | ||||
| const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | ||||
| const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | |||||
| // Data cache, including data address and length | // Data cache, including data address and length | ||||
| struct DataBuffer { | struct DataBuffer { | ||||
| @@ -128,6 +129,7 @@ struct OriginInputInfo { | |||||
| // The structure of AIPP info | // The structure of AIPP info | ||||
| struct AippConfigInfo { | struct AippConfigInfo { | ||||
| int8_t aipp_mode; | |||||
| int8_t input_format; | int8_t input_format; | ||||
| int32_t src_image_size_w; | int32_t src_image_size_w; | ||||
| int32_t src_image_size_h; | int32_t src_image_size_h; | ||||
| @@ -175,6 +177,9 @@ struct AippConfigInfo { | |||||
| float var_reci_chn_1; | float var_reci_chn_1; | ||||
| float var_reci_chn_2; | float var_reci_chn_2; | ||||
| float var_reci_chn_3; | float var_reci_chn_3; | ||||
| int8_t support_rotation; | |||||
| uint32_t related_input_rank; | |||||
| uint32_t max_src_image_size; | |||||
| }; | }; | ||||
| // The structure of offline Modeldata | // The structure of offline Modeldata | ||||
| @@ -250,5 +255,31 @@ struct ComputeGraphDescInfo { | |||||
| std::vector<std::vector<int64_t>> output_shape; | std::vector<std::vector<int64_t>> output_shape; | ||||
| std::vector<DataType> output_data_type; | std::vector<DataType> output_data_type; | ||||
| }; | }; | ||||
| struct OpDescInfo { | |||||
| std::string op_name; | |||||
| uint32_t task_id; | |||||
| uint32_t stream_id; | |||||
| std::vector<Format> input_format; | |||||
| std::vector<std::vector<int64_t>> input_shape; | |||||
| std::vector<DataType> input_data_type; | |||||
| std::vector<void *> input_addrs; | |||||
| std::vector<Format> output_format; | |||||
| std::vector<std::vector<int64_t>> output_shape; | |||||
| std::vector<DataType> output_data_type; | |||||
| std::vector<void *> output_addrs; | |||||
| }; | |||||
// Dump settings for a single model: the model name and a list of layer names.
struct ModelDumpConfig {
  std::string model_name;
  std::vector<std::string> layers;
};
| struct DumpConfig { | |||||
| std::string dump_path; | |||||
| std::string dump_mode; | |||||
| std::string dump_status; | |||||
| std::string dump_op_switch; | |||||
| std::vector<ModelDumpConfig> dump_list; | |||||
| }; | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // INC_FRAMEWORK_COMMON_GE_TYPES_H_ | #endif // INC_FRAMEWORK_COMMON_GE_TYPES_H_ | ||||
| @@ -606,6 +606,7 @@ static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 79; | |||||
| /// @brief INPUT node type | /// @brief INPUT node type | ||||
| /// | /// | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string INPUT_TYPE; | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string INPUT_TYPE; | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMMY_DATA; | |||||
| /// | /// | ||||
| /// @ingroup domi_omg | /// @ingroup domi_omg | ||||
| @@ -347,6 +347,14 @@ std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) { | |||||
| /// | /// | ||||
| uint64_t GetCurrentTimestap(); | uint64_t GetCurrentTimestap(); | ||||
| /// | |||||
| /// @ingroup domi_common | |||||
| /// @brief Obtains the absolute time (timestamp) of the current system. | |||||
| /// @return Timestamp, in seconds (s) | |||||
| /// | |||||
| /// | |||||
| uint32_t GetCurrentSecondTimestap(); | |||||
| /// | /// | ||||
| /// @ingroup domi_common | /// @ingroup domi_common | ||||
| /// @brief Check whether the product of two int64 numbers exceeds the int64 range. | /// @brief Check whether the product of two int64 numbers exceeds the int64 range. | ||||
| @@ -31,6 +31,7 @@ enum PriorityEnum { | |||||
| COST_1, | COST_1, | ||||
| COST_2, | COST_2, | ||||
| COST_9 = 9, | COST_9 = 9, | ||||
| COST_10 = 10, | |||||
| }; | }; | ||||
| struct DNNEngineAttribute { | struct DNNEngineAttribute { | ||||
| @@ -135,6 +135,15 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
| /// | /// | ||||
| ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get user designated shape order | |||||
| /// @param [in] model_id | |||||
| /// @param [out] user_designate_shape_order | |||||
| /// @return execute result | |||||
| /// | |||||
| ge::Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order); | |||||
| ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | ||||
| /// | /// | ||||
| @@ -162,6 +171,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
| ge::Status CommandHandle(const ge::Command &command); | ge::Status CommandHandle(const ge::Command &command); | ||||
| ge::Status SetDump(const DumpConfig &dump_config); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Query model memory consuming interface | /// @brief Query model memory consuming interface | ||||
| @@ -261,6 +272,7 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
| ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | ||||
| ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | ||||
| std::vector<InputOutputDims> &output_dims); | std::vector<InputOutputDims> &output_dims); | ||||
| ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||||
| private: | private: | ||||
| static bool isInit_; | static bool isInit_; | ||||
| @@ -27,6 +27,7 @@ namespace ge { | |||||
| enum MemStorageType { | enum MemStorageType { | ||||
| HBM = 0, | HBM = 0, | ||||
| RDMA_HBM, | RDMA_HBM, | ||||
| HOST_DDR, | |||||
| }; | }; | ||||
| struct HostVarInfo { | struct HostVarInfo { | ||||
| @@ -96,6 +96,10 @@ Status CheckCustomAiCpuOpLib(); | |||||
| Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); | Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); | ||||
| Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); | |||||
| Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info); | |||||
| void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | ||||
| std::vector<std::string> &output_nodes_name); | std::vector<std::string> &output_nodes_name); | ||||
| @@ -883,6 +883,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REF_VAR_ | |||||
| // Assign | // Assign | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ASSIGN_VALIDATE_SHAPE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ASSIGN_VALIDATE_SHAPE; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ASSIGN_VAR_NAME; | |||||
| // ShapeN | // ShapeN | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SHAPEN_ATTR_N; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SHAPEN_ATTR_N; | ||||
| @@ -939,6 +940,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_NUM; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_NUM; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_LABEL; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_LABEL; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_BATCH; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_BATCH; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_USER_DESIGNEATE_SHAPE_ORDER; | |||||
| // Control flow | // Control flow | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_SWITCH_COND; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_SWITCH_COND; | ||||
| @@ -957,7 +959,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
| // Function Op | // Function Op | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_NODE_INDEX; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_NODE_INDEX; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_CONST_TYPE; | |||||
| // Used to mark that the active node is for a loop, type: bool | // Used to mark that the active node is for a loop, type: bool | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IS_LOOP_ACTIVE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IS_LOOP_ACTIVE; | ||||
| @@ -968,6 +969,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MEMORY_TYPE_WORKSPACE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MEMORY_TYPE_WORKSPACE; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MEMORY_TYPE_RANGE; | |||||
| // Atomic addr clean attrs | // Atomic addr clean attrs | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATOMIC_ATTR_INPUT_INDEX; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATOMIC_ATTR_INPUT_INDEX; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATOMIC_ATTR_OUTPUT_INDEX; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATOMIC_ATTR_OUTPUT_INDEX; | ||||
| @@ -24,6 +24,7 @@ namespace ge { | |||||
| class GEContext { | class GEContext { | ||||
| public: | public: | ||||
| graphStatus GetOption(const std::string &key, std::string &option); | graphStatus GetOption(const std::string &key, std::string &option); | ||||
| bool GetHostExecFlag(); | |||||
| uint64_t SessionId(); | uint64_t SessionId(); | ||||
| uint32_t DeviceId(); | uint32_t DeviceId(); | ||||
| uint64_t TraceId(); | uint64_t TraceId(); | ||||
| @@ -153,9 +153,6 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder { | |||||
| graphStatus AddDynamicOutputDesc(const string &name, const unsigned int num, bool isPushBack = true); | graphStatus AddDynamicOutputDesc(const string &name, const unsigned int num, bool isPushBack = true); | ||||
| void RemoveInputDesc(uint32_t index); | |||||
| void RemoveOutputDesc(uint32_t index); | |||||
| bool IsOptionalInput(const string &name) const; | bool IsOptionalInput(const string &name) const; | ||||
| bool IsOptionalInput(uint32_t index) const; | bool IsOptionalInput(uint32_t index) const; | ||||
| @@ -20,6 +20,7 @@ | |||||
| #include <set> | #include <set> | ||||
| #include <map> | #include <map> | ||||
| #include <vector> | #include <vector> | ||||
| #include "external/graph/operator.h" | |||||
| #include "graph/node.h" | #include "graph/node.h" | ||||
| namespace ge { | namespace ge { | ||||
| @@ -63,8 +64,11 @@ class NodeUtils { | |||||
| static void UnlinkAll(const Node &node); | static void UnlinkAll(const Node &node); | ||||
| static graphStatus UpdatePeerNodeInputDesc(const NodePtr &node_ptr); | static graphStatus UpdatePeerNodeInputDesc(const NodePtr &node_ptr); | ||||
| static graphStatus AppendInputAnchor(const NodePtr &node, uint32_t index); | |||||
| static graphStatus RemoveInputAnchor(const NodePtr &node, uint32_t index); | |||||
| static graphStatus AppendInputAnchor(const NodePtr &node, uint32_t num); | |||||
| static graphStatus RemoveInputAnchor(const NodePtr &node, uint32_t num); | |||||
| static graphStatus AppendOutputAnchor(const NodePtr &node, uint32_t num); | |||||
| static graphStatus RemoveOutputAnchor(const NodePtr &node, uint32_t num); | |||||
| static bool IsInNodesEmpty(const Node &node); | static bool IsInNodesEmpty(const Node &node); | ||||
| static GeTensorDesc GetOutputDesc(const Node &node, uint32_t index); | static GeTensorDesc GetOutputDesc(const Node &node, uint32_t index); | ||||
| @@ -77,6 +81,7 @@ class NodeUtils { | |||||
| static graphStatus GetNodeUnknownShapeStatus(const Node &node, bool &is_unknow); | static graphStatus GetNodeUnknownShapeStatus(const Node &node, bool &is_unknow); | ||||
| static std::string GetNodeType(const Node &node); | static std::string GetNodeType(const Node &node); | ||||
| static std::string GetNodeType(const NodePtr &node); | |||||
| static ComputeGraphPtr GetSubgraph(const Node &node, uint32_t index); | static ComputeGraphPtr GetSubgraph(const Node &node, uint32_t index); | ||||
| static graphStatus SetSubgraph(Node &node, uint32_t index, const ComputeGraphPtr &subgraph); | static graphStatus SetSubgraph(Node &node, uint32_t index, const ComputeGraphPtr &subgraph); | ||||
| @@ -100,8 +105,17 @@ class NodeUtils { | |||||
| /// @param [in] node | /// @param [in] node | ||||
| /// @return Node | /// @return Node | ||||
| /// | /// | ||||
| static NodePtr GetParentInput(const Node &node); | |||||
| static NodePtr GetParentInput(const NodePtr &node); | static NodePtr GetParentInput(const NodePtr &node); | ||||
| /// | |||||
| /// @brief Get is dynamic shape graph from node. | |||||
| /// @param [in] node | |||||
| /// @return bool | |||||
| /// | |||||
| static bool IsDynamicShape(const Node &node); | |||||
| static bool IsDynamicShape(const NodePtr &node); | |||||
| /// | /// | ||||
| /// @brief Check is varying_input for while node | /// @brief Check is varying_input for while node | ||||
| /// @param [in] node: Data node for subgraph | /// @param [in] node: Data node for subgraph | ||||
| @@ -115,7 +129,7 @@ class NodeUtils { | |||||
| /// @param [out] string | /// @param [out] string | ||||
| /// @return bool | /// @return bool | ||||
| /// | /// | ||||
| static bool GetConstOpType(const NodePtr &in_node, std::string &op_type); | |||||
| static bool GetConstOpType(const NodePtr &node, std::string &type); | |||||
| /// | /// | ||||
| /// @brief Remove node-related subgraphs, including subgraphs of nodes in the subgraph. | /// @brief Remove node-related subgraphs, including subgraphs of nodes in the subgraph. | ||||
| @@ -138,9 +152,15 @@ class NodeUtils { | |||||
| /// | /// | ||||
| static vector<NodePtr> GetSubgraphOutputNodes(const Node &node); | static vector<NodePtr> GetSubgraphOutputNodes(const Node &node); | ||||
| static NodePtr GetInDataNodeByIndex(const Node &node, int index); | |||||
| static NodePtr GetInDataNodeByIndex(const Node &node, const int index); | |||||
| static vector<pair<InDataAnchorPtr, NodePtr>> GetOutDataNodesWithAnchorByIndex(const Node &node, const int index); | |||||
| static ge::ConstNodePtr GetNodeFromOperator(const Operator &oprt); | |||||
| static graphStatus GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor); | |||||
| static vector<NodePtr> GetOutDataNodesByIndex(const Node &node, int index); | |||||
| static graphStatus GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor); | |||||
| private: | private: | ||||
| static std::map<NodePtr, std::vector<uint32_t>> map_send_info_; | static std::map<NodePtr, std::vector<uint32_t>> map_send_info_; | ||||
| @@ -34,6 +34,7 @@ class TypeUtils { | |||||
| static bool IsFormatValid(Format format); | static bool IsFormatValid(Format format); | ||||
| static bool IsInternalFormat(Format format); | static bool IsInternalFormat(Format format); | ||||
| static std::string ImplyTypeToSerialString(domi::ImplyType imply_type); | |||||
| static std::string DataTypeToSerialString(DataType data_type); | static std::string DataTypeToSerialString(DataType data_type); | ||||
| static DataType SerialStringToDataType(const std::string &str); | static DataType SerialStringToDataType(const std::string &str); | ||||
| static std::string FormatToSerialString(Format format); | static std::string FormatToSerialString(Format format); | ||||
| @@ -830,6 +830,7 @@ const std::string REF_VAR_PRE_PEER_OUT_INDEX = "ref_var_pre_peer_out_index"; | |||||
| // Assign | // Assign | ||||
| const std::string ASSIGN_VALIDATE_SHAPE = "validate_shape"; | const std::string ASSIGN_VALIDATE_SHAPE = "validate_shape"; | ||||
| const std::string ASSIGN_VAR_NAME = "_assign_var_name"; | |||||
| // space2batch batch2space | // space2batch batch2space | ||||
| const std::string BATCH_SPACE_ATTR_BLOCK = "block"; | const std::string BATCH_SPACE_ATTR_BLOCK = "block"; | ||||
| @@ -931,7 +932,6 @@ const std::string ATTR_NAME_NEXT_ITERATION = "_next_iteration_node"; | |||||
| // Function Op | // Function Op | ||||
| const std::string ATTR_NAME_PARENT_NODE_INDEX = "_parent_node_index"; | const std::string ATTR_NAME_PARENT_NODE_INDEX = "_parent_node_index"; | ||||
| const std::string ATTR_NAME_PARENT_CONST_TYPE = "_parent_const_type"; | |||||
| // Used to mark that the active node is for a loop, type: bool | // Used to mark that the active node is for a loop, type: bool | ||||
| const std::string ATTR_NAME_IS_LOOP_ACTIVE = "is_loop_active"; | const std::string ATTR_NAME_IS_LOOP_ACTIVE = "is_loop_active"; | ||||
| @@ -942,6 +942,8 @@ const std::string ATTR_NAME_MEMORY_TYPE_OUTPUT = "memory_type_output"; | |||||
| const std::string ATTR_NAME_MEMORY_TYPE_WORKSPACE = "memory_type_workspace"; | const std::string ATTR_NAME_MEMORY_TYPE_WORKSPACE = "memory_type_workspace"; | ||||
| const std::string ATTR_NAME_MEMORY_TYPE_RANGE = "_memory_type_range"; | |||||
| const std::string MODEL_ATTR_SESSION_ID = "session_id"; | const std::string MODEL_ATTR_SESSION_ID = "session_id"; | ||||
| // lx fusion | // lx fusion | ||||
| @@ -991,6 +993,8 @@ const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS = "_mbatch_origin_input_dims"; | |||||
| const std::string ATTR_DYNAMIC_TYPE = "mbatch_dynamic_type"; | const std::string ATTR_DYNAMIC_TYPE = "mbatch_dynamic_type"; | ||||
| const std::string ATTR_USER_DESIGNEATE_SHAPE_ORDER = "user_designate_shape_order"; | |||||
| // For inserted op | // For inserted op | ||||
| const std::string ATTR_INSERTED_BY_GE = "_inserted_by_ge"; | const std::string ATTR_INSERTED_BY_GE = "_inserted_by_ge"; | ||||
| @@ -684,18 +684,6 @@ graphStatus OpDesc::AddDynamicOutputDesc(const string &name, const unsigned int | |||||
| return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
| } | } | ||||
| void OpDesc::RemoveInputDesc(uint32_t index) { | |||||
| while (inputs_desc_.size() > index) { | |||||
| inputs_desc_.pop_back(); | |||||
| } | |||||
| } | |||||
| void OpDesc::RemoveOutputDesc(uint32_t index) { | |||||
| while (outputs_desc_.size() > index) { | |||||
| outputs_desc_.pop_back(); | |||||
| } | |||||
| } | |||||
| bool OpDesc::IsOptionalInput(const string &name) const { | bool OpDesc::IsOptionalInput(const string &name) const { | ||||
| return optional_input_names_.find(name) != optional_input_names_.end(); | return optional_input_names_.find(name) != optional_input_names_.end(); | ||||
| } | } | ||||
| @@ -277,6 +277,22 @@ class OperatorImpl : public std::enable_shared_from_this<OperatorImpl> { | |||||
| return output_ptr; | return output_ptr; | ||||
| } | } | ||||
| OutHandler GetOutput(uint32_t index) { | |||||
| GE_CHK_BOOL_EXEC(op_desc_ != nullptr, return nullptr, "op_desc_ is nullptr."); | |||||
| string name = op_desc_->GetOutputNameByIndex(index); | |||||
| if (name.empty()) { | |||||
| GELOGE(GRAPH_FAILED, "Find src name by index failed. index[%u]", index); | |||||
| return nullptr; | |||||
| } | |||||
| shared_ptr<OpIO> output_ptr = ComGraphMakeShared<OpIO>(name, index, shared_from_this()); | |||||
| if (output_ptr == nullptr) { | |||||
| GELOGE(GRAPH_FAILED, "OpIO make shared failed"); | |||||
| return nullptr; | |||||
| } | |||||
| return output_ptr; | |||||
| } | |||||
| GeTensorDesc GetOutputDesc(const string &name) const { | GeTensorDesc GetOutputDesc(const string &name) const { | ||||
| GE_CHK_BOOL_EXEC(op_desc_ != nullptr, return GeTensorDesc(), "op_desc_ is nullptr."); | GE_CHK_BOOL_EXEC(op_desc_ != nullptr, return GeTensorDesc(), "op_desc_ is nullptr."); | ||||
| @@ -540,6 +556,13 @@ Operator &Operator::SetInput(const std::string &dst_name, const ge::Operator &sr | |||||
| return *this; | return *this; | ||||
| } | } | ||||
| Operator &Operator::SetInput(const std::string &dst_name, const ge::Operator &src_oprt, uint32_t index) { | |||||
| auto out_handler = src_oprt.GetOutput(index); | |||||
| GE_CHK_BOOL_EXEC(out_handler != nullptr, return *this, "out_handler is nullptr."); | |||||
| (void)SetInput(dst_name, out_handler); | |||||
| return *this; | |||||
| } | |||||
| Operator &Operator::AddControlInput(const Operator &src_oprt) { | Operator &Operator::AddControlInput(const Operator &src_oprt) { | ||||
| if (operator_impl_ == nullptr) { | if (operator_impl_ == nullptr) { | ||||
| GELOGE(GRAPH_FAILED, "operator impl is nullptr."); | GELOGE(GRAPH_FAILED, "operator impl is nullptr."); | ||||
| @@ -621,6 +644,11 @@ graphStatus Operator::GetInputConstDataOut(const string &dst_name, Tensor &data) | |||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| } | } | ||||
| std::shared_ptr<const Node> Operator::GetNode() const { | |||||
| GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return nullptr, "operator impl is nullptr."); | |||||
| return operator_impl_->GetNode(); | |||||
| } | |||||
| TensorDesc Operator::GetInputDesc(const std::string &name) const { | TensorDesc Operator::GetInputDesc(const std::string &name) const { | ||||
| GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return TensorDesc(), "operator impl is nullptr."); | GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return TensorDesc(), "operator impl is nullptr."); | ||||
| return TensorAdapter::GeTensorDesc2TensorDesc(operator_impl_->GetInputDesc(name)); | return TensorAdapter::GeTensorDesc2TensorDesc(operator_impl_->GetInputDesc(name)); | ||||
| @@ -657,6 +685,11 @@ OutHandler Operator::GetOutput(const string &name) const { | |||||
| return operator_impl_->GetOutput(name); | return operator_impl_->GetOutput(name); | ||||
| } | } | ||||
| OutHandler Operator::GetOutput(uint32_t index) const { | |||||
| GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return nullptr, "operator impl is nullptr."); | |||||
| return operator_impl_->GetOutput(index); | |||||
| } | |||||
| TensorDesc Operator::GetOutputDesc(const std::string &name) const { | TensorDesc Operator::GetOutputDesc(const std::string &name) const { | ||||
| GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return TensorDesc(), "operator impl is nullptr."); | GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return TensorDesc(), "operator impl is nullptr."); | ||||
| return TensorAdapter::GeTensorDesc2TensorDesc(operator_impl_->GetOutputDesc(name)); | return TensorAdapter::GeTensorDesc2TensorDesc(operator_impl_->GetOutputDesc(name)); | ||||
| @@ -1540,6 +1573,7 @@ void GraphUtils::BreakConnect(const std::map<OperatorImplPtr, NodePtr> &all_node | |||||
| } | } | ||||
| op_impl->ClearOutputLinks(); | op_impl->ClearOutputLinks(); | ||||
| op_impl->ClearInputLinks(); | op_impl->ClearInputLinks(); | ||||
| OperatorKeeper::GetInstance().CheckOutOperator(op_impl); | |||||
| } | } | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -17,12 +17,14 @@ | |||||
| #include "./ge_context.h" | #include "./ge_context.h" | ||||
| #include "./ge_global_options.h" | #include "./ge_global_options.h" | ||||
| #include "./ge_local_context.h" | #include "./ge_local_context.h" | ||||
| #include "framework/common/ge_types.h" | |||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| namespace ge { | namespace ge { | ||||
| namespace { | namespace { | ||||
| const int64_t kMinTrainingTraceJobId = 256; | const int64_t kMinTrainingTraceJobId = 256; | ||||
| const int kDecimal = 10; | const int kDecimal = 10; | ||||
| const char *kHostExecPlacement = "HOST"; | |||||
| } // namespace | } // namespace | ||||
| GEContext &GetContext() { | GEContext &GetContext() { | ||||
| static GEContext ge_context{}; | static GEContext ge_context{}; | ||||
| @@ -33,6 +35,16 @@ graphStatus GEContext::GetOption(const std::string &key, std::string &option) { | |||||
| return GetThreadLocalContext().GetOption(key, option); | return GetThreadLocalContext().GetOption(key, option); | ||||
| } | } | ||||
| bool GEContext::GetHostExecFlag() { | |||||
| std::string exec_placement; | |||||
| if (GetThreadLocalContext().GetOption(GE_OPTION_EXEC_PLACEMENT, exec_placement) != GRAPH_SUCCESS) { | |||||
| GELOGW("get option OPTION_EXEC_PLACEMENT failed."); | |||||
| return false; | |||||
| } | |||||
| GELOGD("Option ge.exec.placement is %s.", exec_placement.c_str()); | |||||
| return exec_placement == kHostExecPlacement; | |||||
| } | |||||
| std::map<std::string, std::string> &GetMutableGlobalOptions() { | std::map<std::string, std::string> &GetMutableGlobalOptions() { | ||||
| static std::map<std::string, std::string> global_options{}; | static std::map<std::string, std::string> global_options{}; | ||||
| return global_options; | return global_options; | ||||
| @@ -243,8 +243,8 @@ graphStatus RefRelations::Impl::BuildRefRelationsForWhile( | |||||
| } | } | ||||
| auto in_data_anchor_idx = in_anchor->GetIdx(); | auto in_data_anchor_idx = in_anchor->GetIdx(); | ||||
| auto net_in_desc = netoutput->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_data_anchor_idx)); | auto net_in_desc = netoutput->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_data_anchor_idx)); | ||||
| int ref_d; | |||||
| int ref_n; | |||||
| int ref_d = 0; | |||||
| int ref_n = 0; | |||||
| (void)AttrUtils::GetInt(peer_out_data_node->GetOpDesc(), kRefIndex, ref_d); | (void)AttrUtils::GetInt(peer_out_data_node->GetOpDesc(), kRefIndex, ref_d); | ||||
| (void)AttrUtils::GetInt(net_in_desc, kRefIndex, ref_n); | (void)AttrUtils::GetInt(net_in_desc, kRefIndex, ref_n); | ||||
| @@ -351,6 +351,66 @@ graphStatus UpdateParentNodeOutTensor(const ConstNodePtr &node) { | |||||
| } | } | ||||
| return UpdateParentNodeForBranch(node, ref_out_tensors); | return UpdateParentNodeForBranch(node, ref_out_tensors); | ||||
| } | } | ||||
///
/// @brief Render a list of dimensions as text, e.g. {1, 2, 3} -> "[1 2 3]".
///        Values are separated by a single space and an empty list yields "[]".
/// @param [in] dims dimension values to render
/// @return bracketed, space-separated string
///
std::string Serial(const std::vector<int64_t> &dims) {
  std::string serial_string = "[";
  bool is_first = true;
  for (const int64_t dim : dims) {
    // Put the separator between elements only, so no space trails the last value.
    if (!is_first) {
      serial_string += ' ';
    }
    serial_string += std::to_string(dim);
    is_first = false;
  }
  serial_string += ']';
  return serial_string;
}
| graphStatus UpdateOpInputDesc(const ConstNodePtr &node_ptr) { | |||||
| GE_IF_BOOL_EXEC(node_ptr == nullptr, GELOGE(GRAPH_FAILED, "node is null."); return GRAPH_FAILED); | |||||
| GE_IF_BOOL_EXEC(node_ptr->GetOpDesc() == nullptr, GELOGE(GRAPH_FAILED, "op_desc is null."); return GRAPH_FAILED); | |||||
| for (const auto &in_anchor : node_ptr->GetAllInDataAnchors()) { | |||||
| auto in_idx = in_anchor->GetIdx(); | |||||
| auto peer_out_data_anchor = in_anchor->GetPeerOutAnchor(); | |||||
| if (peer_out_data_anchor == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto peer_out_data_node = peer_out_data_anchor->GetOwnerNode(); | |||||
| if (peer_out_data_node == nullptr || peer_out_data_node->GetOpDesc() == nullptr) { | |||||
| continue; | |||||
| } | |||||
| int peer_out_idx = peer_out_data_anchor->GetIdx(); | |||||
| auto in_desc = node_ptr->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_idx)); | |||||
| auto peer_out_desc = peer_out_data_node->GetOpDesc()->MutableOutputDesc(static_cast<uint32_t>(peer_out_idx)); | |||||
| // check shape and dtype continuity. do not stop process | |||||
| auto in_shape = in_desc->GetShape().GetDims(); | |||||
| auto in_dtype = in_desc->GetDataType(); | |||||
| auto peer_out_shape = peer_out_desc->GetShape().GetDims(); | |||||
| auto peer_out_dtype = peer_out_desc->GetDataType(); | |||||
| if (peer_out_dtype != in_dtype) { | |||||
| GELOGW( | |||||
| "current node [%s] [%d]\'th out_dtype is [%s].peer output node [%s] [%d]\'th " | |||||
| "output_dtype is [%s].The two dtype should be same! Please check graph and fix it", | |||||
| node_ptr->GetName().c_str(), in_idx, TypeUtils::DataTypeToSerialString(in_dtype).c_str(), | |||||
| peer_out_data_node->GetName().c_str(), peer_out_idx, TypeUtils::DataTypeToSerialString(peer_out_dtype).c_str()); | |||||
| } else if ((!in_shape.empty()) && (in_shape != peer_out_shape)) { | |||||
| string in_shape_str = Serial(in_shape); | |||||
| string peer_out_shape_str = Serial(peer_out_shape); | |||||
| GELOGW( | |||||
| "current node [%s] [%d]\'th out_shape is [%s].peer input node [%s] [%d]\'th " | |||||
| "input_shape is [%s].The two shape should be same! Please check graph and fix it", | |||||
| node_ptr->GetName().c_str(), in_idx, in_shape_str.c_str(), peer_out_data_node->GetName().c_str(), peer_out_idx, | |||||
| peer_out_shape_str.c_str()); | |||||
| } | |||||
| // refresh current node input desc | |||||
| in_desc->SetOriginShape(peer_out_desc->GetOriginShape()); | |||||
| in_desc->SetShape(peer_out_desc->GetShape()); | |||||
| in_desc->SetDataType(peer_out_desc->GetDataType()); | |||||
| in_desc->SetOriginDataType(peer_out_desc->GetOriginDataType()); | |||||
| std::vector<std::pair<int64_t, int64_t>> shape_range; | |||||
| (void)peer_out_desc->GetShapeRange(shape_range); | |||||
| in_desc->SetShapeRange(shape_range); | |||||
| ge::TensorUtils::SetRealDimCnt(*in_desc, static_cast<uint32_t>(peer_out_desc->GetShape().GetDims().size())); | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase) { | void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase) { | ||||
| if (!IsLogEnable(GE, DLOG_DEBUG)) { | if (!IsLogEnable(GE, DLOG_DEBUG)) { | ||||
| @@ -427,9 +487,7 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator & | |||||
| return InferShapeAndType(node, op, true); | return InferShapeAndType(node, op, true); | ||||
| } | } | ||||
| graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator &op, bool before_subgraph) { | graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator &op, bool before_subgraph) { | ||||
| GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "node is null."); return GRAPH_FAILED); | |||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(GRAPH_FAILED, "op_desc is null."); return GRAPH_FAILED); | |||||
| const auto &op_type = op_desc->GetType(); | const auto &op_type = op_desc->GetType(); | ||||
| graphStatus ret; | graphStatus ret; | ||||
| @@ -554,6 +612,19 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferShapeAndType(const NodePtr &node, | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferShapeAndType(const NodePtr &node, | ||||
| bool before_subgraph) { | bool before_subgraph) { | ||||
| GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "node is null."); return GRAPH_FAILED); | GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "node is null."); return GRAPH_FAILED); | ||||
| bool is_unknown_graph = node->GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||||
| auto opdesc = node->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(opdesc == nullptr, GELOGE(GRAPH_FAILED, "op_desc is null."); return GRAPH_FAILED); | |||||
| // some op can not infershape twice such as aipp | |||||
| bool need_update_input = !is_unknown_graph && !opdesc->HasAttr("has_infered_verified"); | |||||
| if (need_update_input) { | |||||
| auto status = UpdateOpInputDesc(node); | |||||
| if (status != GRAPH_SUCCESS) { | |||||
| GELOGE(GRAPH_FAILED, "update op input_desc failed!"); | |||||
| return status; | |||||
| } | |||||
| } | |||||
| if (node->Verify() != GRAPH_SUCCESS) { | if (node->Verify() != GRAPH_SUCCESS) { | ||||
| GELOGE(GRAPH_FAILED, "Verifying %s failed.", node->GetName().c_str()); | GELOGE(GRAPH_FAILED, "Verifying %s failed.", node->GetName().c_str()); | ||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| @@ -561,7 +632,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh | |||||
| PrintInOutTensorShape(node, "before_infershape"); | PrintInOutTensorShape(node, "before_infershape"); | ||||
| Operator op = OpDescUtils::CreateOperatorFromNode(node); | Operator op = OpDescUtils::CreateOperatorFromNode(node); | ||||
| bool is_unknown_graph = node->GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||||
| if (!is_unknown_graph) { | if (!is_unknown_graph) { | ||||
| auto inference_context = CreateInferenceContext(context_map, node); | auto inference_context = CreateInferenceContext(context_map, node); | ||||
| if (inference_context == nullptr) { | if (inference_context == nullptr) { | ||||
| @@ -574,7 +644,21 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh | |||||
| graphStatus status = InferShapeAndType(node, op, before_subgraph); | graphStatus status = InferShapeAndType(node, op, before_subgraph); | ||||
| if (status == GRAPH_PARAM_INVALID || status == GRAPH_SUCCESS) { | if (status == GRAPH_PARAM_INVALID || status == GRAPH_SUCCESS) { | ||||
| (void)ge::NodeUtils::UpdatePeerNodeInputDesc(node); | |||||
| if (is_unknown_graph) { | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| for (const auto &out_anchor : node->GetAllOutDataAnchors()) { | |||||
| auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); | |||||
| ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast<uint32_t>(output_tensor->GetShape().GetDims().size())); | |||||
| output_tensor->SetOriginShape(output_tensor->GetShape()); | |||||
| output_tensor->SetOriginDataType(output_tensor->GetDataType()); | |||||
| GELOGD("node name is %s, origin shape is %ld, origin format is %s, origin data type is %s", | |||||
| node->GetName().c_str(), output_tensor->GetOriginShape().GetShapeSize(), | |||||
| TypeUtils::FormatToSerialString(output_tensor->GetOriginFormat()).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(output_tensor->GetOriginDataType()).c_str()); | |||||
| } | |||||
| } else { | } else { | ||||
| GELOGE(GRAPH_FAILED, "%s call infer function failed.", node->GetName().c_str()); | GELOGE(GRAPH_FAILED, "%s call infer function failed.", node->GetName().c_str()); | ||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "utils/node_utils.h" | #include "utils/node_utils.h" | ||||
| #include "utils/op_desc_utils.h" | |||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "debug/ge_op_types.h" | #include "debug/ge_op_types.h" | ||||
| #include "debug/ge_util.h" | #include "debug/ge_util.h" | ||||
| @@ -301,6 +302,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||||
| } | } | ||||
| for (const auto &out_anchor : node_ptr->GetAllOutDataAnchors()) { | for (const auto &out_anchor : node_ptr->GetAllOutDataAnchors()) { | ||||
| auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); | auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); | ||||
| auto out_dims = output_tensor->GetShape().GetDims(); | |||||
| auto out_dtype = output_tensor->GetDataType(); | |||||
| ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast<uint32_t>(output_tensor->GetShape().GetDims().size())); | ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast<uint32_t>(output_tensor->GetShape().GetDims().size())); | ||||
| output_tensor->SetOriginShape(output_tensor->GetShape()); | output_tensor->SetOriginShape(output_tensor->GetShape()); | ||||
| output_tensor->SetOriginDataType(output_tensor->GetDataType()); | output_tensor->SetOriginDataType(output_tensor->GetDataType()); | ||||
| @@ -320,6 +323,35 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||||
| GELOGE(GRAPH_FAILED, "peer_input_desc is nullptr"); | GELOGE(GRAPH_FAILED, "peer_input_desc is nullptr"); | ||||
| continue; | continue; | ||||
| } | } | ||||
| // check shape and dtype continuity. do not stop process | |||||
| auto peer_input_dims = peer_input_desc->GetShape().GetDims(); | |||||
| auto peer_input_dtype = peer_input_desc->GetDataType(); | |||||
| if (out_dtype != peer_input_dtype) { | |||||
| GELOGW( | |||||
| "current node [%s] [%d]\'th out_dtype is [%s].peer input node [%s] [%d]\'th " | |||||
| "input_dtype is [%s].The two dtype should be same! Please check graph and fix it", | |||||
| node_ptr->GetName().c_str(), out_anchor->GetIdx(), TypeUtils::DataTypeToSerialString(out_dtype).c_str(), | |||||
| peer_anchor->GetOwnerNode()->GetName().c_str(), peer_anchor->GetIdx(), | |||||
| TypeUtils::DataTypeToSerialString(peer_input_dtype).c_str()); | |||||
| } else if ((!peer_input_dims.empty()) && (out_dims != peer_input_dims)) { | |||||
| string out_shape_str, peer_in_shape_str; | |||||
| out_shape_str += "["; | |||||
| for (int64_t dim : out_dims) { | |||||
| out_shape_str += std::to_string(dim) + " "; | |||||
| } | |||||
| out_shape_str += "]"; | |||||
| peer_in_shape_str += "["; | |||||
| for (int64_t dim : peer_input_dims) { | |||||
| peer_in_shape_str += std::to_string(dim) + " "; | |||||
| } | |||||
| peer_in_shape_str += "]"; | |||||
| GELOGW( | |||||
| "current node [%s] [%d]\'th out_shape is [%s].peer input node [%s] [%d]\'th " | |||||
| "input_shape is [%s].The two shape should be same! Please check graph and fix it", | |||||
| node_ptr->GetName().c_str(), out_anchor->GetIdx(), out_shape_str.c_str(), | |||||
| peer_anchor->GetOwnerNode()->GetName().c_str(), peer_anchor->GetIdx(), peer_in_shape_str.c_str()); | |||||
| } | |||||
| GELOGI("Peer input opdesc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d", | GELOGI("Peer input opdesc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d", | ||||
| peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), output_tensor->GetShape().GetDimNum(), | peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), output_tensor->GetShape().GetDimNum(), | ||||
| output_tensor->GetDataType(), output_tensor->GetOriginDataType()); | output_tensor->GetDataType(), output_tensor->GetOriginDataType()); | ||||
| @@ -341,15 +373,15 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||||
| } | } | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::AppendInputAnchor(const NodePtr &node, | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::AppendInputAnchor(const NodePtr &node, | ||||
| uint32_t index) { | |||||
| uint32_t num) { | |||||
| if (node == nullptr) { | if (node == nullptr) { | ||||
| GELOGE(GRAPH_FAILED, "Nodeptr is nullptr"); | |||||
| GELOGE(GRAPH_FAILED, "Input node is null"); | |||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| } | } | ||||
| GeTensorDesc data_desc(GeShape(), FORMAT_ND, DT_FLOAT); | GeTensorDesc data_desc(GeShape(), FORMAT_ND, DT_FLOAT); | ||||
| OpDescPtr op_desc = node->op_; | |||||
| for (size_t i = op_desc->GetInputsSize(); i < index; ++i) { | |||||
| const auto &op_desc = node->GetOpDesc(); | |||||
| for (size_t i = op_desc->GetInputsSize(); i < num; ++i) { | |||||
| if (op_desc->AddInputDesc(data_desc) != GRAPH_SUCCESS) { | if (op_desc->AddInputDesc(data_desc) != GRAPH_SUCCESS) { | ||||
| GELOGE(GRAPH_FAILED, "Add input desc failed"); | GELOGE(GRAPH_FAILED, "Add input desc failed"); | ||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| @@ -357,7 +389,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::AppendInpu | |||||
| auto anchor = ComGraphMakeShared<InDataAnchor>(node, i); | auto anchor = ComGraphMakeShared<InDataAnchor>(node, i); | ||||
| if (anchor == nullptr) { | if (anchor == nullptr) { | ||||
| GELOGE(GRAPH_FAILED, "Current in_data_anchor is null, malloc shared_ptr failed."); | |||||
| GELOGE(OUT_OF_MEMORY, "Current in data anchor is null, make shared_ptr failed."); | |||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| } | } | ||||
| node->in_data_anchors_.push_back(anchor); | node->in_data_anchors_.push_back(anchor); | ||||
| @@ -367,22 +399,81 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::AppendInpu | |||||
| } | } | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::RemoveInputAnchor(const NodePtr &node, | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::RemoveInputAnchor(const NodePtr &node, | ||||
| uint32_t index) { | |||||
| uint32_t num) { | |||||
| if (node == nullptr) { | if (node == nullptr) { | ||||
| GELOGE(GRAPH_FAILED, "Nodeptr is nullptr"); | |||||
| GELOGE(GRAPH_FAILED, "Input node is null"); | |||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| } | } | ||||
| OpDescPtr op_desc = node->op_; | |||||
| op_desc->RemoveInputDesc(index); | |||||
| const auto &op_desc = node->GetOpDesc(); | |||||
| while (op_desc->GetInputsSize() > num) { | |||||
| if (!OpDescUtils::ClearInputDesc(op_desc, num)) { | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| } | |||||
| while (node->in_data_anchors_.size() > index) { | |||||
| auto input_names = op_desc->GetAllInputName(); | |||||
| (void)op_desc->UpdateInputName(input_names); | |||||
| auto is_input_const = op_desc->GetIsInputConst(); | |||||
| is_input_const.resize(num); | |||||
| op_desc->SetIsInputConst(is_input_const); | |||||
| while (node->in_data_anchors_.size() > num) { | |||||
| node->in_data_anchors_.pop_back(); | node->in_data_anchors_.pop_back(); | ||||
| } | } | ||||
| return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
| } | } | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::AppendOutputAnchor(const NodePtr &node, | |||||
| uint32_t num) { | |||||
| if (node == nullptr) { | |||||
| GELOGE(GRAPH_FAILED, "Input node is null"); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| GeTensorDesc data_desc(GeShape(), FORMAT_ND, DT_FLOAT); | |||||
| const OpDescPtr &op_desc = node->GetOpDesc(); | |||||
| for (size_t i = op_desc->GetOutputsSize(); i < num; ++i) { | |||||
| if (op_desc->AddOutputDesc(data_desc) != GRAPH_SUCCESS) { | |||||
| GELOGE(GRAPH_FAILED, "Add output desc failed"); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| auto anchor = ComGraphMakeShared<OutDataAnchor>(node, i); | |||||
| if (anchor == nullptr) { | |||||
| GELOGE(OUT_OF_MEMORY, "Current out data anchor is null, make shared_ptr failed."); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| node->out_data_anchors_.push_back(anchor); | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::RemoveOutputAnchor(const NodePtr &node, | |||||
| uint32_t num) { | |||||
| if (node == nullptr) { | |||||
| GELOGE(GRAPH_FAILED, "Input node is null"); | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| const auto &op_desc = node->GetOpDesc(); | |||||
| auto output_names = op_desc->GetAllOutputName(); | |||||
| while (op_desc->GetOutputsSize() > num) { | |||||
| if (!OpDescUtils::ClearOutputDesc(op_desc, num)) { | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| } | |||||
| (void)op_desc->UpdateOutputName(output_names); | |||||
| while (node->out_data_anchors_.size() > num) { | |||||
| node->out_data_anchors_.pop_back(); | |||||
| } | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| bool NodeUtils::IsInNodesEmpty(const Node &node) { | bool NodeUtils::IsInNodesEmpty(const Node &node) { | ||||
| for (const auto &in_anchor : node.in_data_anchors_) { | for (const auto &in_anchor : node.in_data_anchors_) { | ||||
| if (in_anchor != nullptr) { | if (in_anchor != nullptr) { | ||||
| @@ -488,11 +579,22 @@ std::string NodeUtils::GetNodeType(const Node &node) { | |||||
| if (node.GetType() != FRAMEWORKOP) { | if (node.GetType() != FRAMEWORKOP) { | ||||
| return node.GetType(); | return node.GetType(); | ||||
| } | } | ||||
| std::string type; | std::string type; | ||||
| (void)AttrUtils::GetStr(node.GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type); | (void)AttrUtils::GetStr(node.GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type); | ||||
| return type; | return type; | ||||
| } | } | ||||
| std::string NodeUtils::GetNodeType(const NodePtr &node) { return node == nullptr ? "" : GetNodeType(*node); } | |||||
| graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) { | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) { | |||||
| return GRAPH_SUCCESS; | |||||
| } | |||||
| ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { | ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { | ||||
| auto op_desc = node.GetOpDesc(); | auto op_desc = node.GetOpDesc(); | ||||
| if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
| @@ -544,16 +646,17 @@ bool NodeUtils::IsSubgraphInput(const NodePtr &node) { | |||||
| if (parent_op_desc == nullptr) { | if (parent_op_desc == nullptr) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | |||||
| bool is_unknown_shape = false; | |||||
| (void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); | |||||
| if (is_unknown_shape) return false; | |||||
| } | |||||
| if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) && | |||||
| kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 && | |||||
| kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) { | |||||
| return false; | |||||
| // dynamic shape unknown graph false | |||||
| // dynamic shape known graph with functional subgraph maybe true | |||||
| if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | |||||
| if (node->GetOwnerComputeGraph()->GetParentGraph()->GetGraphUnknownFlag()) { | |||||
| return false; | |||||
| } else { | |||||
| if (node->GetOwnerComputeGraph()->GetParentNode()->GetOwnerComputeGraph()->GetParentNode() == nullptr) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); | return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); | ||||
| @@ -576,15 +679,13 @@ bool NodeUtils::IsSubgraphOutput(const NodePtr &node) { | |||||
| } | } | ||||
| if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | ||||
| bool is_unknown_shape = false; | |||||
| (void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); | |||||
| if (is_unknown_shape) return false; | |||||
| } | |||||
| if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) && | |||||
| kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 && | |||||
| kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) { | |||||
| return false; | |||||
| if (node->GetOwnerComputeGraph()->GetParentGraph()->GetGraphUnknownFlag()) { | |||||
| return false; | |||||
| } else { | |||||
| if (node->GetOwnerComputeGraph()->GetParentNode()->GetOwnerComputeGraph()->GetParentNode() == nullptr) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| for (GeTensorDesc &tensor : node->GetOpDesc()->GetAllInputsDesc()) { | for (GeTensorDesc &tensor : node->GetOpDesc()->GetAllInputsDesc()) { | ||||
| @@ -601,16 +702,14 @@ bool NodeUtils::IsSubgraphOutput(const NodePtr &node) { | |||||
| /// @param [in] node | /// @param [in] node | ||||
| /// @return Node | /// @return Node | ||||
| /// | /// | ||||
| NodePtr NodeUtils::GetParentInput(const NodePtr &node) { | |||||
| GE_CHECK_NOTNULL_EXEC(node, return nullptr); | |||||
| NodePtr NodeUtils::GetParentInput(const Node &node) { | |||||
| uint32_t parent_index = 0; | uint32_t parent_index = 0; | ||||
| if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||||
| if (!AttrUtils::GetInt(node.GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| // Subgraph Data Node, check for constant input. | // Subgraph Data Node, check for constant input. | ||||
| const ComputeGraphPtr &graph = node->GetOwnerComputeGraph(); | |||||
| const ComputeGraphPtr &graph = node.GetOwnerComputeGraph(); | |||||
| GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | ||||
| const NodePtr &parent_node = graph->GetParentNode(); | const NodePtr &parent_node = graph->GetParentNode(); | ||||
| @@ -625,6 +724,26 @@ NodePtr NodeUtils::GetParentInput(const NodePtr &node) { | |||||
| return peer_out_anchor->GetOwnerNode(); | return peer_out_anchor->GetOwnerNode(); | ||||
| } | } | ||||
| NodePtr NodeUtils::GetParentInput(const NodePtr &node) { return node == nullptr ? node : GetParentInput(*node); } | |||||
| /// | |||||
| /// @brief Get is dynamic shape graph from node. | |||||
| /// @param [in] node | |||||
| /// @return bool | |||||
| /// | |||||
| bool NodeUtils::IsDynamicShape(const Node &node) { | |||||
| const auto graph = GraphUtils::FindRootGraph(node.GetOwnerComputeGraph()); | |||||
| if (graph == nullptr) { | |||||
| return false; | |||||
| } | |||||
| bool is_dynamic_shape = false; | |||||
| (void)AttrUtils::GetBool(graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); | |||||
| return is_dynamic_shape; | |||||
| } | |||||
| bool NodeUtils::IsDynamicShape(const NodePtr &node) { return node == nullptr ? false : IsDynamicShape(*node); } | |||||
| /// | /// | ||||
| /// @brief Check is varying_input for while node | /// @brief Check is varying_input for while node | ||||
| /// @param [in] node: Data node for subgraph | /// @param [in] node: Data node for subgraph | ||||
| @@ -678,27 +797,22 @@ bool NodeUtils::IsWhileVaryingInput(const ge::NodePtr &node) { | |||||
| /// @param [out] string | /// @param [out] string | ||||
| /// @return bool | /// @return bool | ||||
| /// | /// | ||||
| bool NodeUtils::GetConstOpType(const NodePtr &in_node, std::string &op_type) { | |||||
| GE_CHECK_NOTNULL_EXEC(in_node, return false); | |||||
| bool NodeUtils::GetConstOpType(const NodePtr &node, std::string &type) { | |||||
| if (node == nullptr) { | |||||
| return false; | |||||
| } | |||||
| if ((in_node->GetType() == CONSTANT) || (in_node->GetType() == CONSTANTOP)) { | |||||
| op_type = in_node->GetType(); | |||||
| if ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) { | |||||
| type = node->GetType(); | |||||
| return true; | return true; | ||||
| } | } | ||||
| if (in_node->GetType() == DATA) { | |||||
| std::string const_type; | |||||
| if (!AttrUtils::GetStr(in_node->GetOpDesc(), ATTR_NAME_PARENT_CONST_TYPE, const_type)) { | |||||
| return false; | |||||
| } | |||||
| if ((const_type == CONSTANT) || (const_type == CONSTANTOP)) { | |||||
| op_type = const_type; | |||||
| return true; | |||||
| } | |||||
| if (node->GetType() != DATA) { | |||||
| return false; // not subgraph input node | |||||
| } | } | ||||
| return false; | |||||
| const auto &parent = GetParentInput(node); | |||||
| return GetConstOpType(parent, type); | |||||
| } | } | ||||
| /// | /// | ||||
| @@ -809,7 +923,7 @@ vector<NodePtr> NodeUtils::GetSubgraphOutputNodes(const Node &node) { | |||||
| return out_data_node_vec; | return out_data_node_vec; | ||||
| } | } | ||||
| NodePtr NodeUtils::GetInDataNodeByIndex(const Node &node, int index) { | |||||
| NodePtr NodeUtils::GetInDataNodeByIndex(const Node &node, const int index) { | |||||
| if (node.GetInDataAnchor(index) == nullptr) { | if (node.GetInDataAnchor(index) == nullptr) { | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| @@ -819,12 +933,13 @@ NodePtr NodeUtils::GetInDataNodeByIndex(const Node &node, int index) { | |||||
| return node.GetInDataAnchor(index)->GetPeerOutAnchor()->GetOwnerNode(); | return node.GetInDataAnchor(index)->GetPeerOutAnchor()->GetOwnerNode(); | ||||
| } | } | ||||
| vector<NodePtr> NodeUtils::GetOutDataNodesByIndex(const Node &node, int index) { | |||||
| vector<NodePtr> out_data_nodes; | |||||
| vector<pair<InDataAnchorPtr, NodePtr>> NodeUtils::GetOutDataNodesWithAnchorByIndex(const Node &node, const int index) { | |||||
| vector<pair<InDataAnchorPtr, NodePtr>> out_data_nodes; | |||||
| auto out_data_anchor = node.GetOutDataAnchor(index); | auto out_data_anchor = node.GetOutDataAnchor(index); | ||||
| if (out_data_anchor == nullptr) { | if (out_data_anchor == nullptr) { | ||||
| return out_data_nodes; | return out_data_nodes; | ||||
| } | } | ||||
| for (const auto peer_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | for (const auto peer_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | ||||
| if (peer_in_anchor == nullptr) { | if (peer_in_anchor == nullptr) { | ||||
| continue; | continue; | ||||
| @@ -832,8 +947,10 @@ vector<NodePtr> NodeUtils::GetOutDataNodesByIndex(const Node &node, int index) { | |||||
| if (peer_in_anchor->GetOwnerNode() == nullptr) { | if (peer_in_anchor->GetOwnerNode() == nullptr) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| out_data_nodes.emplace_back(peer_in_anchor->GetOwnerNode()); | |||||
| out_data_nodes.emplace_back(std::make_pair(peer_in_anchor, peer_in_anchor->GetOwnerNode())); | |||||
| } | } | ||||
| return out_data_nodes; | return out_data_nodes; | ||||
| } | } | ||||
| ConstNodePtr NodeUtils::GetNodeFromOperator(const Operator &oprt) { return oprt.GetNode(); } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -438,6 +438,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector<ge::NodePtr> OpDescUtils:: | |||||
| if (switch_input.size() > 0) { | if (switch_input.size() > 0) { | ||||
| ret.insert(ret.end(), switch_input.begin(), switch_input.end()); | ret.insert(ret.end(), switch_input.begin(), switch_input.end()); | ||||
| } | } | ||||
| } else if (in_node->GetType() == DATA) { | |||||
| auto parent = NodeUtils::GetParentInput(in_node); | |||||
| if ((parent != nullptr) && (parent->GetType() == CONSTANT)) { | |||||
| ret.push_back(parent); | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| return ret; | return ret; | ||||
| @@ -244,6 +244,21 @@ static const std::map<domi::FrameworkType, std::string> kFmkTypeToString = { | |||||
| {domi::ANDROID_NN, "android_nn"}, {domi::ONNX, "onnx"}, {domi::FRAMEWORK_RESERVED, "framework_reserved"}, | {domi::ANDROID_NN, "android_nn"}, {domi::ONNX, "onnx"}, {domi::FRAMEWORK_RESERVED, "framework_reserved"}, | ||||
| }; | }; | ||||
| static const std::map<domi::ImplyType, std::string> kImplyTypeToString = { | |||||
| {domi::ImplyType::BUILDIN, "buildin"}, {domi::ImplyType::TVM, "tvm"}, {domi::ImplyType::CUSTOM, "custom"}, | |||||
| {domi::ImplyType::AI_CPU, "ai_cpu"}, {domi::ImplyType::CCE, "cce"}, {domi::ImplyType::GELOCAL, "gelocal"}, | |||||
| {domi::ImplyType::HCCL, "hccl"}, {domi::ImplyType::INVALID, "invalid"}}; | |||||
| std::string TypeUtils::ImplyTypeToSerialString(domi::ImplyType imply_type) { | |||||
| auto it = kImplyTypeToString.find(imply_type); | |||||
| if (it != kImplyTypeToString.end()) { | |||||
| return it->second; | |||||
| } else { | |||||
| GELOGE(GRAPH_FAILED, "ImplyTypeToSerialString: imply_type not support %u", imply_type); | |||||
| return "UNDEFINED"; | |||||
| } | |||||
| } | |||||
| bool TypeUtils::IsDataTypeValid(DataType dt) { | bool TypeUtils::IsDataTypeValid(DataType dt) { | ||||
| uint32_t num = static_cast<uint32_t>(dt); | uint32_t num = static_cast<uint32_t>(dt); | ||||
| GE_CHK_BOOL_EXEC((num <= DT_UNDEFINED), return false, "The DataType is invalid"); | GE_CHK_BOOL_EXEC((num <= DT_UNDEFINED), return false, "The DataType is invalid"); | ||||
| @@ -56,6 +56,9 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge) | |||||
| # need to remove dependencies on pb files later | # need to remove dependencies on pb files later | ||||
| file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | ||||
| "client/ge_api.cc" | "client/ge_api.cc" | ||||
| "common/dump/dump_manager.cc" | |||||
| "common/dump/dump_properties.cc" | |||||
| "common/dump/dump_op.cc" | |||||
| "common/formats/format_transfers/*.cc" | "common/formats/format_transfers/*.cc" | ||||
| "common/formats/formats.cc" | "common/formats/formats.cc" | ||||
| "common/formats/utils/formats_trans_utils.cc" | "common/formats/utils/formats_trans_utils.cc" | ||||
| @@ -124,6 +127,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "graph/preprocess/insert_op/ge_aipp_op.cc" | "graph/preprocess/insert_op/ge_aipp_op.cc" | ||||
| "graph/preprocess/insert_op/util_insert_aipp_op.cc" | "graph/preprocess/insert_op/util_insert_aipp_op.cc" | ||||
| "graph/preprocess/multi_batch_copy_graph.cc" | "graph/preprocess/multi_batch_copy_graph.cc" | ||||
| "graph/preprocess/multi_batch_options.cc" | |||||
| "host_kernels/add_kernel.cc" | "host_kernels/add_kernel.cc" | ||||
| "host_kernels/broadcast_args_kernel.cc" | "host_kernels/broadcast_args_kernel.cc" | ||||
| "host_kernels/broadcast_gradient_args_kernel.cc" | "host_kernels/broadcast_gradient_args_kernel.cc" | ||||
| @@ -138,6 +142,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "host_kernels/floormod_kernel.cc" | "host_kernels/floormod_kernel.cc" | ||||
| "host_kernels/gather_v2_kernel.cc" | "host_kernels/gather_v2_kernel.cc" | ||||
| "host_kernels/greater_kernel.cc" | "host_kernels/greater_kernel.cc" | ||||
| "host_kernels/identity_kernel.cc" | |||||
| "host_kernels/kernel_utils.cc" | "host_kernels/kernel_utils.cc" | ||||
| "host_kernels/maximum_kernel.cc" | "host_kernels/maximum_kernel.cc" | ||||
| "host_kernels/mul_kernel.cc" | "host_kernels/mul_kernel.cc" | ||||
| @@ -172,10 +177,18 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "hybrid/node_executor/aicpu/aicpu_node_executor.cc" | "hybrid/node_executor/aicpu/aicpu_node_executor.cc" | ||||
| "hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | "hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | ||||
| "hybrid/node_executor/controlop/control_op_executor.cc" | "hybrid/node_executor/controlop/control_op_executor.cc" | ||||
| "hybrid/node_executor/ge_local/ge_local_node_executor.cc" | |||||
| "hybrid/node_executor/hccl/hccl_node_executor.cc" | "hybrid/node_executor/hccl/hccl_node_executor.cc" | ||||
| "hybrid/node_executor/hostcpu/ge_local_node_executor.cc" | "hybrid/node_executor/hostcpu/ge_local_node_executor.cc" | ||||
| "hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" | |||||
| "hybrid/node_executor/host_cpu/kernel_factory.cc" | |||||
| "hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" | |||||
| "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||||
| "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||||
| "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||||
| "hybrid/node_executor/node_executor.cc" | "hybrid/node_executor/node_executor.cc" | ||||
| "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | ||||
| "hybrid/node_executor/rts/rts_node_executor.cc" | |||||
| "hybrid/node_executor/task_context.cc" | "hybrid/node_executor/task_context.cc" | ||||
| "init/gelib.cc" | "init/gelib.cc" | ||||
| "model/ge_model.cc" | "model/ge_model.cc" | ||||
| @@ -215,6 +228,9 @@ target_link_libraries(ge_runner | |||||
| ######### libge_compiler.so ############# | ######### libge_compiler.so ############# | ||||
| # need to remove dependencies on pb files later | # need to remove dependencies on pb files later | ||||
| file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | ||||
| "common/dump/dump_properties.cc" | |||||
| "common/dump/dump_manager.cc" | |||||
| "common/dump/dump_op.cc" | |||||
| "common/formats/format_transfers/*.cc" | "common/formats/format_transfers/*.cc" | ||||
| "common/formats/formats.cc" | "common/formats/formats.cc" | ||||
| "common/formats/utils/formats_trans_utils.cc" | "common/formats/utils/formats_trans_utils.cc" | ||||
| @@ -274,6 +290,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "graph/preprocess/insert_op/ge_aipp_op.cc" | "graph/preprocess/insert_op/ge_aipp_op.cc" | ||||
| "graph/preprocess/insert_op/util_insert_aipp_op.cc" | "graph/preprocess/insert_op/util_insert_aipp_op.cc" | ||||
| "graph/preprocess/multi_batch_copy_graph.cc" | "graph/preprocess/multi_batch_copy_graph.cc" | ||||
| "graph/preprocess/multi_batch_options.cc" | |||||
| "host_kernels/add_kernel.cc" | "host_kernels/add_kernel.cc" | ||||
| "host_kernels/broadcast_args_kernel.cc" | "host_kernels/broadcast_args_kernel.cc" | ||||
| "host_kernels/broadcast_gradient_args_kernel.cc" | "host_kernels/broadcast_gradient_args_kernel.cc" | ||||
| @@ -288,6 +305,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "host_kernels/floormod_kernel.cc" | "host_kernels/floormod_kernel.cc" | ||||
| "host_kernels/gather_v2_kernel.cc" | "host_kernels/gather_v2_kernel.cc" | ||||
| "host_kernels/greater_kernel.cc" | "host_kernels/greater_kernel.cc" | ||||
| "host_kernels/identity_kernel.cc" | |||||
| "host_kernels/kernel_utils.cc" | "host_kernels/kernel_utils.cc" | ||||
| "host_kernels/maximum_kernel.cc" | "host_kernels/maximum_kernel.cc" | ||||
| "host_kernels/mul_kernel.cc" | "host_kernels/mul_kernel.cc" | ||||
| @@ -390,6 +390,22 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorIn | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status Session::GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values) { | |||||
| auto instance_ptr = ge::GELib::GetInstance(); | |||||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); | |||||
| return FAILED; | |||||
| } | |||||
| GELOGT(TRACE_RUNNING, "Get Variables"); | |||||
| Status ret = ge::GELib::GetInstance()->SessionManagerObj().GetVariables(sessionId_, var_names, var_values); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "SessionManager RunGraphAsync failed"); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| bool Session::IsGraphNeedRebuild(uint32_t graph_id) { | bool Session::IsGraphNeedRebuild(uint32_t graph_id) { | ||||
| return ge::GELib::GetInstance()->SessionManagerObj().IsGraphNeedRebuild(sessionId_, graph_id); | return ge::GELib::GetInstance()->SessionManagerObj().IsGraphNeedRebuild(sessionId_, graph_id); | ||||
| } | } | ||||
| @@ -0,0 +1,120 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/dump/dump_manager.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/debug/log.h" | |||||
namespace {
// Switch values compared against DumpConfig::dump_status / DumpConfig::dump_op_switch.
constexpr const char *kDumpOFF = "OFF";
constexpr const char *kDumpoff = "off";
constexpr const char *kDumpOn = "on";
}  // namespace
| namespace ge { | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetInstance() { | |||||
| static DumpManager instance; | |||||
| return instance; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf(const DumpConfig &dump_config) { | |||||
| std::lock_guard<std::mutex> lock(mutex_); | |||||
| dump_properties_.ClearDumpPropertyValue(); | |||||
| dump_properties_.ClearDumpInfo(); | |||||
| std::string dump_status; | |||||
| std::string dump_path; | |||||
| std::string dump_mode; | |||||
| std::string dump_op_switch; | |||||
| if (dump_config.dump_status.empty()) { | |||||
| GELOGI("Dump does not open"); | |||||
| return SUCCESS; | |||||
| } | |||||
| dump_status = dump_config.dump_status; | |||||
| GELOGI("Dump status is %s", dump_status.c_str()); | |||||
| if (dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) { | |||||
| dump_properties_.ClearDumpPropertyValue(); | |||||
| return SUCCESS; | |||||
| } | |||||
| dump_op_switch = dump_config.dump_op_switch; | |||||
| if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { | |||||
| GELOGE(PARAM_INVALID, "Dump list is invalid,dump_op_switch is %s", dump_op_switch.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| if (!dump_config.dump_list.empty()) { | |||||
| for (auto model_dump : dump_config.dump_list) { | |||||
| std::string model_name = model_dump.model_name; | |||||
| GELOGI("Dump model is %s", model_name.c_str()); | |||||
| std::set<std::string> dump_layers; | |||||
| for (auto layer : model_dump.layers) { | |||||
| GELOGI("Dump layer is %s in model", layer.c_str()); | |||||
| dump_layers.insert(layer); | |||||
| } | |||||
| dump_properties_.AddPropertyValue(model_name, dump_layers); | |||||
| } | |||||
| if (dump_op_switch == kDumpOn) { | |||||
| GELOGI("Start to dump model and single op,dumo op switch is %s", dump_op_switch.c_str()); | |||||
| } else { | |||||
| GELOGI("Only dump model,dump op switch is %s", dump_op_switch.c_str()); | |||||
| } | |||||
| } else { | |||||
| GELOGI("Only dump single op,dumo op switch is %s", dump_op_switch.c_str()); | |||||
| } | |||||
| dump_path = dump_config.dump_path; | |||||
| if (dump_path.empty()) { | |||||
| GELOGE(PARAM_INVALID, "Dump path is empty"); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| if (dump_path[dump_path.size() - 1] != '/') { | |||||
| dump_path = dump_path + "/"; | |||||
| } | |||||
| dump_path = dump_path + CurrentTimeInStr() + "/"; | |||||
| GELOGI("Dump path is %s", dump_path.c_str()); | |||||
| dump_properties_.SetDumpPath(dump_path); | |||||
| dump_mode = dump_config.dump_mode; | |||||
| GELOGI("Dump mode is %s", dump_mode.c_str()); | |||||
| dump_properties_.SetDumpMode(dump_mode); | |||||
| return SUCCESS; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpManager::IsDumpOpen() { | |||||
| std::lock_guard<std::mutex> lock(mutex_); | |||||
| if (!dump_properties_.GetDumpPath().empty()) { | |||||
| return true; | |||||
| } | |||||
| return false; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties() { | |||||
| std::lock_guard<std::mutex> lock(mutex_); | |||||
| return dump_properties_; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpManager::SetModelName(const std::string &model_name) { | |||||
| std::lock_guard<std::mutex> lock(mutex_); | |||||
| model_name_ = model_name; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpManager::GetModelName() { | |||||
| std::lock_guard<std::mutex> lock(mutex_); | |||||
| return model_name_; | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,42 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_COMMON_DUMP_DUMP_MANAGER_H_ | |||||
| #define GE_COMMON_DUMP_DUMP_MANAGER_H_ | |||||
| #include <mutex> | |||||
| #include "common/dump/dump_properties.h" | |||||
| #include "common/ge_types.h" | |||||
| namespace ge { | |||||
| class DumpManager { | |||||
| public: | |||||
| static DumpManager &GetInstance(); | |||||
| Status SetDumpConf(const DumpConfig &dump_config); | |||||
| bool IsDumpOpen(); | |||||
| const DumpProperties &GetDumpProperties(); | |||||
| void SetModelName(const std::string &model_name); | |||||
| const std::string &GetModelName(); | |||||
| private: | |||||
| DumpProperties dump_properties_; | |||||
| std::mutex mutex_; | |||||
| std::string model_name_; | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_COMMON_DUMP_DUMP_MANAGER_H_ | |||||
| @@ -0,0 +1,255 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/dump/dump_op.h" | |||||
| #include "aicpu/common/aicpu_task_struct.h" | |||||
| #include "common/dump/dump_manager.h" | |||||
| #include "common/ge/datatype_util.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "graph/anchor.h" | |||||
| #include "graph/ge_tensor.h" | |||||
| #include "graph/op_desc.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "proto/ge_ir.pb.h" | |||||
| #include "proto/op_mapping_info.pb.h" | |||||
| #include "runtime/mem.h" | |||||
namespace {
// Value written to OpMappingInfo::flag by LaunchDumpOp.
constexpr uint32_t kAicpuLoadFlag = 1;
// Dump mode strings compared against DumpProperties::GetDumpMode().
constexpr const char *kDumpOutput = "output";
constexpr const char *kDumpInput = "input";
constexpr const char *kDumpAll = "all";
// Kernel name handed to rtCpuKernelLaunch.
constexpr const char *kDumpKernelsDumpOp = "DumpDataInfo";
}  // namespace
| namespace ge { | |||||
| DumpOp::~DumpOp() { | |||||
| if (proto_dev_mem_ != nullptr) { | |||||
| (void)rtFree(proto_dev_mem_); | |||||
| } | |||||
| if (proto_size_dev_mem_ != nullptr) { | |||||
| (void)rtFree(proto_size_dev_mem_); | |||||
| } | |||||
| proto_dev_mem_ = nullptr; | |||||
| proto_size_dev_mem_ = nullptr; | |||||
| } | |||||
| void DumpOp::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond) { | |||||
| global_step_ = reinterpret_cast<uintptr_t>(global_step); | |||||
| loop_per_iter_ = reinterpret_cast<uintptr_t>(loop_per_iter); | |||||
| loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond); | |||||
| } | |||||
| void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id) { | |||||
| dynamic_model_name_ = dynamic_model_name; | |||||
| dynamic_model_id_ = dynamic_model_id; | |||||
| } | |||||
| static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond, | |||||
| aicpu::dump::OpMappingInfo &op_mapping_info) { | |||||
| if (step_id != 0) { | |||||
| GELOGI("step_id exists."); | |||||
| op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id)); | |||||
| } else { | |||||
| GELOGI("step_id is null."); | |||||
| } | |||||
| if (loop_per_iter != 0) { | |||||
| GELOGI("loop_per_iter exists."); | |||||
| op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter)); | |||||
| } else { | |||||
| GELOGI("loop_per_iter is null."); | |||||
| } | |||||
| if (loop_cond != 0) { | |||||
| GELOGI("loop_cond exists."); | |||||
| op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond)); | |||||
| } else { | |||||
| GELOGI("loop_cond is null."); | |||||
| } | |||||
| } | |||||
| Status DumpOp::DumpOutput(aicpu::dump::Task &task) { | |||||
| GELOGI("Start dump output in Launch dump op"); | |||||
| const auto &output_descs = op_desc_->GetAllOutputsDesc(); | |||||
| for (size_t i = 0; i < output_descs.size(); ++i) { | |||||
| aicpu::dump::Output output; | |||||
| output.set_data_type(static_cast<int32_t>(DataTypeUtil::GetIrDataType(output_descs.at(i).GetDataType()))); | |||||
| output.set_format(static_cast<int32_t>(output_descs.at(i).GetFormat())); | |||||
| for (auto dim : output_descs.at(i).GetShape().GetDims()) { | |||||
| output.mutable_shape()->add_dim(dim); | |||||
| } | |||||
| int64_t output_size = 0; | |||||
| if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Get output size filed"); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| GELOGD("Get output size in lanch dump op is %ld", output_size); | |||||
| output.set_size(output_size); | |||||
| output.set_address(static_cast<uint64_t>(output_addrs_[i])); | |||||
| task.mutable_output()->Add(std::move(output)); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DumpOp::DumpInput(aicpu::dump::Task &task) { | |||||
| GELOGI("Start dump input in Launch dump op"); | |||||
| const auto &input_descs = op_desc_->GetAllInputsDesc(); | |||||
| for (size_t i = 0; i < input_descs.size(); ++i) { | |||||
| aicpu::dump::Input input; | |||||
| input.set_data_type(static_cast<int32_t>(DataTypeUtil::GetIrDataType(input_descs.at(i).GetDataType()))); | |||||
| input.set_format(static_cast<int32_t>(input_descs.at(i).GetFormat())); | |||||
| for (auto dim : input_descs.at(i).GetShape().GetDims()) { | |||||
| input.mutable_shape()->add_dim(dim); | |||||
| } | |||||
| int64_t input_size = 0; | |||||
| if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Get output size filed"); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| GELOGD("Get input size in lanch dump op is %ld", input_size); | |||||
| input.set_size(input_size); | |||||
| input.set_address(static_cast<uint64_t>(input_addrs_[i])); | |||||
| task.mutable_input()->Add(std::move(input)); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| void DumpOp::SetDumpInfo(const DumpProperties &dump_properties, const OpDescPtr &op_desc, vector<uintptr_t> input_addrs, | |||||
| vector<uintptr_t> output_addrs, rtStream_t stream) { | |||||
| dump_properties_ = dump_properties; | |||||
| op_desc_ = op_desc; | |||||
| input_addrs_ = input_addrs; | |||||
| output_addrs_ = output_addrs; | |||||
| stream_ = stream; | |||||
| } | |||||
| Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||||
| std::string proto_msg; | |||||
| size_t proto_size = op_mapping_info.ByteSizeLong(); | |||||
| bool ret = op_mapping_info.SerializeToString(&proto_msg); | |||||
| if (!ret || proto_size == 0) { | |||||
| GELOGE(FAILED, "Protobuf serialize failed,proto_size is %zu", proto_size); | |||||
| return FAILED; | |||||
| } | |||||
| rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
| return RT_FAILED; | |||||
| } | |||||
| rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||||
| return RT_FAILED; | |||||
| } | |||||
| rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
| return RT_FAILED; | |||||
| } | |||||
| rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||||
| return RT_FAILED; | |||||
| } | |||||
| constexpr int32_t ioAddrNum = 2; | |||||
| constexpr uint32_t argsSize = sizeof(aicpu::AicpuParamHead) + ioAddrNum * sizeof(uint64_t); | |||||
| char args[argsSize] = {0}; | |||||
| auto paramHead = reinterpret_cast<aicpu::AicpuParamHead *>(args); | |||||
| paramHead->length = argsSize; | |||||
| paramHead->ioAddrNum = ioAddrNum; | |||||
| auto ioAddr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead)); | |||||
| ioAddr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_); | |||||
| ioAddr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_); | |||||
| rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp, | |||||
| 1, // blockDim default 1 | |||||
| args, argsSize, | |||||
| nullptr, // no need smDesc | |||||
| stream_); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret); | |||||
| return rt_ret; | |||||
| } | |||||
| GELOGI("Kernel launch dump op success"); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DumpOp::LaunchDumpOp() { | |||||
| GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str()); | |||||
| int32_t device_id = 0; | |||||
| rtError_t rt_ret = rtGetDevice(&device_id); | |||||
| if (rt_ret != RT_ERROR_NONE || device_id < 0) { | |||||
| GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||||
| return RT_FAILED; | |||||
| } | |||||
| aicpu::dump::OpMappingInfo op_mapping_info; | |||||
| auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id) + "/"; | |||||
| op_mapping_info.set_dump_path(dump_path); | |||||
| op_mapping_info.set_flag(kAicpuLoadFlag); | |||||
| op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | |||||
| if (!dynamic_model_name_.empty()) { | |||||
| op_mapping_info.set_model_name(dynamic_model_name_); | |||||
| op_mapping_info.set_model_id(dynamic_model_id_); | |||||
| } | |||||
| SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | |||||
| GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | |||||
| dump_path.c_str()); | |||||
| aicpu::dump::Task task; | |||||
| task.mutable_op()->set_op_name(op_desc_->GetName()); | |||||
| task.mutable_op()->set_op_type(op_desc_->GetType()); | |||||
| if (dump_properties_.GetDumpMode() == kDumpOutput) { | |||||
| if (DumpOutput(task) != SUCCESS) { | |||||
| GELOGE(FAILED, "Dump output failed"); | |||||
| return FAILED; | |||||
| } | |||||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||||
| } | |||||
| if (dump_properties_.GetDumpMode() == kDumpInput) { | |||||
| if (DumpInput(task) != SUCCESS) { | |||||
| GELOGE(FAILED, "Dump input failed"); | |||||
| return FAILED; | |||||
| } | |||||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||||
| } | |||||
| if (dump_properties_.GetDumpMode() == kDumpAll) { | |||||
| auto ret = DumpOutput(task); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(FAILED, "Dump output failed when in dumping all"); | |||||
| return FAILED; | |||||
| } | |||||
| ret = DumpInput(task); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(FAILED, "Dump input failed when in dumping all"); | |||||
| return FAILED; | |||||
| } | |||||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||||
| } | |||||
| auto ret = ExecutorDumpOp(op_mapping_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Executor dump op failed"); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,61 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_COMMON_DUMP_DUMP_OP_H_ | |||||
| #define GE_COMMON_DUMP_DUMP_OP_H_ | |||||
| #include <string> | |||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "common/properties_manager.h" | |||||
| #include "proto/op_mapping_info.pb.h" | |||||
| #include "runtime/stream.h" | |||||
| namespace ge { | |||||
| class DumpOp { | |||||
| public: | |||||
| DumpOp() = default; | |||||
| ~DumpOp(); | |||||
| void SetDumpInfo(const DumpProperties &dump_properties, const OpDescPtr &op_desc, vector<uintptr_t> input_addrs, | |||||
| vector<uintptr_t> output_addrs, rtStream_t stream); | |||||
| Status LaunchDumpOp(); | |||||
| void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); | |||||
| void SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id); | |||||
| private: | |||||
| Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info); | |||||
| Status DumpOutput(aicpu::dump::Task &task); | |||||
| Status DumpInput(aicpu::dump::Task &task); | |||||
| DumpProperties dump_properties_; | |||||
| OpDescPtr op_desc_; | |||||
| std::vector<uintptr_t> input_addrs_; | |||||
| std::vector<uintptr_t> output_addrs_; | |||||
| void *proto_dev_mem_ = nullptr; | |||||
| void *proto_size_dev_mem_ = nullptr; | |||||
| rtStream_t stream_; | |||||
| uintptr_t global_step_; | |||||
| uintptr_t loop_per_iter_; | |||||
| uintptr_t loop_cond_; | |||||
| std::string dynamic_model_name_; | |||||
| std::uint32_t dynamic_model_id_; | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_COMMON_DUMP_DUMP_OP_H_ | |||||
| @@ -0,0 +1,238 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/dump/dump_properties.h" | |||||
| #include <cstdio> | |||||
| #include <string> | |||||
| #include "common/ge/ge_util.h" | |||||
| #include "common/util.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/debug/log.h" | |||||
| #include "framework/common/ge_types.h" | |||||
| #include "framework/common/types.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/ge_context.h" | |||||
| #include "graph/utils/attr_utils.h" | |||||
namespace {
// Option value that switches a dump feature on.
const std::string kEnableFlag = "1";
// Bit flags stored in op_debug_mode_.
constexpr uint32_t kAicoreOverflow = 1U << 0;
constexpr uint32_t kAtomicOverflow = 1U << 1;
constexpr uint32_t kAllOverflow = kAicoreOverflow | kAtomicOverflow;
}  // namespace
| namespace ge { | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) { | |||||
| CopyFrom(other); | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=( | |||||
| const DumpProperties &other) { | |||||
| CopyFrom(other); | |||||
| return *this; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOptions() { | |||||
| enable_dump_.clear(); | |||||
| enable_dump_debug_.clear(); | |||||
| dump_path_.clear(); | |||||
| dump_step_.clear(); | |||||
| dump_mode_.clear(); | |||||
| is_op_debug_ = false; | |||||
| op_debug_mode_ = 0; | |||||
| std::string enable_dump; | |||||
| (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP, enable_dump); | |||||
| enable_dump_ = enable_dump; | |||||
| std::string enable_dump_debug; | |||||
| (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP_DEBUG, enable_dump_debug); | |||||
| enable_dump_debug_ = enable_dump_debug; | |||||
| if ((enable_dump_ == kEnableFlag) || (enable_dump_debug_ == kEnableFlag)) { | |||||
| std::string dump_path; | |||||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_PATH, dump_path) == GRAPH_SUCCESS) { | |||||
| if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { | |||||
| dump_path = dump_path + "/"; | |||||
| } | |||||
| dump_path = dump_path + CurrentTimeInStr() + "/"; | |||||
| GELOGI("Get dump path %s successfully", dump_path.c_str()); | |||||
| SetDumpPath(dump_path); | |||||
| } else { | |||||
| GELOGW("Dump path is not set"); | |||||
| } | |||||
| } | |||||
| if (enable_dump_ == kEnableFlag) { | |||||
| std::string dump_step; | |||||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) { | |||||
| GELOGD("Get dump step %s successfully", dump_step.c_str()); | |||||
| SetDumpStep(dump_step); | |||||
| } | |||||
| string dump_mode; | |||||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) { | |||||
| GELOGD("Get dump mode %s successfully", dump_mode.c_str()); | |||||
| SetDumpMode(dump_mode); | |||||
| } | |||||
| AddPropertyValue(DUMP_ALL_MODEL, {}); | |||||
| } | |||||
| SetDumpDebugOptions(); | |||||
| } | |||||
| // The following is the new dump scenario of the fusion operator | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue( | |||||
| const std::string &model, const std::set<std::string> &layers) { | |||||
| for (const std::string &layer : layers) { | |||||
| GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str()); | |||||
| } | |||||
| model_dump_properties_map_[model] = layers; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::DeletePropertyValue(const std::string &model) { | |||||
| auto iter = model_dump_properties_map_.find(model); | |||||
| if (iter != model_dump_properties_map_.end()) { | |||||
| model_dump_properties_map_.erase(iter); | |||||
| } | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::ClearDumpPropertyValue() { | |||||
| model_dump_properties_map_.clear(); | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::ClearDumpInfo() { | |||||
| enable_dump_.clear(); | |||||
| enable_dump_debug_.clear(); | |||||
| dump_path_.clear(); | |||||
| dump_step_.clear(); | |||||
| dump_mode_.clear(); | |||||
| is_op_debug_ = false; | |||||
| op_debug_mode_ = 0; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetAllDumpModel() const { | |||||
| std::set<std::string> model_list; | |||||
| for (auto &iter : model_dump_properties_map_) { | |||||
| model_list.insert(iter.first); | |||||
| } | |||||
| return model_list; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue( | |||||
| const std::string &model) const { | |||||
| auto iter = model_dump_properties_map_.find(model); | |||||
| if (iter != model_dump_properties_map_.end()) { | |||||
| return iter->second; | |||||
| } | |||||
| return {}; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump( | |||||
| const std::string &model, const std::string &om_name, const std::string &op_name) const { | |||||
| // if dump all | |||||
| if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) { | |||||
| return true; | |||||
| } | |||||
| // if this model need dump | |||||
| auto om_name_iter = model_dump_properties_map_.find(om_name); | |||||
| auto model_name_iter = model_dump_properties_map_.find(model); | |||||
| if (om_name_iter != model_dump_properties_map_.end() || model_name_iter != model_dump_properties_map_.end()) { | |||||
| // if no dump layer info, dump all layer in this model | |||||
| auto model_iter = om_name_iter != model_dump_properties_map_.end() ? om_name_iter : model_name_iter; | |||||
| if (model_iter->second.empty()) { | |||||
| return true; | |||||
| } | |||||
| return model_iter->second.find(op_name) != model_iter->second.end(); | |||||
| } | |||||
| GELOGD("Model %s is not seated to be dump.", model.c_str()); | |||||
| return false; | |||||
| } | |||||
| /// Stores a copy of path in dump_path_. | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpPath(const std::string &path) { | |||||
| dump_path_ = path; | |||||
| } | |||||
| /// Returns a reference to dump_path_; the reference is tied to the lifetime of this object. | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperties::GetDumpPath() const { | |||||
| return dump_path_; | |||||
| } | |||||
| /// Stores a copy of step in dump_step_. | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpStep(const std::string &step) { | |||||
| dump_step_ = step; | |||||
| } | |||||
| /// Returns a reference to dump_step_; the reference is tied to the lifetime of this object. | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperties::GetDumpStep() const { | |||||
| return dump_step_; | |||||
| } | |||||
| /// Stores a copy of mode in dump_mode_. | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpMode(const std::string &mode) { | |||||
| dump_mode_ = mode; | |||||
| } | |||||
| /// Returns a reference to dump_mode_; the reference is tied to the lifetime of this object. | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperties::GetDumpMode() const { | |||||
| return dump_mode_; | |||||
| } | |||||
| /// Copies every data member of other into this object; copying an object onto itself is a no-op. | |||||
| /// @param [in] other  object to copy from | |||||
| void DumpProperties::CopyFrom(const DumpProperties &other) { | |||||
|   if (this == &other) { | |||||
|     return; | |||||
|   } | |||||
|   // dump switches | |||||
|   enable_dump_ = other.enable_dump_; | |||||
|   enable_dump_debug_ = other.enable_dump_debug_; | |||||
|   // dump target description | |||||
|   dump_path_ = other.dump_path_; | |||||
|   dump_step_ = other.dump_step_; | |||||
|   dump_mode_ = other.dump_mode_; | |||||
|   model_dump_properties_map_ = other.model_dump_properties_map_; | |||||
|   // op debug state | |||||
|   is_op_debug_ = other.is_op_debug_; | |||||
|   op_debug_mode_ = other.op_debug_mode_; | |||||
| } | |||||
| /// Derives is_op_debug_ / op_debug_mode_ from the OPTION_EXEC_DUMP_DEBUG_MODE context option. | |||||
| /// Nothing is changed when enable_dump_debug_ differs from kEnableFlag, when the option cannot be read, | |||||
| /// or when its value is none of OP_DEBUG_AICORE / OP_DEBUG_ATOMIC / OP_DEBUG_ALL. | |||||
| void DumpProperties::SetDumpDebugOptions() { | |||||
|   if (enable_dump_debug_ != kEnableFlag) { | |||||
|     GELOGI("ge.exec.enableDumpDebug is false or is not set."); | |||||
|     return; | |||||
|   } | |||||
|   std::string debug_mode; | |||||
|   if (GetContext().GetOption(OPTION_EXEC_DUMP_DEBUG_MODE, debug_mode) != GRAPH_SUCCESS) { | |||||
|     GELOGW("Dump debug mode is not set."); | |||||
|     return; | |||||
|   } | |||||
|   GELOGD("Get dump debug mode %s successfully", debug_mode.c_str()); | |||||
|   if (debug_mode == OP_DEBUG_AICORE) { | |||||
|     GELOGD("ge.exec.dumpDebugMode=aicore_overflow, op debug is open."); | |||||
|     is_op_debug_ = true; | |||||
|     op_debug_mode_ = kAicoreOverflow; | |||||
|     return; | |||||
|   } | |||||
|   if (debug_mode == OP_DEBUG_ATOMIC) { | |||||
|     GELOGD("ge.exec.dumpDebugMode=atomic_overflow, op debug is open."); | |||||
|     is_op_debug_ = true; | |||||
|     op_debug_mode_ = kAtomicOverflow; | |||||
|     return; | |||||
|   } | |||||
|   if (debug_mode == OP_DEBUG_ALL) { | |||||
|     GELOGD("ge.exec.dumpDebugMode=all, op debug is open."); | |||||
|     is_op_debug_ = true; | |||||
|     op_debug_mode_ = kAllOverflow; | |||||
|     return; | |||||
|   } | |||||
|   GELOGW("ge.exec.dumpDebugMode is invalid."); | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,86 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_COMMON_DUMP_DUMP_PROPERTIES_H_ | |||||
| #define GE_COMMON_DUMP_DUMP_PROPERTIES_H_ | |||||
| #include <map> | |||||
| #include <set> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| namespace ge { | |||||
| // Holds dump configuration: dump path / step / mode strings, a per-model set of layer names, | |||||
| // and the op debug state. | |||||
| // NOTE(review): uint32_t is used below but <cstdint> is not among this header's includes; | |||||
| // presumably it arrives transitively - confirm. | |||||
| class DumpProperties { | |||||
| public: | |||||
| DumpProperties() = default; | |||||
| ~DumpProperties() = default; | |||||
| DumpProperties(const DumpProperties &dump); | |||||
| DumpProperties &operator=(const DumpProperties &dump); | |||||
| void InitByOptions(); | |||||
| void AddPropertyValue(const std::string &model, const std::set<std::string> &layers); | |||||
| void DeletePropertyValue(const std::string &model); | |||||
| void ClearDumpPropertyValue(); | |||||
| void ClearDumpInfo(); | |||||
| // Returns the keys of model_dump_properties_map_. | |||||
| std::set<std::string> GetAllDumpModel() const; | |||||
| // Returns the layer set stored for model, or an empty set when model has no entry. | |||||
| std::set<std::string> GetPropertyValue(const std::string &model) const; | |||||
| // True when op_name of the model (looked up by om_name first, then by model) has to be dumped. | |||||
| bool IsLayerNeedDump(const std::string &model, const std::string &om_name, const std::string &op_name) const; | |||||
| void SetDumpPath(const std::string &path); | |||||
| const std::string &GetDumpPath() const; | |||||
| void SetDumpStep(const std::string &step); | |||||
| const std::string &GetDumpStep() const; | |||||
| void SetDumpMode(const std::string &mode); | |||||
| const std::string &GetDumpMode() const; | |||||
| bool IsOpDebugOpen() const { return is_op_debug_; } | |||||
| uint32_t GetOpDebugMode() const { return op_debug_mode_; } | |||||
| private: | |||||
| // Member-wise copy of other into this object (no-op for self copy). | |||||
| void CopyFrom(const DumpProperties &other); | |||||
| // Sets is_op_debug_ / op_debug_mode_ from the dump debug mode context option. | |||||
| void SetDumpDebugOptions(); | |||||
| std::string enable_dump_; | |||||
| std::string enable_dump_debug_; | |||||
| std::string dump_path_; | |||||
| std::string dump_step_; | |||||
| std::string dump_mode_; | |||||
| // key: model name (or DUMP_ALL_MODEL), value: layer names; an empty set means all layers of that model | |||||
| std::map<std::string, std::set<std::string>> model_dump_properties_map_; | |||||
| bool is_op_debug_ = false; | |||||
| uint32_t op_debug_mode_ = 0; | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_COMMON_DUMP_DUMP_PROPERTIES_H_ | |||||
| @@ -15,23 +15,54 @@ | |||||
| */ | */ | ||||
| #include "common/ge/datatype_util.h" | #include "common/ge/datatype_util.h" | ||||
| #include "proto/ge_ir.pb.h" | |||||
| #include <map> | #include <map> | ||||
| namespace { | namespace { | ||||
| const std::vector<ge::DataType> kEmptyDatatypeVector; | const std::vector<ge::DataType> kEmptyDatatypeVector; | ||||
| std::map<ge::DataType, std::vector<ge::DataType>> g_translatable_data_type = { | std::map<ge::DataType, std::vector<ge::DataType>> g_translatable_data_type = { | ||||
| // key:src datatype, value:dst datatype | |||||
| {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}, | |||||
| {ge::DT_BOOL, {ge::DT_INT32}}, | |||||
| {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, | |||||
| {ge::DT_INT64, {ge::DT_INT32}}}; | |||||
| // key:src datatype, value:dst datatype | |||||
| {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}, | |||||
| {ge::DT_BOOL, {ge::DT_INT32}}, | |||||
| {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, | |||||
| {ge::DT_INT64, {ge::DT_INT32}}}; | |||||
| std::map<ge::DataType, std::vector<ge::DataType>> g_reverse_translatable_data_type = { | std::map<ge::DataType, std::vector<ge::DataType>> g_reverse_translatable_data_type = { | ||||
| // key:dst datatype,value:src datatype | |||||
| {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, | |||||
| {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, | |||||
| {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; | |||||
| // key:dst datatype,value:src datatype | |||||
| {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, | |||||
| {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, | |||||
| {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; | |||||
| static const std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = { | |||||
| // key: ge::DataType, value: the ge::proto::DataType of the same name; queried by DataTypeUtil::GetIrDataType | |||||
| {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, | |||||
| {ge::DT_FLOAT, ge::proto::DT_FLOAT}, | |||||
| {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, | |||||
| {ge::DT_INT8, ge::proto::DT_INT8}, | |||||
| {ge::DT_UINT8, ge::proto::DT_UINT8}, | |||||
| {ge::DT_INT16, ge::proto::DT_INT16}, | |||||
| {ge::DT_UINT16, ge::proto::DT_UINT16}, | |||||
| {ge::DT_INT32, ge::proto::DT_INT32}, | |||||
| {ge::DT_INT64, ge::proto::DT_INT64}, | |||||
| {ge::DT_UINT32, ge::proto::DT_UINT32}, | |||||
| {ge::DT_UINT64, ge::proto::DT_UINT64}, | |||||
| {ge::DT_BOOL, ge::proto::DT_BOOL}, | |||||
| {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, | |||||
| {ge::DT_DUAL, ge::proto::DT_DUAL}, | |||||
| {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, | |||||
| {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, | |||||
| {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, | |||||
| {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, | |||||
| {ge::DT_QINT8, ge::proto::DT_QINT8}, | |||||
| {ge::DT_QINT16, ge::proto::DT_QINT16}, | |||||
| {ge::DT_QINT32, ge::proto::DT_QINT32}, | |||||
| {ge::DT_QUINT8, ge::proto::DT_QUINT8}, | |||||
| {ge::DT_QUINT16, ge::proto::DT_QUINT16}, | |||||
| {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, | |||||
| {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, | |||||
| {ge::DT_STRING, ge::proto::DT_STRING}, | |||||
| }; | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| @@ -67,4 +98,13 @@ const std::vector<ge::DataType> &DataTypeUtil::GetTranslatableDataTypesByDst(con | |||||
| return search->second; | return search->second; | ||||
| } | } | ||||
| /// Translates a ge::DataType into the integer value of its ge::proto::DataType counterpart. | |||||
| /// @param [in] data_type  key searched in g_dump_data_type_map | |||||
| /// @return the mapped proto value, or ge::proto::DT_UNDEFINED when data_type has no entry | |||||
| int32_t DataTypeUtil::GetIrDataType(ge::DataType data_type) { | |||||
|   const auto found = g_dump_data_type_map.find(data_type); | |||||
|   const ge::proto::DataType ir_type = | |||||
|       (found != g_dump_data_type_map.end()) ? found->second : ge::proto::DT_UNDEFINED; | |||||
|   return static_cast<int32_t>(ir_type); | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -37,16 +37,17 @@ static const int32_t kGeSizeUint16 = sizeof(uint16_t); | |||||
| static const int32_t kGeSizeUint32 = sizeof(uint32_t); | static const int32_t kGeSizeUint32 = sizeof(uint32_t); | ||||
| static std::map<ge::DataType, int32_t> CONST_OPDATA_TYPE_SIZE_MAP = { | static std::map<ge::DataType, int32_t> CONST_OPDATA_TYPE_SIZE_MAP = { | ||||
| {ge::DT_FLOAT, kGeSizeFloat}, {ge::DT_FLOAT16, kGeSizeHalfFloat}, {ge::DT_INT8, kGeSizeInt8}, | |||||
| {ge::DT_INT16, kGeSizeInt16}, {ge::DT_INT32, kGeSizeInt32}, {ge::DT_INT64, kGeSizeInt64}, | |||||
| {ge::DT_UINT8, kGeSizeUint8}, {ge::DT_UINT16, kGeSizeUint16}, {ge::DT_UINT32, kGeSizeUint32}, | |||||
| {ge::DT_UINT64, kGeSizeUint64}, {ge::DT_DOUBLE, kGeSizeDouble}, {ge::DT_BOOL, kGeSizeBool}}; | |||||
| {ge::DT_FLOAT, kGeSizeFloat}, {ge::DT_FLOAT16, kGeSizeHalfFloat}, {ge::DT_INT8, kGeSizeInt8}, | |||||
| {ge::DT_INT16, kGeSizeInt16}, {ge::DT_INT32, kGeSizeInt32}, {ge::DT_INT64, kGeSizeInt64}, | |||||
| {ge::DT_UINT8, kGeSizeUint8}, {ge::DT_UINT16, kGeSizeUint16}, {ge::DT_UINT32, kGeSizeUint32}, | |||||
| {ge::DT_UINT64, kGeSizeUint64}, {ge::DT_DOUBLE, kGeSizeDouble}, {ge::DT_BOOL, kGeSizeBool}}; | |||||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY DataTypeUtil { | class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY DataTypeUtil { | ||||
| public: | public: | ||||
| static bool DataTypeTranslatable(const ge::DataType &src_out_data_type, const ge::DataType &dst_in_data_type); | static bool DataTypeTranslatable(const ge::DataType &src_out_data_type, const ge::DataType &dst_in_data_type); | ||||
| static const std::vector<ge::DataType> &GetTranslatableDataTypesBySrc(const ge::DataType &src_out_data_type); | static const std::vector<ge::DataType> &GetTranslatableDataTypesBySrc(const ge::DataType &src_out_data_type); | ||||
| static const std::vector<ge::DataType> &GetTranslatableDataTypesByDst(const ge::DataType &dst_in_data_type); | static const std::vector<ge::DataType> &GetTranslatableDataTypesByDst(const ge::DataType &dst_in_data_type); | ||||
| // Returns the ge::proto::DataType value matching data_type as int32_t (DT_UNDEFINED when unmapped). | |||||
| static int32_t GetIrDataType(ge::DataType data_type); | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_COMMON_GE_DATATYPE_UTIL_H_ | #endif // GE_COMMON_GE_DATATYPE_UTIL_H_ | ||||
| @@ -187,8 +187,8 @@ void TBEPluginManager::LoadCustomOpLib() { | |||||
| std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | ||||
| GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | ||||
| for (OpRegistrationData reg_data : registration_datas) { | for (OpRegistrationData reg_data : registration_datas) { | ||||
| GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(), | |||||
| static_cast<uint32_t>(reg_data.GetImplyType())); | |||||
| GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(), | |||||
| TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str()); | |||||
| domi::OpRegistry::Instance()->Register(reg_data); | domi::OpRegistry::Instance()->Register(reg_data); | ||||
| } | } | ||||
| } | } | ||||
| @@ -36,7 +36,6 @@ GE_COMMON_LOCAL_SRC_FILES := \ | |||||
| properties_manager.cc \ | properties_manager.cc \ | ||||
| types.cc\ | types.cc\ | ||||
| model_parser/base.cc \ | model_parser/base.cc \ | ||||
| model_parser/graph_parser_util.cc \ | |||||
| tbe_kernel_store.cc \ | tbe_kernel_store.cc \ | ||||
| op/attr_value_util.cc \ | op/attr_value_util.cc \ | ||||
| op/ge_op_utils.cc \ | op/ge_op_utils.cc \ | ||||
| @@ -562,7 +562,6 @@ inline Status CheckUint64MulOverflow(uint64_t a, uint64_t b) { | |||||
| /// @return Status | /// @return Status | ||||
| inline Status CheckFp16MulOverflow(fp16_t a, fp16_t b) { | inline Status CheckFp16MulOverflow(fp16_t a, fp16_t b) { | ||||
| fp16_t result = static_cast<fp16_t>(a) * static_cast<fp16_t>(b); | fp16_t result = static_cast<fp16_t>(a) * static_cast<fp16_t>(b); | ||||
| printf("result: %u, 0x%x\n", result.val, result.val); | |||||
| if (FP16_IS_INVALID(result.val)) { | if (FP16_IS_INVALID(result.val)) { | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -885,6 +884,23 @@ inline Status CheckInt32DivOverflow(int32_t a, int32_t b) { | |||||
| static_cast<uint32_t>(b)); \ | static_cast<uint32_t>(b)); \ | ||||
| return INTERNAL_ERROR; \ | return INTERNAL_ERROR; \ | ||||
| } | } | ||||
| } // namespace ge | |||||
| // Logs an INTERNAL_ERROR and returns INTERNAL_ERROR from the enclosing function when fabs(a) < DBL_EPSILON. | |||||
| // Expands to a bare `if` block and evaluates `a` more than once, so pass a side-effect free expression. | |||||
| // NOTE(review): the threshold is DBL_EPSILON although the message says fp16 - confirm this is intended. | |||||
| #define FMK_FP16_ZEROCHECK(a) \ | |||||
| if (fabs(a) < DBL_EPSILON) { \ | |||||
| GELOGE(INTERNAL_ERROR, "fp16 %f can not be zero !", a); \ | |||||
| return INTERNAL_ERROR; \ | |||||
| } | |||||
| // Logs an INTERNAL_ERROR and returns INTERNAL_ERROR from the enclosing function when fabs(a) < FLT_EPSILON. | |||||
| // Expands to a bare `if` block and evaluates `a` more than once, so pass a side-effect free expression. | |||||
| #define FMK_FLOAT_ZEROCHECK(a) \ | |||||
| if (fabs(a) < FLT_EPSILON) { \ | |||||
| GELOGE(INTERNAL_ERROR, "float %f can not be zero !", a); \ | |||||
| return INTERNAL_ERROR; \ | |||||
| } | |||||
| // Logs an INTERNAL_ERROR and returns INTERNAL_ERROR from the enclosing function when fabs(a) < DBL_EPSILON. | |||||
| // Expands to a bare `if` block and evaluates `a` more than once, so pass a side-effect free expression. | |||||
| #define FMK_DOUBLE_ZEROCHECK(a) \ | |||||
| if (fabs(a) < DBL_EPSILON) { \ | |||||
| GELOGE(INTERNAL_ERROR, "double %lf can not be zero !", a); \ | |||||
| return INTERNAL_ERROR; \ | |||||
| } | |||||
| } // namespace ge | |||||
| #endif // GE_COMMON_MATH_MATH_UTIL_H_ | #endif // GE_COMMON_MATH_MATH_UTIL_H_ | ||||
| @@ -1,501 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph_parser_util.h" | |||||
| #include <memory> | |||||
| #include "common/auth/file_saver.h" | |||||
| #include "common/convert/pb2json.h" | |||||
| #include "common/debug/log.h" | |||||
| #include "common/debug/memory_dumper.h" | |||||
| #include "common/model_parser/base.h" | |||||
| #include "common/model_saver.h" | |||||
| #include "common/properties_manager.h" | |||||
| #include "common/string_util.h" | |||||
| #include "common/types.h" | |||||
| #include "common/util.h" | |||||
| #include "common/util/error_manager/error_manager.h" | |||||
| #include "external/register/register_types.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/omg/parser/parser_inner_ctx.h" | |||||
| #include "graph/compute_graph.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/optimize/common/params.h" | |||||
| #include "graph/utils/type_utils.h" | |||||
| #include "omg/omg_inner_types.h" | |||||
| #include "omg/parser/model_parser.h" | |||||
| #include "omg/parser/parser_factory.h" | |||||
| #include "omg/parser/weights_parser.h" | |||||
| #include "parser/common/pre_checker.h" | |||||
| #include "proto/ge_ir.pb.h" | |||||
| #include "register/op_registry.h" | |||||
| namespace ge { | |||||
| namespace { | |||||
| // Sample strings and reason fragments passed to ErrorManager reports and GELOG* calls by the parsing helpers below. | |||||
| const char *const kInputShapeSample1 = "\"input_name1:n1,c1,h1,w1\""; | |||||
| const char *const kInputShapeSample2 = "\"input_name1:1,3,224,224\""; | |||||
| const char *const kSplitError1 = "size not equal to 2 split by \":\""; | |||||
| const char *const kEmptyError = "can not be empty"; | |||||
| const char *const kFloatNumError = "exist float number"; | |||||
| const char *const kDigitError = "is not digit"; | |||||
| const char *const kOutputTypeSample = "correct sample is \"opname:index:dtype\""; | |||||
| const char *const kOutputTypeSupport = "only support FP32, FP16, UINT8"; | |||||
| const char *const kOutputTypeError = "The multiple out nodes set in output_type must be found in out_nodes."; | |||||
| /// Splits one "name:shape" item at its LAST ':' so that names containing ':' stay intact. | |||||
| /// @param [in] input_shape  single item of the input shape list | |||||
| /// @return {text before the last ':', text after it}; an empty vector when there is no ':' | |||||
| vector<string> SplitInputShape(const std::string &input_shape) { | |||||
|   const size_t colon_pos = input_shape.rfind(":"); | |||||
|   if (colon_pos == std::string::npos) { | |||||
|     return {}; | |||||
|   } | |||||
|   vector<string> name_and_shape; | |||||
|   name_and_shape.emplace_back(input_shape.substr(0, colon_pos)); | |||||
|   // substr without a count runs to the end of the string | |||||
|   name_and_shape.emplace_back(input_shape.substr(colon_pos + 1)); | |||||
|   return name_and_shape; | |||||
| } | |||||
| // key: data type spelling accepted in --output_type, value: the ge::DataType it selects | |||||
| static std::map<std::string, ge::DataType> output_type_str_to_datatype = { | |||||
| {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; | |||||
| /// Checks that a parameter value is exactly "true" or "false". | |||||
| /// @param [in] s          value to check | |||||
| /// @param [in] atc_param  parameter name, used only in the E10033 report and the error log | |||||
| /// @return true when s is "true" or "false"; otherwise reports the error and returns false | |||||
| static bool CheckInputTrueOrFalse(const std::string &s, const std::string &atc_param) { | |||||
|   const bool is_bool_text = (s == "true") || (s == "false"); | |||||
|   if (is_bool_text) { | |||||
|     return true; | |||||
|   } | |||||
|   ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"parameter", "value"}, {atc_param, s}); | |||||
|   GELOGE(PARAM_INVALID, "Input parameter[--%s]'s value[%s] must be true or false.", atc_param.c_str(), s.c_str()); | |||||
|   return false; | |||||
| } | |||||
| /// Checks that every character of str is a decimal digit. | |||||
| /// @param [in] str  text to check (not modified) | |||||
| /// @return true when all characters are digits (also for an empty string); false, after logging, otherwise | |||||
| bool CheckDigitStr(std::string &str) { | |||||
|   for (const char c : str) { | |||||
|     // isdigit has undefined behaviour for negative values, which plain char can hold for non-ASCII bytes. | |||||
|     if (!isdigit(static_cast<unsigned char>(c))) { | |||||
|       GELOGE(domi::FAILED, "value[%s] is not positive integer", str.c_str()); | |||||
|       return false; | |||||
|     } | |||||
|   } | |||||
|   return true; | |||||
| } | |||||
| /// Converts a digit-only string into an int32_t. | |||||
| /// @param [in]  str    text to convert | |||||
| /// @param [out] value  receives the converted number on success | |||||
| /// @return SUCCESS, or PARAM_INVALID when str holds a non-digit character or stoi rejects it | |||||
| Status StringToInt(std::string &str, int32_t &value) { | |||||
|   try { | |||||
|     if (CheckDigitStr(str)) { | |||||
|       value = stoi(str); | |||||
|       return SUCCESS; | |||||
|     } | |||||
|   } catch (const std::invalid_argument &) { | |||||
|     GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch invalid_argument.", str.c_str()); | |||||
|     ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str}); | |||||
|     return PARAM_INVALID; | |||||
|   } catch (const std::out_of_range &) { | |||||
|     GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch out_of_range.", str.c_str()); | |||||
|     ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"output_type", str}); | |||||
|     return PARAM_INVALID; | |||||
|   } | |||||
|   // Only reached when the digit check failed. | |||||
|   GELOGE(PARAM_INVALID, "Invalid of digit string: %s ", str.c_str()); | |||||
|   ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||||
|                                                   {"--output_type", str, "is not positive integer"}); | |||||
|   return PARAM_INVALID; | |||||
| } | |||||
| /// Verifies that every "node:index" named in --output_type is also a user declared out node. | |||||
| /// @param [in] out_type_vec  "node:index" strings collected from --output_type | |||||
| /// @return domi::SUCCESS when all entries are found in domi::GetContext().user_out_nodes, domi::FAILED otherwise | |||||
| Status VerifyOutputTypeAndOutNodes(std::vector<std::string> &out_type_vec) { | |||||
|   const std::vector<std::pair<std::string, int32_t>> declared_out_nodes = domi::GetContext().user_out_nodes; | |||||
|   // Build the "node:index" spelling of every declared out node for lookup. | |||||
|   std::set<std::string> declared_keys; | |||||
|   for (const auto &declared : declared_out_nodes) { | |||||
|     declared_keys.emplace(declared.first + ":" + to_string(declared.second)); | |||||
|   } | |||||
|   for (const std::string &requested : out_type_vec) { | |||||
|     if (declared_keys.count(requested) > 0) { | |||||
|       continue; | |||||
|     } | |||||
|     ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||||
|                                                     {"--output_type", requested, kOutputTypeError}); | |||||
|     GELOGE(domi::FAILED, "Invalid value for --output_type[%s], %s.", requested.c_str(), kOutputTypeError); | |||||
|     return domi::FAILED; | |||||
|   } | |||||
|   return domi::SUCCESS; | |||||
| } | |||||
| /// Parses the --output_type value. | |||||
| /// A value without ':' applies to all out nodes and is only validated against the supported type names. | |||||
| /// Otherwise the value is a ';' separated list of "opname:index:dtype" items that are collected per node. | |||||
| /// @param [in]  output_type         raw option value | |||||
| /// @param [out] out_type_index_map  node name -> output indexes listed for that node | |||||
| /// @param [out] out_type_dt_map     node name -> data types listed for that node (same order as the indexes) | |||||
| /// @return domi::SUCCESS, or domi::FAILED on malformed items, unsupported types or nodes missing from out_nodes | |||||
| Status ParseOutputType(const std::string &output_type, std::map<std::string, vector<uint32_t>> &out_type_index_map, | |||||
|                        std::map<std::string, vector<ge::DataType>> &out_type_dt_map) { | |||||
|   const bool applies_to_all_nodes = (output_type.find(':') == std::string::npos); | |||||
|   if (applies_to_all_nodes) { | |||||
|     GELOGI("output_type is not multiple nodes, means all out nodes"); | |||||
|     if (output_type_str_to_datatype.find(output_type) != output_type_str_to_datatype.end()) { | |||||
|       return domi::SUCCESS; | |||||
|     } | |||||
|     ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||||
|                                                     {"--output_type", output_type, kOutputTypeSupport}); | |||||
|     GELOGE(PARAM_INVALID, "Invalid value for --output_type[%s], %s.", output_type.c_str(), kOutputTypeSupport); | |||||
|     return domi::FAILED; | |||||
|   } | |||||
|   std::vector<std::string> node_index_keys; | |||||
|   vector<string> items = StringUtils::Split(output_type, ';'); | |||||
|   for (const string &item : items) { | |||||
|     vector<string> fields = StringUtils::Split(item, ':'); | |||||
|     if (fields.size() != 3) {  // The size must be 3. | |||||
|       ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||||
|                                                       {"--output_type", item, kOutputTypeSample}); | |||||
|       GELOGE(PARAM_INVALID, "Invalid value for --output_type[%s], %s.", item.c_str(), kOutputTypeSample); | |||||
|       return domi::FAILED; | |||||
|     } | |||||
|     std::string node_name = StringUtils::Trim(fields[0]); | |||||
|     std::string index_str = StringUtils::Trim(fields[1]); | |||||
|     int32_t index; | |||||
|     if (StringToInt(index_str, index) != SUCCESS) { | |||||
|       GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s.", index_str.c_str()); | |||||
|       return domi::FAILED; | |||||
|     } | |||||
|     std::string dt_value = StringUtils::Trim(fields[2]); | |||||
|     const auto dt_found = output_type_str_to_datatype.find(dt_value); | |||||
|     if (dt_found == output_type_str_to_datatype.end()) { | |||||
|       ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||||
|                                                       {"--output_type", dt_value, kOutputTypeSupport}); | |||||
|       GELOGE(ge::PARAM_INVALID, "Invalid value for --output_type[%s], %s.", dt_value.c_str(), kOutputTypeSupport); | |||||
|       return domi::FAILED; | |||||
|     } | |||||
|     const ge::DataType data_type = dt_found->second; | |||||
|     node_index_keys.push_back(node_name + ":" + index_str); | |||||
|     // operator[] creates the per-node vector on first use, then the value is appended. | |||||
|     out_type_index_map[node_name].push_back(index); | |||||
|     out_type_dt_map[node_name].push_back(data_type); | |||||
|   } | |||||
|   return VerifyOutputTypeAndOutNodes(node_index_keys); | |||||
| } | |||||
| /// Checks that index addresses an existing output of op_desc. | |||||
| /// @param [in] op_desc  op whose output count bounds the index (dereferenced without a null check) | |||||
| /// @param [in] index    output index to validate | |||||
| /// @return domi::SUCCESS when 0 <= index < output count; domi::FAILED, after logging and reporting E10003, otherwise | |||||
| Status CheckOutNode(ge::OpDescPtr op_desc, int32_t index) { | |||||
|   int32_t out_size = op_desc->GetOutputsSize(); | |||||
|   const bool index_in_range = (index >= 0) && (index < out_size); | |||||
|   if (index_in_range) { | |||||
|     return domi::SUCCESS; | |||||
|   } | |||||
|   GELOGE(domi::FAILED, | |||||
|          "out_node [%s] output index:%d must be smaller " | |||||
|          "than node output size:%d and can not be negative!", | |||||
|          op_desc->GetName().c_str(), index, out_size); | |||||
|   std::string fail_reason = "output index:" + to_string(index) + | |||||
|                             " must be smaller than output size:" + to_string(out_size) + " and can not be negative!"; | |||||
|   ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, | |||||
|                                                   {"out_nodes", op_desc->GetName(), fail_reason}); | |||||
|   return domi::FAILED; | |||||
| } | |||||
| /// Appends the outputs contributed by one leaf node to output_nodes_info. | |||||
| /// For a NETOUTPUT node the producers of its data inputs are appended; for any other node each of its own | |||||
| /// outputs is appended. | |||||
| /// @param [in]  node               leaf node (dereferenced without a null check) | |||||
| /// @param [out] output_nodes_info  receives (node, output index) pairs | |||||
| /// @return SUCCESS, or domi::FAILED when the op desc or a peer out anchor is missing | |||||
| Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info) { | |||||
|   ge::OpDescPtr op_desc = node->GetOpDesc(); | |||||
|   if (op_desc == nullptr) { | |||||
|     GELOGE(domi::FAILED, "Get outnode op desc fail."); | |||||
|     return domi::FAILED; | |||||
|   } | |||||
|   const size_t output_count = op_desc->GetOutputsSize(); | |||||
|   if (node->GetType() != NETOUTPUT) { | |||||
|     for (size_t out_idx = 0; out_idx < output_count; ++out_idx) { | |||||
|       output_nodes_info.push_back(std::make_pair(node, out_idx)); | |||||
|     } | |||||
|     return SUCCESS; | |||||
|   } | |||||
|   // NETOUTPUT itself is not an output: record what feeds each of its data inputs. | |||||
|   const auto in_anchors = node->GetAllInDataAnchors(); | |||||
|   for (auto in_anchor : in_anchors) { | |||||
|     auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); | |||||
|     if (peer_out_anchor == nullptr) { | |||||
|       GELOGE(domi::FAILED, "Get leaf node op desc fail."); | |||||
|       return domi::FAILED; | |||||
|     } | |||||
|     auto producer = peer_out_anchor->GetOwnerNode(); | |||||
|     output_nodes_info.push_back(std::make_pair(producer, peer_out_anchor->GetIdx())); | |||||
|   } | |||||
|   return SUCCESS; | |||||
| } | |||||
| /// Builds the textual names of the output nodes. | |||||
| /// Without top names in the domi context (tf process) each entry is "node:index"; with top names (caffe process) | |||||
| /// the i-th entry is "node:index:top_name", falling back to "node:index" with a warning when fewer top names | |||||
| /// than output nodes exist. | |||||
| /// @param [in]  output_nodes_info  (node, output index) pairs; every node pointer is dereferenced | |||||
| /// @param [out] output_nodes_name  cleared, then filled with one name per entry of output_nodes_info | |||||
| void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||||
|                                 std::vector<std::string> &output_nodes_name) { | |||||
|   output_nodes_name.clear(); | |||||
|   // Query the context once instead of on every loop iteration. | |||||
|   const auto &top_names = domi::GetContext().out_top_names; | |||||
|   if (top_names.empty()) { | |||||
|     // tf process, no top name. | |||||
|     // Bind by reference: copying the pair would copy the node shared pointer on every iteration. | |||||
|     for (const auto &node_info : output_nodes_info) { | |||||
|       output_nodes_name.push_back(node_info.first->GetName() + ":" + std::to_string(node_info.second)); | |||||
|     } | |||||
|     return; | |||||
|   } | |||||
|   // caffe process, need add top name after node_name:index | |||||
|   for (size_t i = 0; i < output_nodes_info.size(); ++i) { | |||||
|     const std::string node_name = output_nodes_info[i].first->GetName(); | |||||
|     const int32_t index = output_nodes_info[i].second; | |||||
|     if (i < top_names.size()) { | |||||
|       output_nodes_name.push_back(node_name + ":" + std::to_string(index) + ":" + top_names[i]); | |||||
|     } else { | |||||
|       GELOGW("Get top name of node [%s] fail.", node_name.c_str()); | |||||
|       output_nodes_name.push_back(node_name + ":" + std::to_string(index)); | |||||
|     } | |||||
|   } | |||||
| } | |||||
| } // namespace | |||||
| /// Parses the comma separated true/false list of is_output_adjust_hw_layout into domi::GetContext().output_formats. | |||||
| /// "true" selects domi::DOMI_TENSOR_NC1HWC0 and "false" selects DOMI_TENSOR_ND for the item at the same position. | |||||
| /// @param [in] is_output_fp16  raw option value; an empty value leaves the context untouched | |||||
| /// @return SUCCESS, or PARAM_INVALID when an item is neither "true" nor "false" (the formats parsed so far are kept) | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputFp16NodesFormat(const string &is_output_fp16) { | |||||
|   if (is_output_fp16.empty()) { | |||||
|     return SUCCESS; | |||||
|   } | |||||
|   vector<domiTensorFormat_t> &formats = domi::GetContext().output_formats; | |||||
|   formats.clear(); | |||||
|   vector<string> flags = StringUtils::Split(is_output_fp16, ','); | |||||
|   for (auto &flag : flags) { | |||||
|     StringUtils::Trim(flag); | |||||
|     if (!CheckInputTrueOrFalse(flag, "is_output_adjust_hw_layout")) { | |||||
|       GELOGE(PARAM_INVALID, "Invalid Param, is_output_adjust_hw_layout only support true/false: but is [%s]", | |||||
|              is_output_fp16.c_str()); | |||||
|       return PARAM_INVALID; | |||||
|     } | |||||
|     // The check above guarantees the flag is exactly "true" or "false". | |||||
|     const bool adjust_layout = (flag == "true"); | |||||
|     if (adjust_layout) { | |||||
|       formats.push_back(domi::DOMI_TENSOR_NC1HWC0); | |||||
|     } else { | |||||
|       formats.push_back(DOMI_TENSOR_ND); | |||||
|     } | |||||
|   } | |||||
|   return SUCCESS; | |||||
| } | |||||
| /// Resolves the output nodes of a graph and records them on its compute graph and in the domi context. | |||||
| /// User declared out nodes (domi::GetContext().user_out_nodes) are validated and tagged with the requested | |||||
| /// format / data type attributes; when none are declared, nodes that have data inputs but no data outputs are used. | |||||
| /// @param [in] graph        graph whose compute graph receives the output node info | |||||
| /// @param [in] output_type  --output_type value, parsed only when non-empty | |||||
| /// @param [in] output       not read by this function | |||||
| /// @return domi::SUCCESS on success; domi::FAILED or the status of the failing check otherwise | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph, | |||||
|                                                                             const std::string &output_type, | |||||
|                                                                             const std::string &output) { | |||||
|   ge::ComputeGraphPtr compute_graph = ge::GraphUtils::GetComputeGraph(graph); | |||||
|   GE_CHECK_NOTNULL(compute_graph); | |||||
|   const std::vector<std::pair<std::string, int32_t>> declared_out_nodes = domi::GetContext().user_out_nodes; | |||||
|   const std::vector<domiTensorFormat_t> declared_formats = domi::GetContext().output_formats; | |||||
|   std::vector<std::pair<ge::NodePtr, int32_t>> output_nodes_info; | |||||
|   std::vector<std::string> output_nodes_name; | |||||
|   std::map<std::string, vector<uint32_t>> type_index_by_node; | |||||
|   std::map<std::string, vector<ge::DataType>> type_dt_by_node; | |||||
|   if (!output_type.empty() && (ParseOutputType(output_type, type_index_by_node, type_dt_by_node) != SUCCESS)) { | |||||
|     GELOGE(domi::FAILED, "Parse output_type failed."); | |||||
|     return domi::FAILED; | |||||
|   } | |||||
|   // User declared outputs | |||||
|   for (uint32_t i = 0; i < declared_out_nodes.size(); ++i) { | |||||
|     const std::string &node_name = declared_out_nodes[i].first; | |||||
|     const int32_t out_index = declared_out_nodes[i].second; | |||||
|     ge::NodePtr out_node = compute_graph->FindNode(node_name); | |||||
|     if (out_node == nullptr) { | |||||
|       GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", node_name.c_str()); | |||||
|       return domi::FAILED; | |||||
|     } | |||||
|     auto op_desc = out_node->GetOpDesc(); | |||||
|     GE_CHECK_NOTNULL(op_desc); | |||||
|     if (CheckOutNode(op_desc, out_index) != SUCCESS) { | |||||
|       GELOGE(domi::FAILED, "Check out node (%s) fail.", node_name.c_str()); | |||||
|       return domi::FAILED; | |||||
|     } | |||||
|     // Formats are matched to out nodes by position. | |||||
|     if ((i < declared_formats.size()) && (declared_formats[i] == domi::DOMI_TENSOR_NC1HWC0)) { | |||||
|       GELOGI("The output node [%s] should be set NC1HWC0", node_name.c_str()); | |||||
|       if (!ge::AttrUtils::SetBool(op_desc, "output_set_fp16_nc1hwc0", true)) { | |||||
|         GELOGW("The output node [%s] set NC1HWC0 failed", node_name.c_str()); | |||||
|       } | |||||
|     } | |||||
|     const auto index_found = type_index_by_node.find(node_name); | |||||
|     const auto dt_found = type_dt_by_node.find(node_name); | |||||
|     if ((index_found != type_index_by_node.end()) && (dt_found != type_dt_by_node.end())) { | |||||
|       GELOGI("The output node [%s] need to be set output_type", node_name.c_str()); | |||||
|       (void)ge::AttrUtils::SetListDataType(op_desc, "_output_dt_list", dt_found->second); | |||||
|       (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", index_found->second); | |||||
|     } | |||||
|     output_nodes_info.push_back(std::make_pair(out_node, out_index)); | |||||
|   } | |||||
|   // default output node (leaf) | |||||
|   if (declared_out_nodes.empty()) { | |||||
|     for (ge::NodePtr node : compute_graph->GetDirectNode()) { | |||||
|       const bool is_leaf = !node->GetInDataNodes().empty() && node->GetOutDataNodes().empty(); | |||||
|       if (!is_leaf) { | |||||
|         continue; | |||||
|       } | |||||
|       Status ret = GetOutputLeaf(node, output_nodes_info); | |||||
|       GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); | |||||
|     } | |||||
|   } | |||||
|   GetOutputNodesNameAndIndex(output_nodes_info, output_nodes_name); | |||||
|   compute_graph->SetGraphOutNodesInfo(output_nodes_info); | |||||
|   domi::GetContext().net_out_nodes = output_nodes_name; | |||||
|   return domi::SUCCESS; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ParseInputShape( | |||||
| const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map, | |||||
| vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) { | |||||
| vector<string> shape_vec = StringUtils::Split(input_shape, ';'); | |||||
| const int DEFAULT_SHAPE_PAIR_SIZE = 2; | |||||
| for (const auto &shape : shape_vec) { | |||||
| vector<string> shape_pair_vec = SplitInputShape(shape); | |||||
| if (shape_pair_vec.size() != DEFAULT_SHAPE_PAIR_SIZE) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, | |||||
| {shape, kSplitError1, kInputShapeSample1}); | |||||
| GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.", | |||||
| shape.c_str(), kSplitError1, kInputShapeSample1); | |||||
| return false; | |||||
| } | |||||
| if (shape_pair_vec[1].empty()) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, | |||||
| {shape, kEmptyError, kInputShapeSample1}); | |||||
| GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.", | |||||
| shape.c_str(), kEmptyError, kInputShapeSample1); | |||||
| return false; | |||||
| } | |||||
| vector<string> shape_value_strs = StringUtils::Split(shape_pair_vec[1], ','); | |||||
| vector<int64_t> shape_values; | |||||
| for (auto &shape_value_str : shape_value_strs) { | |||||
| // stoul: The method may throw an exception: invalid_argument/out_of_range | |||||
| if (std::string::npos != shape_value_str.find('.')) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, | |||||
| {shape, kFloatNumError, kInputShapeSample2}); | |||||
| GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.", | |||||
| shape.c_str(), kFloatNumError, kInputShapeSample2); | |||||
| return false; | |||||
| } | |||||
| long left_result = 0; | |||||
| try { | |||||
| left_result = stol(StringUtils::Trim(shape_value_str)); | |||||
| if (!shape_value_str.empty() && (shape_value_str.front() == '-')) { | |||||
| // The value maybe dynamic shape [-1], need substr it and verify isdigit. | |||||
| shape_value_str = shape_value_str.substr(1); | |||||
| } | |||||
| for (char c : shape_value_str) { | |||||
| if (!isdigit(c)) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, | |||||
| {shape, kDigitError, kInputShapeSample2}); | |||||
| GELOGE(PARAM_INVALID, "--input_shape's shape value[%s] is not digit", shape_value_str.c_str()); | |||||
| return false; | |||||
| } | |||||
| } | |||||
| } catch (const std::out_of_range &) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, | |||||
| {"input_shape", shape_value_str}); | |||||
| GELOGW("Input parameter[--input_shape]’s value[%s] cause out of range execption!", shape_value_str.c_str()); | |||||
| return false; | |||||
| } catch (const std::invalid_argument &) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, | |||||
| {"input_shape", shape_value_str}); | |||||
| GELOGW("Input parameter[--input_shape]’s value[%s] cause invalid argument!", shape_value_str.c_str()); | |||||
| return false; | |||||
| } catch (...) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10015", {"parameter", "value"}, | |||||
| {"input_shape", shape_value_str}); | |||||
| GELOGW("Input parameter[--input_shape]’s value[%s] cause unkown execption!", shape_value_str.c_str()); | |||||
| return false; | |||||
| } | |||||
| int64_t result = left_result; | |||||
| // - 1 is not currently supported | |||||
| if (!is_dynamic_input && result <= 0) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10011", {"shape", "result"}, {shape, std::to_string(result)}); | |||||
| GELOGW( | |||||
| "Input parameter[--input_shape]’s shape value[%s] is invalid, " | |||||
| "expect positive integer, but value is %ld.", | |||||
| shape.c_str(), result); | |||||
| return false; | |||||
| } | |||||
| shape_values.push_back(result); | |||||
| } | |||||
| shape_map.emplace(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values)); | |||||
| user_shape_map.push_back(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values)); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputNodes(const string &out_nodes) { | |||||
| try { | |||||
| // parse output node | |||||
| if (!out_nodes.empty()) { | |||||
| domi::GetContext().out_nodes_map.clear(); | |||||
| domi::GetContext().user_out_nodes.clear(); | |||||
| vector<string> nodes_v = StringUtils::Split(out_nodes, ';'); | |||||
| for (const string &node : nodes_v) { | |||||
| vector<string> key_value_v = StringUtils::Split(node, ':'); | |||||
| if (key_value_v.size() != 2) { // The size must be 2. | |||||
| ErrorManager::GetInstance().ATCReportErrMessage( | |||||
| "E10001", {"parameter", "value", "reason"}, | |||||
| {"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); | |||||
| GELOGE(PARAM_INVALID, | |||||
| "The input format of --out_nodes is invalid, the correct format is " | |||||
| "\"node_name1:0;node_name1:1;node_name2:0\", while the actual input is %s.", | |||||
| node.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); | |||||
| // stoi: The method may throw an exception: invalid_argument/out_of_range | |||||
| if (!CheckDigitStr(key_value_v[1])) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||||
| {"--out_nodes", out_nodes, "is not positive integer"}); | |||||
| GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| int32_t index = stoi(StringUtils::Trim(key_value_v[1])); | |||||
| if (iter != domi::GetContext().out_nodes_map.end()) { | |||||
| iter->second.emplace_back(index); | |||||
| } else { | |||||
| std::vector<int32_t> index_v; | |||||
| index_v.emplace_back(index); | |||||
| domi::GetContext().out_nodes_map.emplace(key_value_v[0], index_v); | |||||
| } | |||||
| domi::GetContext().user_out_nodes.push_back(std::make_pair(key_value_v[0], index)); | |||||
| } | |||||
| } | |||||
| } catch (std::invalid_argument &) { | |||||
| GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str()); | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes}); | |||||
| return PARAM_INVALID; | |||||
| } catch (std::out_of_range &) { | |||||
| GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str()); | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"out_nodes", out_nodes}); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOpConf(const char *op_conf) { | |||||
| if (op_conf != nullptr && *op_conf != '\0') { | |||||
| // divided by ":" | |||||
| PropertiesManager::Instance().SetPropertyDelimiter(OP_CONF_DELIMITER); | |||||
| // Parsing the op_conf configuration item file | |||||
| if (!PropertiesManager::Instance().Init(op_conf)) { | |||||
| GELOGE(FAILED, "op_name_map init failed!"); | |||||
| return FAILED; | |||||
| } | |||||
| // Return map and put it into ATC global variable | |||||
| domi::GetContext().op_conf_map = PropertiesManager::Instance().GetPropertyMap(); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -1,62 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_COMMON_GRAPH_PARSER_UTIL_H_ | |||||
| #define GE_COMMON_GRAPH_PARSER_UTIL_H_ | |||||
| #include <google/protobuf/message.h> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <vector> | |||||
| #include "framework/common/types.h" | |||||
| #include "framework/omg/omg_inner_types.h" | |||||
| #include "proto/ge_ir.pb.h" | |||||
| #include "proto/om.pb.h" | |||||
| #include "graph/compute_graph.h" | |||||
| #include "graph/graph.h" | |||||
| #include "graph/model.h" | |||||
| #include "runtime/kernel.h" | |||||
| using domi::Status; | |||||
| using std::pair; | |||||
| using std::string; | |||||
| using std::unordered_map; | |||||
| using std::vector; | |||||
| namespace ge { | |||||
| Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); | |||||
| Status ParseOutputFp16NodesFormat(const string &is_output_fp16); | |||||
| Status ParseOutputNodes(const string &out_nodes); | |||||
| bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map, | |||||
| vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input); | |||||
| Status ParseOpConf(const char *op_conf); | |||||
| } // namespace ge | |||||
| namespace domi { | |||||
| /** | |||||
| * @ingroup domi_omg | |||||
| * @brief get omg context | |||||
| * @return reference of OmgContext | |||||
| */ | |||||
| ge::OmgContext &GetContext(); | |||||
| } // namespace domi | |||||
| #endif // GE_COMMON_GRAPH_PARSER_UTIL_H_ | |||||
| @@ -76,8 +76,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
| for (size_t i = 0; i < device_id_.size(); ++i) { | for (size_t i = 0; i < device_id_.size(); ++i) { | ||||
| ret = StartProfiling(0, device_id_[i]); | ret = StartProfiling(0, device_id_[i]); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Profiling start failed on device %d.", device_id_[i]); | |||||
| return FAILED; | |||||
| GELOGW("Profiling start failed on device %d.", device_id_[i]); | |||||
| continue; | |||||
| } | } | ||||
| GELOGI("Profiling init succ on device %d.", device_id_[i]); | GELOGI("Profiling init succ on device %d.", device_id_[i]); | ||||
| } | } | ||||
| @@ -316,7 +316,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||||
| ProfMgrCfg prof_cfg = {send_profiling_config_}; | ProfMgrCfg prof_cfg = {send_profiling_config_}; | ||||
| void *prof_handle = ProfMgrStartUp(&prof_cfg); | void *prof_handle = ProfMgrStartUp(&prof_cfg); | ||||
| if (prof_handle == nullptr) { | if (prof_handle == nullptr) { | ||||
| GELOGW("ProfMgrStartUp failed."); | |||||
| GELOGW("ProfMgrStartUp failed on device %d ", device_id); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GELOGD("StartProfiling, prof_handle: %p", prof_handle); | GELOGD("StartProfiling, prof_handle: %p", prof_handle); | ||||
| @@ -31,193 +31,6 @@ | |||||
| #include "graph/utils/attr_utils.h" | #include "graph/utils/attr_utils.h" | ||||
| namespace ge { | namespace ge { | ||||
namespace {
// Option value that marks a dump switch as enabled.
const string kEnableFlag = "1";

// Bit flags stored in DumpProperties::op_debug_mode_.
const uint32_t kAicoreOverflow = 0x1U << 0;
const uint32_t kAtomicOverflow = 0x1U << 1;
const uint32_t kAllOverflow = kAicoreOverflow | kAtomicOverflow;
}  // namespace
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) { | |||||
| CopyFrom(other); | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=( | |||||
| const DumpProperties &other) { | |||||
| CopyFrom(other); | |||||
| return *this; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOptions() { | |||||
| enable_dump_.clear(); | |||||
| enable_dump_debug_.clear(); | |||||
| dump_path_.clear(); | |||||
| dump_step_.clear(); | |||||
| dump_mode_.clear(); | |||||
| is_op_debug_ = false; | |||||
| op_debug_mode_ = 0; | |||||
| string enable_dump; | |||||
| (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP, enable_dump); | |||||
| enable_dump_ = enable_dump; | |||||
| string enable_dump_debug; | |||||
| (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP_DEBUG, enable_dump_debug); | |||||
| enable_dump_debug_ = enable_dump_debug; | |||||
| if ((enable_dump_ == kEnableFlag) || (enable_dump_debug_ == kEnableFlag)) { | |||||
| string dump_path; | |||||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_PATH, dump_path) == GRAPH_SUCCESS) { | |||||
| if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { | |||||
| dump_path = dump_path + "/"; | |||||
| } | |||||
| dump_path = dump_path + CurrentTimeInStr() + "/"; | |||||
| GELOGI("Get dump path %s successfully", dump_path.c_str()); | |||||
| SetDumpPath(dump_path); | |||||
| } else { | |||||
| GELOGW("DUMP_PATH is not set"); | |||||
| } | |||||
| } | |||||
| if (enable_dump_ == kEnableFlag) { | |||||
| string dump_step; | |||||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) { | |||||
| GELOGD("Get dump step %s successfully", dump_step.c_str()); | |||||
| SetDumpStep(dump_step); | |||||
| } | |||||
| string dump_mode; | |||||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) { | |||||
| GELOGD("Get dump mode %s successfully", dump_mode.c_str()); | |||||
| SetDumpMode(dump_mode); | |||||
| } | |||||
| AddPropertyValue(DUMP_ALL_MODEL, {}); | |||||
| } | |||||
| SetDumpDebugOptions(); | |||||
| } | |||||
| // The following is the new dump scenario of the fusion operator | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue( | |||||
| const std::string &model, const std::set<std::string> &layers) { | |||||
| for (const std::string &layer : layers) { | |||||
| GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str()); | |||||
| } | |||||
| model_dump_properties_map_[model] = layers; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::DeletePropertyValue(const std::string &model) { | |||||
| auto iter = model_dump_properties_map_.find(model); | |||||
| if (iter != model_dump_properties_map_.end()) { | |||||
| model_dump_properties_map_.erase(iter); | |||||
| } | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetAllDumpModel() const { | |||||
| std::set<std::string> model_list; | |||||
| for (auto &iter : model_dump_properties_map_) { | |||||
| model_list.insert(iter.first); | |||||
| } | |||||
| return model_list; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue( | |||||
| const std::string &model) const { | |||||
| auto iter = model_dump_properties_map_.find(model); | |||||
| if (iter != model_dump_properties_map_.end()) { | |||||
| return iter->second; | |||||
| } | |||||
| return {}; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump( | |||||
| const std::string &model, const std::string &om_name, const std::string &op_name) const { | |||||
| // if dump all | |||||
| if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) { | |||||
| return true; | |||||
| } | |||||
| // if this model need dump | |||||
| auto om_name_iter = model_dump_properties_map_.find(om_name); | |||||
| auto model_name_iter = model_dump_properties_map_.find(model); | |||||
| if (om_name_iter != model_dump_properties_map_.end() || model_name_iter != model_dump_properties_map_.end()) { | |||||
| // if no dump layer info, dump all layer in this model | |||||
| auto model_iter = om_name_iter != model_dump_properties_map_.end() ? om_name_iter : model_name_iter; | |||||
| if (model_iter->second.empty()) { | |||||
| return true; | |||||
| } | |||||
| return model_iter->second.find(op_name) != model_iter->second.end(); | |||||
| } | |||||
| GELOGD("Model %s is not seated to be dump.", model.c_str()); | |||||
| return false; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpPath(const std::string &path) { | |||||
| dump_path_ = path; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string DumpProperties::GetDumpPath() const { return dump_path_; } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpStep(const std::string &step) { | |||||
| dump_step_ = step; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string DumpProperties::GetDumpStep() const { return dump_step_; } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpMode(const std::string &mode) { | |||||
| dump_mode_ = mode; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string DumpProperties::GetDumpMode() const { return dump_mode_; } | |||||
| void DumpProperties::CopyFrom(const DumpProperties &other) { | |||||
| if (&other != this) { | |||||
| enable_dump_ = other.enable_dump_; | |||||
| enable_dump_debug_ = other.enable_dump_debug_; | |||||
| dump_path_ = other.dump_path_; | |||||
| dump_step_ = other.dump_step_; | |||||
| dump_mode_ = other.dump_mode_; | |||||
| model_dump_properties_map_ = other.model_dump_properties_map_; | |||||
| is_op_debug_ = other.is_op_debug_; | |||||
| op_debug_mode_ = other.op_debug_mode_; | |||||
| } | |||||
| } | |||||
| void DumpProperties::SetDumpDebugOptions() { | |||||
| if (enable_dump_debug_ == kEnableFlag) { | |||||
| string dump_debug_mode; | |||||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_DEBUG_MODE, dump_debug_mode) == GRAPH_SUCCESS) { | |||||
| GELOGD("Get dump debug mode %s successfully", dump_debug_mode.c_str()); | |||||
| } else { | |||||
| GELOGW("Dump debug mode is not set."); | |||||
| return; | |||||
| } | |||||
| if (dump_debug_mode == OP_DEBUG_AICORE) { | |||||
| GELOGD("ge.exec.dumpDebugMode=aicore_overflow, op debug is open."); | |||||
| is_op_debug_ = true; | |||||
| op_debug_mode_ = kAicoreOverflow; | |||||
| } else if (dump_debug_mode == OP_DEBUG_ATOMIC) { | |||||
| GELOGD("ge.exec.dumpDebugMode=atomic_overflow, op debug is open."); | |||||
| is_op_debug_ = true; | |||||
| op_debug_mode_ = kAtomicOverflow; | |||||
| } else if (dump_debug_mode == OP_DEBUG_ALL) { | |||||
| GELOGD("ge.exec.dumpDebugMode=all, op debug is open."); | |||||
| is_op_debug_ = true; | |||||
| op_debug_mode_ = kAllOverflow; | |||||
| } else { | |||||
| GELOGW("ge.exec.dumpDebugMode is invalid."); | |||||
| } | |||||
| } else { | |||||
| GELOGI("ge.exec.enableDumpDebug is false or is not set."); | |||||
| } | |||||
| } | |||||
| PropertiesManager::PropertiesManager() : is_inited_(false), delimiter("=") {} | PropertiesManager::PropertiesManager() : is_inited_(false), delimiter("=") {} | ||||
| PropertiesManager::~PropertiesManager() {} | PropertiesManager::~PropertiesManager() {} | ||||
| @@ -24,6 +24,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "graph/op_desc.h" | #include "graph/op_desc.h" | ||||
| #include "common/dump/dump_properties.h" | |||||
| namespace ge { | namespace ge { | ||||
| // Configuration property management | // Configuration property management | ||||
| @@ -32,50 +33,6 @@ static const char *USE_FUSION __attribute__((unused)) = "FMK_USE_FUSION"; | |||||
| static const char *TIMESTAT_ENABLE __attribute__((unused)) = "DAVINCI_TIMESTAT_ENABLE"; | static const char *TIMESTAT_ENABLE __attribute__((unused)) = "DAVINCI_TIMESTAT_ENABLE"; | ||||
| static const char *ANNDROID_DEBUG __attribute__((unused)) = "ANNDROID_DEBUG"; | static const char *ANNDROID_DEBUG __attribute__((unused)) = "ANNDROID_DEBUG"; | ||||
/// Holds the dump / dump-debug configuration: global switches, output path, step and mode,
/// and the per-model set of layers to dump.
class DumpProperties {
 public:
  DumpProperties() = default;

  ~DumpProperties() = default;

  DumpProperties(const DumpProperties &dump);

  DumpProperties &operator=(const DumpProperties &dump);

  /// Resets all settings and reloads them from the GE context options.
  void InitByOptions();

  /// Registers (or replaces) the layers to dump for `model`.
  void AddPropertyValue(const std::string &model, const std::set<std::string> &layers);

  /// Removes the dump configuration of `model`, if any.
  void DeletePropertyValue(const std::string &model);

  /// Returns the names of all models that have a dump configuration.
  std::set<std::string> GetAllDumpModel() const;

  /// Returns the layers configured for `model` (empty when the model is unknown).
  std::set<std::string> GetPropertyValue(const std::string &model) const;

  /// Tells whether operator `op_name` of the given model / om should be dumped.
  bool IsLayerNeedDump(const std::string &model, const std::string &om_name, const std::string &op_name) const;

  void SetDumpPath(const std::string &path);

  std::string GetDumpPath() const;

  void SetDumpStep(const std::string &step);

  std::string GetDumpStep() const;

  void SetDumpMode(const std::string &mode);

  std::string GetDumpMode() const;

  /// True once a valid dump-debug mode has been configured.
  bool IsOpDebugOpen() const { return is_op_debug_; }

  /// Overflow-detection bit mask selected by the dump-debug mode (0 when unset).
  uint32_t GetOpDebugMode() const { return op_debug_mode_; }

 private:
  void CopyFrom(const DumpProperties &other);

  void SetDumpDebugOptions();

  // Fully qualified like the other members, so the header does not depend on a
  // `using std::string` being visible at the point of inclusion.
  std::string enable_dump_;
  std::string enable_dump_debug_;

  std::string dump_path_;
  std::string dump_step_;
  std::string dump_mode_;
  // Model name -> layers to dump.
  std::map<std::string, std::set<std::string>> model_dump_properties_map_;

  bool is_op_debug_ = false;
  uint32_t op_debug_mode_ = 0;
};
| class PropertiesManager { | class PropertiesManager { | ||||
| public: | public: | ||||
| // Singleton | // Singleton | ||||
| @@ -502,6 +502,7 @@ const uint32_t MODEL_FILE_HEAD_LEN = 256; | |||||
| /// @brief Input node type | /// @brief Input node type | ||||
| /// | /// | ||||
| const std::string INPUT_TYPE = "Input"; | const std::string INPUT_TYPE = "Input"; | ||||
| const std::string DUMMY_DATA = "DummyData"; | |||||
| /// | /// | ||||
| /// @ingroup domi_omg | /// @ingroup domi_omg | ||||
| @@ -57,7 +57,7 @@ const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M | |||||
| /// Based on the security coding specification and the current actual (protobuf) model size, it is determined as 2G-1 | /// Based on the security coding specification and the current actual (protobuf) model size, it is determined as 2G-1 | ||||
| const int kMaxFileSizeLimit = INT_MAX; | const int kMaxFileSizeLimit = INT_MAX; | ||||
| const int kMaxBuffSize = 256; | const int kMaxBuffSize = 256; | ||||
| const char *const kPathValidReason = "The path can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; | |||||
| const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| @@ -311,6 +311,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestap() { | |||||
| return static_cast<uint64_t>(total_use_time); | return static_cast<uint64_t>(total_use_time); | ||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint32_t GetCurrentSecondTimestap() { | |||||
| struct timeval tv {}; | |||||
| int ret = gettimeofday(&tv, nullptr); | |||||
| GE_LOGE_IF(ret != 0, "Func gettimeofday may failed: ret=%d", ret); | |||||
| auto total_use_time = tv.tv_sec; // seconds | |||||
| return static_cast<uint32_t>(total_use_time); | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b) { | ||||
| if (a > 0) { | if (a > 0) { | ||||
| if (b > 0) { | if (b > 0) { | ||||
| @@ -372,10 +380,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const | |||||
| } | } | ||||
| // A regular matching expression to verify the validity of the input file path | // A regular matching expression to verify the validity of the input file path | ||||
| // ^(/|./|(../)+|)([.]?[\u4e00-\u9fa5A-Za-z0-9_.-]+/)*[\u4e00-\u9fa5A-Za-z0-9_+.-]+$ | |||||
| // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores | // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores | ||||
| // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) | // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) | ||||
| std::string mode = "^(/+|./+|(../+)+|)(../|([.]?[\u4e00-\u9fa5A-Za-z0-9_.-]+)/+)*[\u4e00-\u9fa5A-Za-z0-9_+.-]+$"; | |||||
| std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| !ValidateStr(real_path, mode), | !ValidateStr(real_path, mode), | ||||
| @@ -408,10 +415,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const | |||||
| return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), PATH_MAX); | return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), PATH_MAX); | ||||
| // A regular matching expression to verify the validity of the input file path | // A regular matching expression to verify the validity of the input file path | ||||
| // ^(/|./|(../)+|)([.]?[\u4e00-\u9fa5A-Za-z0-9_-]+/)*[\u4e00-\u9fa5A-Za-z0-9_+.-]+$ | |||||
| // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores | // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores | ||||
| // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) | // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) | ||||
| std::string mode = "^(/+|./+|(../+)+|)(../|([.]?[\u4e00-\u9fa5A-Za-z0-9_.-]+)/+)*[\u4e00-\u9fa5A-Za-z0-9_+.-]+$"; | |||||
| std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| !ValidateStr(file_path, mode), | !ValidateStr(file_path, mode), | ||||
| @@ -460,9 +466,9 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str | |||||
| int ret = regcomp(®, mode.c_str(), cflags); | int ret = regcomp(®, mode.c_str(), cflags); | ||||
| if (ret) { | if (ret) { | ||||
| regerror(ret, ®, ebuff, kMaxBuffSize); | regerror(ret, ®, ebuff, kMaxBuffSize); | ||||
| GELOGE(ge::PARAM_INVALID, "regcomp failed, reason: %s", ebuff); | |||||
| GELOGW("regcomp failed, reason: %s", ebuff); | |||||
| regfree(®); | regfree(®); | ||||
| return false; | |||||
| return true; | |||||
| } | } | ||||
| ret = regexec(®, str.c_str(), 0, nullptr, 0); | ret = regexec(®, str.c_str(), 0, nullptr, 0); | ||||
| @@ -42,6 +42,8 @@ const char *const kVectorCore = "VectorCore"; | |||||
| const char *const kVectorEngine = "VectorEngine"; | const char *const kVectorEngine = "VectorEngine"; | ||||
| const char *const kAIcoreEngine = "AIcoreEngine"; | const char *const kAIcoreEngine = "AIcoreEngine"; | ||||
| const char *const kCustomOpFlag = "_custom_op_flag"; | const char *const kCustomOpFlag = "_custom_op_flag"; | ||||
| const char *const kHostCpuEngineName = "DNN_VM_HOST_CPU"; | |||||
| const char *const kHostCpuOpKernelLibName = "DNN_VM_HOST_CPU_OP_STORE"; | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| @@ -181,6 +183,7 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||||
| GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str()); | GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str()); | ||||
| return ""; | return ""; | ||||
| } | } | ||||
| GE_IF_BOOL_EXEC(ge::GetContext().GetHostExecFlag(), return GetHostCpuEngineName(op_infos, op_desc)); | |||||
| std::string ge_core_type; | std::string ge_core_type; | ||||
| Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type); | Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type); | ||||
| GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE")); | GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE")); | ||||
| @@ -245,6 +248,22 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||||
| return ""; | return ""; | ||||
| } | } | ||||
| std::string DNNEngineManager::GetHostCpuEngineName(const std::vector<OpInfo> &op_infos, | |||||
| const OpDescPtr &op_desc) const { | |||||
| for (const auto &it : op_infos) { | |||||
| if ((it.engine == kHostCpuEngineName) && (it.opKernelLib == kHostCpuOpKernelLibName)) { | |||||
| op_desc->SetOpEngineName(kHostCpuEngineName); | |||||
| op_desc->SetOpKernelLibName(kHostCpuOpKernelLibName); | |||||
| GELOGI("DNNEngineManager: Set OpKernelLibName %s and OpEngineName %s to %s", kHostCpuOpKernelLibName, | |||||
| kHostCpuEngineName, op_desc->GetName().c_str()); | |||||
| return kHostCpuEngineName; | |||||
| } | |||||
| } | |||||
| GELOGE(FAILED, "DNNEngineManager: HostCpuEngine not support [%s, %s].", op_desc->GetName().c_str(), | |||||
| op_desc->GetType().c_str()); | |||||
| return ""; | |||||
| } | |||||
| const std::map<std::string, SchedulerConf> &DNNEngineManager::GetSchedulers() const { return schedulers_; } | const std::map<std::string, SchedulerConf> &DNNEngineManager::GetSchedulers() const { return schedulers_; } | ||||
| Status DNNEngineManager::ParserJsonFile() { | Status DNNEngineManager::ParserJsonFile() { | ||||
| @@ -76,6 +76,7 @@ class DNNEngineManager { | |||||
| Status ParserEngineMessage(const json engines_json, const string &scheduler_mark, | Status ParserEngineMessage(const json engines_json, const string &scheduler_mark, | ||||
| map<string, EngineConfPtr> &engines); | map<string, EngineConfPtr> &engines); | ||||
| Status CheckJsonFile(); | Status CheckJsonFile(); | ||||
| std::string GetHostCpuEngineName(const std::vector<OpInfo> &op_infos, const OpDescPtr &op_desc) const; | |||||
| PluginManager plugin_mgr_; | PluginManager plugin_mgr_; | ||||
| std::map<std::string, DNNEnginePtr> engines_map_; | std::map<std::string, DNNEnginePtr> engines_map_; | ||||
| std::map<std::string, ge::DNNEngineAttribute> engines_attrs_map_; | std::map<std::string, ge::DNNEngineAttribute> engines_attrs_map_; | ||||
| @@ -5,6 +5,13 @@ | |||||
| "name": "1980_hwts", | "name": "1980_hwts", | ||||
| "ex_attrs": "", | "ex_attrs": "", | ||||
| "cal_engines": [ | "cal_engines": [ | ||||
| { | |||||
| "id": "DNN_VM_HOST_CPU", | |||||
| "name": "HOST_CPU", | |||||
| "independent": false, | |||||
| "skip_assign_stream": true, | |||||
| "attach": true | |||||
| }, | |||||
| { | { | ||||
| "id": "DNN_VM_GE_LOCAL", | "id": "DNN_VM_GE_LOCAL", | ||||
| "name": "GE_LOCAL", | "name": "GE_LOCAL", | ||||
| @@ -26,6 +26,9 @@ file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | ||||
| "ge_executor.cc" | "ge_executor.cc" | ||||
| "../common/dump/dump_properties.cc" | |||||
| "../common/dump/dump_manager.cc" | |||||
| "../common/dump/dump_op.cc" | |||||
| "../common/ge/op_tiling_manager.cc" | "../common/ge/op_tiling_manager.cc" | ||||
| "../common/ge/plugin_manager.cc" | "../common/ge/plugin_manager.cc" | ||||
| "../common/profiling/profiling_manager.cc" | "../common/profiling/profiling_manager.cc" | ||||
| @@ -23,6 +23,7 @@ | |||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
| #include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
| #include "common/dump/dump_manager.h" | |||||
| #include "common/util.h" | #include "common/util.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/util.h" | #include "framework/common/util.h" | ||||
| @@ -35,6 +36,8 @@ | |||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "mmpa/mmpa_api.h" | #include "mmpa/mmpa_api.h" | ||||
| #include "single_op/single_op_manager.h" | #include "single_op/single_op_manager.h" | ||||
| #include "graph/manager/graph_var_manager.h" | |||||
| #include "graph/load/new_model_manager/davinci_model.h" | |||||
| using std::string; | using std::string; | ||||
| using std::vector; | using std::vector; | ||||
| @@ -348,18 +351,46 @@ Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, u | |||||
| } | } | ||||
| vector<uint64_t> cur_dynamic_dims; | vector<uint64_t> cur_dynamic_dims; | ||||
| if (GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims) != SUCCESS) { | |||||
| GELOGE(FAILED, "GetCurDynamicDims failed."); | |||||
| std::vector<ge::TensorDesc> input_desc; | |||||
| std::vector<ge::TensorDesc> output_desc; | |||||
| ret = GetModelDescInfo(model_id, input_desc, output_desc); | |||||
| if (ret != ge::SUCCESS) { | |||||
| GELOGE(FAILED, "GetModelDescInfo failed."); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| vector<string> user_designate_shape_order; | |||||
| vector<int64_t> all_data_dims; | |||||
| ret = GetUserDesignateShapeOrder(model_id, user_designate_shape_order); | |||||
| if (ret != ge::SUCCESS) { | |||||
| GELOGE(FAILED, "GetUserDesignateShapeOrder failed."); | |||||
| return FAILED; | |||||
| } | |||||
| for (auto &data_name : user_designate_shape_order) { | |||||
| for (size_t j = 0; j < input_desc.size(); ++j) { | |||||
| if (input_desc.at(j).GetName() == data_name) { | |||||
| for (auto dim : input_desc.at(j).GetShape().GetDims()) { | |||||
| all_data_dims.push_back(dim); | |||||
| } | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (dynamic_dims.size() != all_data_dims.size()) { | |||||
| GELOGE(FAILED, "Dynamic input size [%lu] is not equal with all data dims size [%lu]!", dynamic_dims.size(), | |||||
| all_data_dims.size()); | |||||
| return FAILED; | |||||
| } | |||||
| for (std::size_t i = 0; i < all_data_dims.size(); ++i) { | |||||
| if (all_data_dims[i] < 0) { | |||||
| cur_dynamic_dims.push_back(dynamic_dims[i]); | |||||
| } | |||||
| } | |||||
| size_t dynamic_dim_num = cur_dynamic_dims.size(); | size_t dynamic_dim_num = cur_dynamic_dims.size(); | ||||
| uint64_t dynamic_input_size = static_cast<uint64_t>(dynamic_dim_num * sizeof(uint64_t)); | uint64_t dynamic_input_size = static_cast<uint64_t>(dynamic_dim_num * sizeof(uint64_t)); | ||||
| if (length < dynamic_input_size) { | if (length < dynamic_input_size) { | ||||
| GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| for (uint32_t i = 0; i < dynamic_dim_num; ++i) { | for (uint32_t i = 0; i < dynamic_dim_num; ++i) { | ||||
| // Memcpy dynamic dim[i] from host to device | // Memcpy dynamic dim[i] from host to device | ||||
| if (rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(dynamic_input_addr) + sizeof(uint64_t) * i), | if (rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(dynamic_input_addr) + sizeof(uint64_t) * i), | ||||
| @@ -549,6 +580,12 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||||
| GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); | GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id); | |||||
| if (davinci_model != nullptr) { | |||||
| uint64_t session_id = davinci_model->GetSessionId(); | |||||
| VarManagerPool::Instance().RemoveVarManager(session_id); | |||||
| } | |||||
| return GraphLoader::UnloadModel(model_id); | return GraphLoader::UnloadModel(model_id); | ||||
| } | } | ||||
| @@ -658,6 +695,30 @@ Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64 | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get user designate shape order | |||||
| /// @param [in] model_id | |||||
| /// @param [out] user_designate_shape_order | |||||
| /// @return execute result | |||||
| /// | |||||
| Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> &user_designate_shape_order) { | |||||
| GELOGI("Begin to get user designate shape info."); | |||||
| if (!isInit_) { | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | |||||
| } | |||||
| Status ret = GraphExecutor::GetUserDesignateShapeOrder(model_id, user_designate_shape_order); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetUserDesignateShapeOrder failed."); | |||||
| return ret; | |||||
| } | |||||
| GELOGI("Get user designate shape order succ."); | |||||
| return SUCCESS; | |||||
| } | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get AIPP input format | /// @brief Get AIPP input format | ||||
| @@ -674,7 +735,7 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo | |||||
| } | } | ||||
| Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info); | Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetAIPPInfo failed."); | |||||
| GELOGW("GetAIPPInfo is not success."); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("GetAIPPInfo succ."); | GELOGI("GetAIPPInfo succ."); | ||||
| @@ -1020,4 +1081,26 @@ Status GeExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, | |||||
| GELOGI("GetAllAippInputOutputDims succ."); | GELOGI("GetAllAippInputOutputDims succ."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GeExecutor::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) { | |||||
| GELOGI("Begin to GetOpDescInfo."); | |||||
| Status ret = GraphExecutor::GetOpDescInfo(device_id, stream_id, task_id, op_desc_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetOpDescInfo failed."); | |||||
| return ret; | |||||
| } | |||||
| GELOGI("GetOpDescInfo succ."); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GeExecutor::SetDump(const DumpConfig &dump_config) { | |||||
| GELOGI("Start to set dump config"); | |||||
| auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Set dump conf failed"); | |||||
| return ret; | |||||
| } | |||||
| GELOGI("Set dump config succ."); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -3,6 +3,9 @@ LOCAL_PATH := $(call my-dir) | |||||
| local_ge_executor_src_files := \ | local_ge_executor_src_files := \ | ||||
| ge_executor.cc \ | ge_executor.cc \ | ||||
| ../common/profiling/profiling_manager.cc \ | ../common/profiling/profiling_manager.cc \ | ||||
| ../common/dump/dump_properties.cc \ | |||||
| ../common/dump/dump_manager.cc \ | |||||
| ../common/dump/dump_op.cc \ | |||||
| ../common/ge/plugin_manager.cc \ | ../common/ge/plugin_manager.cc \ | ||||
| ../common/ge/op_tiling_manager.cc \ | ../common/ge/op_tiling_manager.cc \ | ||||
| ../graph/load/graph_loader.cc \ | ../graph/load/graph_loader.cc \ | ||||
| @@ -26,6 +26,9 @@ COMMON_LOCAL_SRC_FILES := \ | |||||
| common/formats/format_transfers/format_transfer_nchw_fz_c04.cc \ | common/formats/format_transfers/format_transfer_nchw_fz_c04.cc \ | ||||
| common/formats/formats.cc \ | common/formats/formats.cc \ | ||||
| common/profiling/profiling_manager.cc \ | common/profiling/profiling_manager.cc \ | ||||
| common/dump/dump_properties.cc \ | |||||
| common/dump/dump_manager.cc \ | |||||
| common/dump/dump_op.cc \ | |||||
| common/helper/model_cache_helper.cc \ | common/helper/model_cache_helper.cc \ | ||||
| ge_local_engine/engine/host_cpu_engine.cc \ | ge_local_engine/engine/host_cpu_engine.cc \ | ||||
| @@ -42,6 +45,7 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \ | |||||
| graph/manager/graph_manager_utils.cc \ | graph/manager/graph_manager_utils.cc \ | ||||
| graph/manager/graph_context.cc \ | graph/manager/graph_context.cc \ | ||||
| graph/preprocess/graph_preprocess.cc \ | graph/preprocess/graph_preprocess.cc \ | ||||
| graph/preprocess/multi_batch_options.cc \ | |||||
| graph/preprocess/multi_batch_copy_graph.cc \ | graph/preprocess/multi_batch_copy_graph.cc \ | ||||
| graph/execute/graph_execute.cc \ | graph/execute/graph_execute.cc \ | ||||
| graph/load/graph_loader.cc \ | graph/load/graph_loader.cc \ | ||||
| @@ -149,6 +153,7 @@ OMG_HOST_SRC_FILES := \ | |||||
| host_kernels/slice_kernel.cc \ | host_kernels/slice_kernel.cc \ | ||||
| host_kernels/slice_d_kernel.cc \ | host_kernels/slice_d_kernel.cc \ | ||||
| host_kernels/dynamic_stitch_kernel.cc \ | host_kernels/dynamic_stitch_kernel.cc \ | ||||
| host_kernels/identity_kernel.cc \ | |||||
| graph/passes/stop_gradient_pass.cc \ | graph/passes/stop_gradient_pass.cc \ | ||||
| graph/passes/prevent_gradient_pass.cc \ | graph/passes/prevent_gradient_pass.cc \ | ||||
| graph/passes/identity_pass.cc \ | graph/passes/identity_pass.cc \ | ||||
| @@ -165,12 +170,16 @@ OMG_HOST_SRC_FILES := \ | |||||
| graph/passes/switch_to_stream_switch_pass.cc \ | graph/passes/switch_to_stream_switch_pass.cc \ | ||||
| graph/passes/attach_stream_label_pass.cc \ | graph/passes/attach_stream_label_pass.cc \ | ||||
| graph/passes/multi_batch_pass.cc \ | graph/passes/multi_batch_pass.cc \ | ||||
| graph/passes/multi_batch_clone_pass.cc \ | |||||
| graph/passes/subexpression_migration_pass.cc \ | |||||
| graph/passes/unused_args_clean_pass.cc \ | |||||
| graph/passes/next_iteration_pass.cc \ | graph/passes/next_iteration_pass.cc \ | ||||
| graph/passes/control_trigger_pass.cc \ | graph/passes/control_trigger_pass.cc \ | ||||
| graph/passes/cond_pass.cc \ | graph/passes/cond_pass.cc \ | ||||
| graph/passes/cond_remove_pass.cc \ | graph/passes/cond_remove_pass.cc \ | ||||
| graph/passes/for_pass.cc \ | graph/passes/for_pass.cc \ | ||||
| graph/passes/enter_pass.cc \ | graph/passes/enter_pass.cc \ | ||||
| graph/passes/assign_pass.cc \ | |||||
| graph/passes/addn_pass.cc \ | graph/passes/addn_pass.cc \ | ||||
| graph/passes/common_subexpression_elimination_pass.cc \ | graph/passes/common_subexpression_elimination_pass.cc \ | ||||
| graph/passes/transop_symmetry_elimination_pass.cc \ | graph/passes/transop_symmetry_elimination_pass.cc \ | ||||
| @@ -185,11 +194,10 @@ OMG_HOST_SRC_FILES := \ | |||||
| graph/passes/transpose_transdata_pass.cc \ | graph/passes/transpose_transdata_pass.cc \ | ||||
| graph/passes/hccl_memcpy_pass.cc \ | graph/passes/hccl_memcpy_pass.cc \ | ||||
| graph/passes/flow_ctrl_pass.cc \ | graph/passes/flow_ctrl_pass.cc \ | ||||
| graph/passes/global_step_insert_pass.cc \ | |||||
| graph/passes/link_gen_mask_nodes_pass.cc \ | graph/passes/link_gen_mask_nodes_pass.cc \ | ||||
| graph/passes/replace_with_empty_const_pass.cc \ | graph/passes/replace_with_empty_const_pass.cc \ | ||||
| graph/passes/hccl_group_pass.cc \ | graph/passes/hccl_group_pass.cc \ | ||||
| graph/passes/switch_fusion_pass.cc \ | |||||
| graph/passes/switch_split_pass.cc \ | |||||
| graph/passes/memcpy_addr_async_pass.cc \ | graph/passes/memcpy_addr_async_pass.cc \ | ||||
| graph/passes/set_input_output_offset_pass.cc \ | graph/passes/set_input_output_offset_pass.cc \ | ||||
| @@ -26,6 +26,9 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| common/ge/op_tiling_manager.cc\ | common/ge/op_tiling_manager.cc\ | ||||
| common/helper/model_cache_helper.cc \ | common/helper/model_cache_helper.cc \ | ||||
| common/profiling/profiling_manager.cc \ | common/profiling/profiling_manager.cc \ | ||||
| common/dump/dump_manager.cc \ | |||||
| common/dump/dump_properties.cc \ | |||||
| common/dump/dump_op.cc \ | |||||
| engine_manager/dnnengine_manager.cc \ | engine_manager/dnnengine_manager.cc \ | ||||
| ge_local_engine/engine/host_cpu_engine.cc \ | ge_local_engine/engine/host_cpu_engine.cc \ | ||||
| generator/ge_generator.cc \ | generator/ge_generator.cc \ | ||||
| @@ -93,7 +96,6 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| graph/manager/util/variable_accelerate_ctrl.cc \ | graph/manager/util/variable_accelerate_ctrl.cc \ | ||||
| graph/optimize/graph_optimize.cc \ | graph/optimize/graph_optimize.cc \ | ||||
| graph/optimize/mem_rw_conflict_optimize.cc \ | graph/optimize/mem_rw_conflict_optimize.cc \ | ||||
| graph/optimize/optimizer/allreduce_fusion_pass.cc \ | |||||
| graph/optimize/summary_optimize.cc \ | graph/optimize/summary_optimize.cc \ | ||||
| graph/partition/engine_place.cc \ | graph/partition/engine_place.cc \ | ||||
| graph/partition/graph_partition.cc \ | graph/partition/graph_partition.cc \ | ||||
| @@ -119,10 +121,10 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| graph/passes/dimension_compute_pass.cc \ | graph/passes/dimension_compute_pass.cc \ | ||||
| graph/passes/dropout_pass.cc \ | graph/passes/dropout_pass.cc \ | ||||
| graph/passes/hccl_group_pass.cc \ | graph/passes/hccl_group_pass.cc \ | ||||
| graph/passes/switch_fusion_pass.cc \ | |||||
| graph/passes/switch_split_pass.cc \ | |||||
| graph/passes/enter_pass.cc \ | graph/passes/enter_pass.cc \ | ||||
| graph/passes/assign_pass.cc \ | |||||
| graph/passes/flow_ctrl_pass.cc \ | graph/passes/flow_ctrl_pass.cc \ | ||||
| graph/passes/global_step_insert_pass.cc \ | |||||
| host_kernels/transpose_kernel.cc \ | host_kernels/transpose_kernel.cc \ | ||||
| host_kernels/add_kernel.cc \ | host_kernels/add_kernel.cc \ | ||||
| host_kernels/broadcast_args_kernel.cc \ | host_kernels/broadcast_args_kernel.cc \ | ||||
| @@ -131,6 +133,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| host_kernels/concat_offset_kernel.cc \ | host_kernels/concat_offset_kernel.cc \ | ||||
| host_kernels/concat_v2_kernel.cc \ | host_kernels/concat_v2_kernel.cc \ | ||||
| host_kernels/dynamic_stitch_kernel.cc \ | host_kernels/dynamic_stitch_kernel.cc \ | ||||
| host_kernels/identity_kernel.cc \ | |||||
| host_kernels/empty_kernel.cc \ | host_kernels/empty_kernel.cc \ | ||||
| host_kernels/expanddims_kernel.cc \ | host_kernels/expanddims_kernel.cc \ | ||||
| host_kernels/fill_kernel.cc \ | host_kernels/fill_kernel.cc \ | ||||
| @@ -172,6 +175,9 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| graph/passes/link_gen_mask_nodes_pass.cc \ | graph/passes/link_gen_mask_nodes_pass.cc \ | ||||
| graph/passes/merge_pass.cc \ | graph/passes/merge_pass.cc \ | ||||
| graph/passes/multi_batch_pass.cc \ | graph/passes/multi_batch_pass.cc \ | ||||
| graph/passes/multi_batch_clone_pass.cc \ | |||||
| graph/passes/subexpression_migration_pass.cc \ | |||||
| graph/passes/unused_args_clean_pass.cc \ | |||||
| graph/passes/net_output_pass.cc \ | graph/passes/net_output_pass.cc \ | ||||
| graph/passes/next_iteration_pass.cc \ | graph/passes/next_iteration_pass.cc \ | ||||
| graph/passes/no_use_reshape_remove_pass.cc \ | graph/passes/no_use_reshape_remove_pass.cc \ | ||||
| @@ -225,6 +231,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| graph/preprocess/graph_preprocess.cc \ | graph/preprocess/graph_preprocess.cc \ | ||||
| graph/preprocess/insert_op/ge_aipp_op.cc \ | graph/preprocess/insert_op/ge_aipp_op.cc \ | ||||
| graph/preprocess/insert_op/util_insert_aipp_op.cc \ | graph/preprocess/insert_op/util_insert_aipp_op.cc \ | ||||
| graph/preprocess/multi_batch_options.cc \ | |||||
| graph/preprocess/multi_batch_copy_graph.cc \ | graph/preprocess/multi_batch_copy_graph.cc \ | ||||
| init/gelib.cc \ | init/gelib.cc \ | ||||
| model/ge_model.cc \ | model/ge_model.cc \ | ||||
| @@ -267,10 +274,17 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| hybrid/node_executor/aicpu/aicpu_ext_info.cc \ | hybrid/node_executor/aicpu/aicpu_ext_info.cc \ | ||||
| hybrid/node_executor/aicpu/aicpu_node_executor.cc \ | hybrid/node_executor/aicpu/aicpu_node_executor.cc \ | ||||
| hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ | hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ | ||||
| hybrid/node_executor/hostcpu/ge_local_node_executor.cc \ | |||||
| hybrid/node_executor/ge_local/ge_local_node_executor.cc \ | |||||
| hybrid/node_executor/host_cpu/host_cpu_node_executor.cc \ | |||||
| hybrid/node_executor/host_cpu/kernel_factory.cc \ | |||||
| hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc \ | |||||
| hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ | |||||
| hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ | |||||
| hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ | |||||
| hybrid/node_executor/controlop/control_op_executor.cc \ | hybrid/node_executor/controlop/control_op_executor.cc \ | ||||
| hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | ||||
| hybrid/node_executor/hccl/hccl_node_executor.cc \ | hybrid/node_executor/hccl/hccl_node_executor.cc \ | ||||
| hybrid/node_executor/rts/rts_node_executor.cc \ | |||||
| hybrid/node_executor/node_executor.cc \ | hybrid/node_executor/node_executor.cc \ | ||||
| hybrid/node_executor/task_context.cc \ | hybrid/node_executor/task_context.cc \ | ||||
| hybrid/hybrid_davinci_model.cc \ | hybrid/hybrid_davinci_model.cc \ | ||||
| @@ -343,7 +357,6 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
| libgraph \ | libgraph \ | ||||
| libregister \ | libregister \ | ||||
| libge_common \ | libge_common \ | ||||
| libhccl \ | |||||
| libmsprof \ | libmsprof \ | ||||
| liberror_manager \ | liberror_manager \ | ||||
| @@ -425,7 +438,6 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | libc_sec \ | ||||
| libslog \ | libslog \ | ||||
| libmmpa \ | libmmpa \ | ||||
| libhccl \ | |||||
| libmsprof \ | libmsprof \ | ||||
| LOCAL_LDFLAGS := -lrt -ldl | LOCAL_LDFLAGS := -lrt -ldl | ||||
| @@ -457,7 +469,6 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | libc_sec \ | ||||
| libslog \ | libslog \ | ||||
| libmmpa \ | libmmpa \ | ||||
| libhccl \ | |||||
| libmsprof \ | libmsprof \ | ||||
| LOCAL_LDFLAGS := -lrt -ldl | LOCAL_LDFLAGS := -lrt -ldl | ||||
| @@ -658,10 +658,13 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id); | GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id); | ||||
| VarManagerPool::Instance().RemoveVarManager(session_id); | |||||
| return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | ||||
| } | } | ||||
| id += 1; | id += 1; | ||||
| VarManagerPool::Instance().RemoveVarManager(session_id); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -28,6 +28,7 @@ | |||||
| #include "graph/common/ge_call_wrapper.h" | #include "graph/common/ge_call_wrapper.h" | ||||
| #include "init/gelib.h" | #include "init/gelib.h" | ||||
| #include "model/ge_model.h" | #include "model/ge_model.h" | ||||
| #include "graph/ge_context.h" | |||||
| using domi::BuildMode; | using domi::BuildMode; | ||||
| @@ -166,11 +167,15 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfo | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, | |||||
| std::vector<SubGraphInfoPtr> &subgraph_ptr_list, GeModelPtr &ge_model_ptr, | |||||
| uint64_t session_id) { | |||||
| Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, | |||||
| GeModelPtr &ge_model_ptr, uint64_t session_id) { | |||||
| if (ge::GetContext().GetHostExecFlag()) { | |||||
| GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); | GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); | ||||
| Status ret = SecondPartition(comp_graph, subgraph_ptr_list); | |||||
| Status ret = SecondPartition(comp_graph, subgraph_list); | |||||
| GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); | GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); | ||||
| auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | ||||
| @@ -257,6 +262,10 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) { | |||||
| return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); | |||||
| } | |||||
| Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | ||||
| std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | ||||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | ||||
| @@ -63,10 +63,12 @@ class GraphBuilder { | |||||
| Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | ||||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | ||||
| uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
| Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||||
| Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, | |||||
| GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | ||||
| Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | ||||
| uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
| Status BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | |||||
| uint64_t session_id = INVALID_SESSION_ID); | |||||
| int build_mode_; | int build_mode_; | ||||
| std::map<std::string, int> stream_max_parallel_num_; | std::map<std::string, int> stream_max_parallel_num_; | ||||
| @@ -745,6 +745,23 @@ bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, bool continuous) { | |||||
| if (NodeUtils::IsDynamicShape(node)) { | |||||
| return ((node->GetType() == DATA_TYPE) && !continuous) || (node->GetType() == NETOUTPUT); | |||||
| } | |||||
| if ((node->GetType() == DATA_TYPE) && !continuous) { | |||||
| return !node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); | |||||
| } | |||||
| if (node->GetType() == NETOUTPUT) { | |||||
| const auto &owner = node->GetOwnerComputeGraph(); | |||||
| return owner->GetParentGraph() == nullptr; | |||||
| } | |||||
| return false; | |||||
| } | |||||
| MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, | MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, | ||||
| MemoryType mem_type, const NodePtr &n, uint32_t out_index, | MemoryType mem_type, const NodePtr &n, uint32_t out_index, | ||||
| const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | ||||
| @@ -793,9 +810,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); | ||||
| // Data and netoutput need zero copy block | // Data and netoutput need zero copy block | ||||
| if ((node_op_desc->GetType() == DATA_TYPE && !continuous) || (node_op_desc->GetType() == NETOUTPUT)) { | |||||
| block->is_zero_copy_ = true; | |||||
| } | |||||
| block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | |||||
| block->Init(real_size, mem_type, n, out_index, no_align_size); | block->Init(real_size, mem_type, n, out_index, no_align_size); | ||||
| block->stream_id_ = node_op_desc->GetStreamId(); | block->stream_id_ = node_op_desc->GetStreamId(); | ||||
| @@ -970,6 +985,14 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i | |||||
| return false; | return false; | ||||
| } | } | ||||
| bool IsKnownSubgraphData(const NodePtr &node) { | |||||
| if (NodeUtils::IsDynamicShape(node)) { | |||||
| return false; | |||||
| } | |||||
| return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); | |||||
| } | |||||
| void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory) { | void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory) { | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null."); | ||||
| GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory"); | GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory"); | ||||
| @@ -1092,7 +1115,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | (void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | ||||
| // Allocate memory for the current node and release node memory of the same size in the workspace | // Allocate memory for the current node and release node memory of the same size in the workspace | ||||
| GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | ||||
| ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id]);) | |||||
| ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id])); | |||||
| if (IsContinuousOutput(node)) { | if (IsContinuousOutput(node)) { | ||||
| (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -1118,6 +1141,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); | out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); | ||||
| no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); | no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); | ||||
| } | } | ||||
| no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node)); | |||||
| if (no_need_assign_memory) { | if (no_need_assign_memory) { | ||||
| zero_memory_list_.emplace_back(node, kOutput, i, false); | zero_memory_list_.emplace_back(node, kOutput, i, false); | ||||
| continue; | continue; | ||||
| @@ -1474,8 +1498,8 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz | |||||
| return; | return; | ||||
| } | } | ||||
| if ((op_desc->GetType() == DATA) || (op_desc->GetType() == AIPP_DATA_TYPE) || (op_desc->GetType() == MULTISHAPE) || | |||||
| (op_desc->GetType() == NETOUTPUT)) { | |||||
| static const set<string> kSetOffsetTypes = {DATA_TYPE, AIPP_DATA_TYPE, MULTISHAPE, NETOUTPUT}; | |||||
| if ((kSetOffsetTypes.count(op_desc->GetType()) > 0) && !IsKnownSubgraphData(node_type.node)) { | |||||
| if ((output_list[node_type.index] == kInvalidOffset) || (output_list[node_type.index] < offset)) { | if ((output_list[node_type.index] == kInvalidOffset) || (output_list[node_type.index] < offset)) { | ||||
| output_list.at(node_type.index) = offset; | output_list.at(node_type.index) = offset; | ||||
| } | } | ||||
| @@ -352,6 +352,8 @@ class BlockMemAssigner : public MemAssigner { | |||||
| void AssignContinuousBlocks(); | void AssignContinuousBlocks(); | ||||
| bool IsZeroCopyBlock(const NodePtr &node, bool continuous); | |||||
| bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | ||||
| uint32_t &peer_input_index); | uint32_t &peer_input_index); | ||||
| @@ -1227,6 +1227,18 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const { | |||||
| if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) { | |||||
| return node; | |||||
| } | |||||
| if (NodeUtils::IsDynamicShape(node)) { | |||||
| return node; | |||||
| } | |||||
| return NodeUtils::GetParentInput(node); | |||||
| } | |||||
| ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const { | ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const { | ||||
| uint32_t parent_index = 0; | uint32_t parent_index = 0; | ||||
| if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | ||||
| @@ -1235,13 +1247,29 @@ ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vecto | |||||
| // Subgraph Data Node, check for constant input. | // Subgraph Data Node, check for constant input. | ||||
| std::string op_type; | std::string op_type; | ||||
| NodePtr in_node = NodeUtils::GetParentInput(node); | |||||
| if (!NodeUtils::GetConstOpType(in_node, op_type)) { | |||||
| return SUCCESS; // not constant input. | |||||
| const auto &in_node = NodeUtils::GetParentInput(node); | |||||
| if (NodeUtils::GetConstOpType(in_node, op_type)) { | |||||
| input_list = in_node->GetOpDesc()->GetOutputOffset(); | |||||
| node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as const output. | |||||
| return SUCCESS; // Constant input. | |||||
| } | |||||
| // Memory allocated for dynamic shape subgraph Data. | |||||
| if (NodeUtils::IsDynamicShape(node)) { | |||||
| return SUCCESS; | |||||
| } | |||||
| const auto &owner = node->GetOwnerComputeGraph(); | |||||
| const auto &parent_desc = owner->GetParentNode()->GetOpDesc(); | |||||
| const auto parent_inputs = parent_desc->GetInputOffset(); | |||||
| if (parent_inputs.size() <= parent_index) { | |||||
| GELOGE(FAILED, "Get Parent input offset failed, node: %s, input size: %zu, parent index: %u", | |||||
| node->GetName().c_str(), parent_inputs.size(), parent_index); | |||||
| return FAILED; | |||||
| } | } | ||||
| vector<int64_t> const_input_list = in_node->GetOpDesc()->GetOutputOffset(); | |||||
| node->GetOpDesc()->SetOutputOffset(const_input_list); // Set Data output same as const output. | |||||
| input_list = {parent_inputs[parent_index]}; | |||||
| node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input. | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1287,7 +1315,8 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||||
| input_list.back()); | input_list.back()); | ||||
| } else { | } else { | ||||
| int64_t output_offset = output_list.at(peer_out_anchor->GetIdx()); | int64_t output_offset = output_list.at(peer_out_anchor->GetIdx()); | ||||
| if (peer_out_anchor->GetOwnerNode()->GetType() == CONSTANT) { | |||||
| const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); | |||||
| if (in_node->GetType() == CONSTANT) { | |||||
| GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index); | GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index); | ||||
| GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset)); | GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset)); | ||||
| } | } | ||||
| @@ -181,6 +181,8 @@ class GraphMemoryAssigner { | |||||
| ge::Status UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const; | ge::Status UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const; | ||||
| NodePtr GetKnownInputNode(const NodePtr &node) const; | |||||
| MemoryOffsetList memory_offset_; | MemoryOffsetList memory_offset_; | ||||
| ge::ComputeGraphPtr compute_graph_; | ge::ComputeGraphPtr compute_graph_; | ||||
| HybridMemAssignerPtr mem_assigner_; | HybridMemAssignerPtr mem_assigner_; | ||||
| @@ -182,38 +182,26 @@ void ModelBuilder::SetInputIsConst(const ge::NodePtr &n) { | |||||
| for (size_t i = 0; i < is_input_const.size(); i++) { | for (size_t i = 0; i < is_input_const.size(); i++) { | ||||
| is_input_const[i] = false; | is_input_const[i] = false; | ||||
| } | } | ||||
| std::string const_type; | |||||
| auto in_data_anchors = n->GetAllInDataAnchors(); | auto in_data_anchors = n->GetAllInDataAnchors(); | ||||
| for (size_t index = 0; index < in_data_anchors.size(); index++) { | for (size_t index = 0; index < in_data_anchors.size(); index++) { | ||||
| auto in_data_anchor = in_data_anchors.at(index); | auto in_data_anchor = in_data_anchors.at(index); | ||||
| const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | ||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | ||||
| const auto &src_node = peer_out_anchor->GetOwnerNode(); | const auto &src_node = peer_out_anchor->GetOwnerNode(); | ||||
| if (src_node->GetType() == CONSTANT) { | |||||
| if (!NodeUtils::GetConstOpType(src_node, const_type)) { | |||||
| continue; | |||||
| } | |||||
| if (const_type == CONSTANT) { | |||||
| if (!SetInputConst(node_op_desc, src_node, index, is_input_const)) { | if (!SetInputConst(node_op_desc, src_node, index, is_input_const)) { | ||||
| return; | return; | ||||
| } | } | ||||
| } else if (src_node->GetType() == CONSTANTOP) { | |||||
| } else { | |||||
| if ((index < is_input_const.size()) && is_input_const[index]) { | if ((index < is_input_const.size()) && is_input_const[index]) { | ||||
| is_input_const[index] = false; | is_input_const[index] = false; | ||||
| } | } | ||||
| } else if (src_node->GetType() == DATA) { | |||||
| uint32_t parent_index = 0; | |||||
| if (!AttrUtils::GetInt(src_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||||
| continue; | |||||
| } | |||||
| // Subgraph Data Node, check for constant input. | |||||
| std::string op_type; | |||||
| const NodePtr in_node = NodeUtils::GetParentInput(src_node); | |||||
| if (!NodeUtils::GetConstOpType(in_node, op_type)) { | |||||
| continue; // not constant input. | |||||
| } | |||||
| if (op_type == CONSTANT) { | |||||
| if (!SetInputConst(node_op_desc, in_node, index, is_input_const)) { | |||||
| return; | |||||
| } | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -16,6 +16,7 @@ | |||||
| #include "graph/build/stream_allocator.h" | #include "graph/build/stream_allocator.h" | ||||
| #include <memory> | #include <memory> | ||||
| #include <algorithm> | |||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/fmk_error_codes.h" | #include "framework/common/fmk_error_codes.h" | ||||
| @@ -374,8 +375,8 @@ Status StreamAllocator::InsertOneEventInTwoNodes(const NodePtr &cur_node, const | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| if ((cur_node->GetType() == ENTER) || (cur_node->GetType() == REFENTER)) { | |||||
| GELOGD("No need to insert event after enter_node %s.", cur_node->GetName().c_str()); | |||||
| if (((cur_node->GetType() == ENTER) || (cur_node->GetType() == REFENTER)) && (next_node->GetType() != STREAMACTIVE)) { | |||||
| GELOGD("No need to insert event between %s and %s.", cur_node->GetName().c_str(), next_node->GetName().c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -721,6 +722,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||||
| GELOGE(FAILED, "SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id); | GELOGE(FAILED, "SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| bool is_stream_first_node = (stream_node_num_vec[stream_id] == 0); | |||||
| AddNodeNum(cur_node, stream_node_num_vec[stream_id]); | AddNodeNum(cur_node, stream_node_num_vec[stream_id]); | ||||
| stream_2_nodes_map[stream_id].push_back(cur_node); | stream_2_nodes_map[stream_id].push_back(cur_node); | ||||
| // The maximum number of tasks per stream. | // The maximum number of tasks per stream. | ||||
| @@ -737,7 +739,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||||
| stream_continuous_2_nodes_map[continuous_stream_label].push_back(cur_node); | stream_continuous_2_nodes_map[continuous_stream_label].push_back(cur_node); | ||||
| } | } | ||||
| // Split the stream if it exceeds the maximum number of nodes in the stream. | // Split the stream if it exceeds the maximum number of nodes in the stream. | ||||
| if (NeedSpiltNewStream(stream_node_num_vec[stream_id], max_node_num_one_stream, op_desc)) { | |||||
| if (NeedSpiltNewStream(stream_node_num_vec[stream_id], max_node_num_one_stream, op_desc, is_stream_first_node)) { | |||||
| last_stream_id++; | last_stream_id++; | ||||
| GELOGI( | GELOGI( | ||||
| "stream_node_num_vec[%ld]= %ld > max_node_num_one_stream : %ld, " | "stream_node_num_vec[%ld]= %ld > max_node_num_one_stream : %ld, " | ||||
| @@ -801,7 +803,11 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||||
| } | } | ||||
| bool StreamAllocator::NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, | bool StreamAllocator::NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, | ||||
| const OpDescPtr &op_desc) const { | |||||
| const OpDescPtr &op_desc, bool is_stream_first_node) const { | |||||
| if (is_stream_first_node) { | |||||
| GELOGD("First node of stream does not need to split new stream"); | |||||
| return false; | |||||
| } | |||||
| const set<string> label_op_types({LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX}); | const set<string> label_op_types({LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX}); | ||||
| bool is_first_active_node = false; | bool is_first_active_node = false; | ||||
| (void)AttrUtils::GetBool(op_desc, ATTR_NAME_SUBGRAPH_FIRST_ACTIVE, is_first_active_node); | (void)AttrUtils::GetBool(op_desc, ATTR_NAME_SUBGRAPH_FIRST_ACTIVE, is_first_active_node); | ||||
| @@ -1019,6 +1025,18 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||||
| loop_active_streams.emplace_back(static_cast<uint32_t>(stream_id)); | loop_active_streams.emplace_back(static_cast<uint32_t>(stream_id)); | ||||
| } | } | ||||
| } | } | ||||
| map<int64_t, NodePtr> stream_id_to_last_node; | |||||
| set<int64_t> streams_skip_iterator_event; | |||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||||
| int64_t stream_id = node->GetOpDesc()->GetStreamId(); | |||||
| if (find(loop_active_streams.begin(), loop_active_streams.end(), stream_id) != loop_active_streams.end()) { | |||||
| stream_id_to_last_node[stream_id] = node; | |||||
| // last node in stream which has streamswitch or IF may be not execute, it will cause block if add event on them | |||||
| if (node->GetOpDesc()->GetType() == STREAMSWITCH) { | |||||
| streams_skip_iterator_event.insert(stream_id); | |||||
| } | |||||
| } | |||||
| } | |||||
| // Set the stream that needs to be activated | // Set the stream that needs to be activated | ||||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | ||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| @@ -1031,7 +1049,31 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||||
| GELOGE(FAILED, "SetListInt failed."); | GELOGE(FAILED, "SetListInt failed."); | ||||
| return FAILED); | return FAILED); | ||||
| for (const auto &stream_id : loop_active_streams) { | for (const auto &stream_id : loop_active_streams) { | ||||
| GELOGI("Active stream %u for node: %s", stream_id, node->GetName().c_str()); | |||||
| GELOGI("Active stream %u for node: %s.", stream_id, node->GetName().c_str()); | |||||
| } | |||||
| // In switch group optimze case, some data input branch may exec slowly. | |||||
| // when condition input branch judge false and some switch has no false branch, | |||||
| // In this condition, data branch has no synchronize point, | |||||
| // it may cause some stream actived by iterator next step when this stream still alive. | |||||
| // If above situation happen, active message will lose, cause process block in next iteration. | |||||
| // In order to avoid this abnormal happen, | |||||
| // add event between each last node and iterator active node in target active stream | |||||
| GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); | |||||
| for (auto iter : stream_id_to_last_node) { | |||||
| if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { | |||||
| GELOGI("skip stream %ld which has streamswitch node when add event to next iterator active node", | |||||
| iter.first); | |||||
| continue; | |||||
| } | |||||
| if (iter.second->GetOwnerComputeGraph()->GetParentGraph() != nullptr) { | |||||
| GELOGI("skip stream %ld which last node in subgraph when add event to next iterator active node", | |||||
| iter.first); | |||||
| continue; | |||||
| } | |||||
| AddSendEventId(iter.second, event_num_); | |||||
| AddRecvEventId(node, event_num_); | |||||
| event_num_++; | |||||
| } | } | ||||
| break; | break; | ||||
| @@ -1132,7 +1174,7 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||||
| return status; | return status; | ||||
| } | } | ||||
| GELOGI("Insert recv event %u before node: %s", event_id, node->GetName().c_str()); | |||||
| GELOGI("Insert recv event %u before node: %s.", event_id, node->GetName().c_str()); | |||||
| } | } | ||||
| // Add the node corresponding to the send event | // Add the node corresponding to the send event | ||||
| @@ -1160,7 +1202,7 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||||
| return status; | return status; | ||||
| } | } | ||||
| GELOGI("Insert send event %u after node: %s", event_id, node->GetName().c_str()); | |||||
| GELOGI("Insert send event %u after node: %s.", event_id, node->GetName().c_str()); | |||||
| } | } | ||||
| } | } | ||||
| @@ -58,7 +58,8 @@ class StreamAllocator { | |||||
| bool IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const; | bool IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const; | ||||
| Status SplitStreams(std::vector<std::set<int64_t>> &split_streams); | Status SplitStreams(std::vector<std::set<int64_t>> &split_streams); | ||||
| bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const; | |||||
| bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc, | |||||
| bool is_stream_first_node) const; | |||||
| Status UpdateActiveStreams(const std::vector<std::set<int64_t>> &split_streams); | Status UpdateActiveStreams(const std::vector<std::set<int64_t>> &split_streams); | ||||
| void UpdateLabelStreams(const std::vector<std::set<int64_t>> &split_streams); | void UpdateLabelStreams(const std::vector<std::set<int64_t>> &split_streams); | ||||
| @@ -95,8 +95,8 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
| GELOGE(FAILED, "SetListStr failed."); | GELOGE(FAILED, "SetListStr failed."); | ||||
| return FAILED); | return FAILED); | ||||
| GELOGI("Call GenerateTask Success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(), | |||||
| op_name_map.size()); | |||||
| GELOGI("GenerateTask Success, task list:%zu, op map:%zu, logic mem base:%p, logic weight base:%p, logic var base:%p", | |||||
| task_def_list.size(), op_name_map.size(), run_context.dataMemBase, run_context.weightMemBase, var_mem_base_); | |||||
| // Init and serialize model_task_def | // Init and serialize model_task_def | ||||
| ModelTaskDef model_task_def; | ModelTaskDef model_task_def; | ||||
| @@ -260,7 +260,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| int64_t group_key; | int64_t group_key; | ||||
| uint32_t node_index = 0; | uint32_t node_index = 0; | ||||
| rtStream_t stream = nullptr; | rtStream_t stream = nullptr; | ||||
| bool is_unknown_shape = graph->GetGraphUnknownFlag(); | |||||
| bool is_unknown_shape = graph->GetGraphUnknownFlag() || GetContext().GetHostExecFlag(); | |||||
| if (is_unknown_shape) { | if (is_unknown_shape) { | ||||
| GE_CHK_STATUS_RET(SetUnknownShapeStream(run_context, stream), "Set unknown shape stream failed."); | GE_CHK_STATUS_RET(SetUnknownShapeStream(run_context, stream), "Set unknown shape stream failed."); | ||||
| } | } | ||||
| @@ -479,7 +479,12 @@ Status TaskGenerator::UpdateAnchorStatus(const NodePtr &node) { | |||||
| GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed."); | GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed."); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| } else if (peer_anchor->GetOwnerNode()->GetType() == CONSTANT) { | |||||
| continue; | |||||
| } | |||||
| std::string const_type; | |||||
| bool is_const = NodeUtils::GetConstOpType(peer_anchor->GetOwnerNode(), const_type); | |||||
| if (is_const && (const_type == CONSTANT)) { | |||||
| if (AnchorUtils::SetStatus(anchor, ANCHOR_CONST) != GRAPH_SUCCESS) { | if (AnchorUtils::SetStatus(anchor, ANCHOR_CONST) != GRAPH_SUCCESS) { | ||||
| GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed."); | GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed."); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| @@ -17,9 +17,13 @@ | |||||
| #include "graph/common/transop_util.h" | #include "graph/common/transop_util.h" | ||||
| #include "common/types.h" | #include "common/types.h" | ||||
| #include "graph/utils/type_utils.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| namespace { | namespace { | ||||
| const int kInvalidTransopDataIndex = -1; | const int kInvalidTransopDataIndex = -1; | ||||
| const int kTransOpOutIndex = 0; | |||||
| std::map<ge::DataType, ge::DataType> precision_loss_transfer_map = {{ge::DT_FLOAT, ge::DT_BOOL}}; | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| @@ -60,4 +64,20 @@ int TransOpUtil::GetTransOpDataIndex(const std::string &type) { | |||||
| } | } | ||||
| return kInvalidTransopDataIndex; | return kInvalidTransopDataIndex; | ||||
| } | } | ||||
| bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) { | |||||
| auto idx = TransOpUtil::GetTransOpDataIndex(src_node); | |||||
| auto input_desc = src_node->GetOpDesc()->GetInputDesc(idx); | |||||
| auto output_desc = src_node->GetOpDesc()->GetOutputDesc(kTransOpOutIndex); | |||||
| auto src_dtype = input_desc.GetDataType(); | |||||
| auto dst_dtype = output_desc.GetDataType(); | |||||
| auto iter = precision_loss_transfer_map.find(src_dtype); | |||||
| if (iter != precision_loss_transfer_map.end() && iter->second == dst_dtype) { | |||||
| GELOGW("Node %s transfer data type from %s to %s ,it will cause precision loss. ignore pass.", | |||||
| src_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(src_dtype).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(dst_dtype).c_str()); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -33,6 +33,8 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { | |||||
| static int GetTransOpDataIndex(const std::string &type); | static int GetTransOpDataIndex(const std::string &type); | ||||
| static bool CheckPrecisionLoss(const NodePtr &src_node); | |||||
| private: | private: | ||||
| TransOpUtil(); | TransOpUtil(); | ||||
| @@ -519,6 +519,25 @@ Status GraphExecutor::GetCombinedDynamicDims(uint32_t model_id, std::vector<std: | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get user designate shape order | |||||
| /// @param [in] model_id | |||||
| /// @param [out] user_input_shape_order | |||||
| /// @return execute result | |||||
| /// | |||||
| ge::Status GraphExecutor::GetUserDesignateShapeOrder(uint32_t model_id, | |||||
| std::vector<std::string> &user_input_shape_order) { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | |||||
| GE_CHECK_NOTNULL(model_manager); | |||||
| Status ret = model_manager->GetUserDesignateShapeOrder(model_id, user_input_shape_order); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetUserDesignateShapeOrder failed."); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | ||||
| auto model_manager = ge::ModelManager::GetInstance(); | auto model_manager = ge::ModelManager::GetInstance(); | ||||
| GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
| @@ -570,7 +589,7 @@ Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigI | |||||
| GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
| Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info); | Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetAIPPInfo failed."); | |||||
| GELOGW("GetAIPPInfo is not success."); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -602,4 +621,16 @@ Status GraphExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t inde | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphExecutor::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, | |||||
| OpDescInfo &op_desc_info) { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | |||||
| GE_CHECK_NOTNULL(model_manager); | |||||
| Status ret = model_manager->GetOpDescInfo(device_id, stream_id, task_id, op_desc_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetOpDescInfo failed."); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -95,6 +95,15 @@ class GraphExecutor { | |||||
| /// | /// | ||||
| static Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | static Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get user designate shape order | |||||
| /// @param [in] model_id | |||||
| /// @param [out] user_input_shape_order | |||||
| /// @return execute result | |||||
| /// | |||||
| static Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_input_shape_order); | |||||
| static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | ||||
| static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | ||||
| @@ -107,6 +116,8 @@ class GraphExecutor { | |||||
| static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | ||||
| std::vector<InputOutputDims> &output_dims); | std::vector<InputOutputDims> &output_dims); | ||||
| static Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||||
| private: | private: | ||||
| Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data, | Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data, | ||||
| OutputData &graph_output_data, std::vector<InputOutputDescInfo> &output_desc); | OutputData &graph_output_data, std::vector<InputOutputDescInfo> &output_desc); | ||||
| @@ -38,7 +38,9 @@ namespace ge { | |||||
| Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info) { | Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info) { | ||||
| GE_CHECK_NOTNULL(aipp_params); | GE_CHECK_NOTNULL(aipp_params); | ||||
| AIPP_CONVERT_TO_AIPP_INFO(aipp_mode); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(input_format); | AIPP_CONVERT_TO_AIPP_INFO(input_format); | ||||
| AIPP_CONVERT_TO_AIPP_INFO(related_input_rank); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(src_image_size_w); | AIPP_CONVERT_TO_AIPP_INFO(src_image_size_w); | ||||
| AIPP_CONVERT_TO_AIPP_INFO(src_image_size_h); | AIPP_CONVERT_TO_AIPP_INFO(src_image_size_h); | ||||
| AIPP_CONVERT_TO_AIPP_INFO(crop); | AIPP_CONVERT_TO_AIPP_INFO(crop); | ||||
| @@ -85,6 +87,8 @@ Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, Ai | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_1, 0); | AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_1, 0); | ||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_2, 0); | AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_2, 0); | ||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_3, 0); | AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_3, 0); | ||||
| AIPP_CONVERT_TO_AIPP_INFO(support_rotation); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(max_src_image_size); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -171,6 +171,44 @@ void DataDumper::SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_de | |||||
| is_op_debug_ = is_op_debug; | is_op_debug_ = is_op_debug; | ||||
| } | } | ||||
| void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, | |||||
| uint32_t stream_id) { | |||||
| GELOGD("Start SaveDumpOpInfo of task_id: %u, stream_id: %u", task_id, stream_id); | |||||
| OpDescInfo op_desc_info; | |||||
| op_desc_info.op_name = op->GetName(); | |||||
| op_desc_info.task_id = task_id; | |||||
| op_desc_info.stream_id = stream_id; | |||||
| for (size_t i = 0; i < op->GetInputsSize(); ++i) { | |||||
| GeTensorDesc input_desc = op->GetInputDesc(i); | |||||
| op_desc_info.input_format.emplace_back(input_desc.GetFormat()); | |||||
| op_desc_info.input_shape.emplace_back(input_desc.GetShape().GetDims()); | |||||
| op_desc_info.input_data_type.emplace_back(input_desc.GetDataType()); | |||||
| } | |||||
| for (size_t j = 0; j < op->GetOutputsSize(); ++j) { | |||||
| GeTensorDesc output_desc = op->GetOutputDesc(j); | |||||
| op_desc_info.output_format.emplace_back(output_desc.GetFormat()); | |||||
| op_desc_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||||
| op_desc_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||||
| } | |||||
| op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op); | |||||
| op_desc_info.output_addrs = ModelUtils::GetOutputDataAddrs(model_param, op); | |||||
| op_desc_info_.emplace_back(op_desc_info); | |||||
| } | |||||
| bool DataDumper::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { | |||||
| GELOGI("There are %zu op need to dump.", op_desc_info_.size()); | |||||
| for (size_t index = 0; index < op_desc_info_.size(); ++index) { | |||||
| OpDescInfo dump_op_info = op_desc_info_.at(index); | |||||
| if (dump_op_info.task_id == task_id && dump_op_info.stream_id == stream_id) { | |||||
| GELOGI("find exception op of task_id: %u, stream_id: %u.", task_id, stream_id); | |||||
| op_desc_info = dump_op_info; | |||||
| return true; | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, | void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, | ||||
| uintptr_t args) { | uintptr_t args) { | ||||
| if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
| @@ -325,17 +363,24 @@ Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicp | |||||
| // check dump output tensor desc is redirected by attr ATTR_DATA_DUMP_REF | // check dump output tensor desc is redirected by attr ATTR_DATA_DUMP_REF | ||||
| if (AttrUtils::GetStr(&output_desc, ATTR_DATA_DUMP_REF, node_name_index)) { | if (AttrUtils::GetStr(&output_desc, ATTR_DATA_DUMP_REF, node_name_index)) { | ||||
| GE_CHK_STATUS_RET(DumpRefOutput(inner_dump_info, output, i, node_name_index), "DumpRefOutput failed"); | GE_CHK_STATUS_RET(DumpRefOutput(inner_dump_info, output, i, node_name_index), "DumpRefOutput failed"); | ||||
| task.mutable_output()->Add(std::move(output)); | |||||
| } else { | } else { | ||||
| GE_IF_BOOL_EXEC( | |||||
| IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i), | |||||
| GELOGD("DumpOutputWithTask[%s] output[%zu] is l1 addr, skip it", inner_dump_info.op->GetName().c_str(), i); | |||||
| continue;); | |||||
| const auto input_size = inner_dump_info.op->GetInputsSize(); | |||||
| auto addr = inner_dump_info.args + (i + input_size) * kAddrLen; | |||||
| GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i), "Generate output failed"); | |||||
| if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) { | |||||
| GELOGI("[L1Fusion] DumpOutputWithTask[%s] output[%zu] is l1 addr.", inner_dump_info.op->GetName().c_str(), i); | |||||
| int64_t output_size = 0; | |||||
| if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Get output size failed."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| GELOGI("Get output size of l1_fusion_dump is %ld", output_size); | |||||
| GenerateOpBuffer(output_size, task); | |||||
| } else { | |||||
| const auto input_size = inner_dump_info.op->GetInputsSize(); | |||||
| auto addr = inner_dump_info.args + (i + input_size) * kAddrLen; | |||||
| GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i), "Generate output failed"); | |||||
| task.mutable_output()->Add(std::move(output)); | |||||
| } | |||||
| } | } | ||||
| task.mutable_output()->Add(std::move(output)); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -468,20 +513,38 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump:: | |||||
| // check dump input tensor desc is redirected by attr ATTR_DATA_DUMP_REF | // check dump input tensor desc is redirected by attr ATTR_DATA_DUMP_REF | ||||
| if (AttrUtils::GetStr(&input_descs.at(i), ATTR_DATA_DUMP_REF, node_name_index)) { | if (AttrUtils::GetStr(&input_descs.at(i), ATTR_DATA_DUMP_REF, node_name_index)) { | ||||
| GE_CHK_STATUS_RET(DumpRefInput(inner_dump_info, input, i, node_name_index), "DumpRefInput failed"); | GE_CHK_STATUS_RET(DumpRefInput(inner_dump_info, input, i, node_name_index), "DumpRefInput failed"); | ||||
| task.mutable_input()->Add(std::move(input)); | |||||
| // normal dump without attr | // normal dump without attr | ||||
| } else { | } else { | ||||
| GE_IF_BOOL_EXEC(IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i), | |||||
| GELOGD("DumpInput[%s] input[%zu] is l1 addr, skip it", inner_dump_info.op->GetName().c_str(), i); | |||||
| continue;); | |||||
| auto addr = inner_dump_info.args + kAddrLen * i; | |||||
| GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i), "Generate input failed"); | |||||
| if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) { | |||||
| GELOGI("[L1Fusion] DumpInput[%s] input[%zu] is l1 addr", inner_dump_info.op->GetName().c_str(), i); | |||||
| int64_t input_size = 0; | |||||
| if (AttrUtils::GetInt(input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { | |||||
| GELOGI("Get aipp input size according to attr is %ld", input_size); | |||||
| } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Get input size failed."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| GELOGI("Get input size of l1_fusion_dump is %ld", input_size); | |||||
| GenerateOpBuffer(input_size, task); | |||||
| } else { | |||||
| auto addr = inner_dump_info.args + kAddrLen * i; | |||||
| GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i), "Generate input failed"); | |||||
| task.mutable_input()->Add(std::move(input)); | |||||
| } | |||||
| } | } | ||||
| task.mutable_input()->Add(std::move(input)); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DataDumper::GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task) { | |||||
| aicpu::dump::OpBuffer op_buffer; | |||||
| op_buffer.set_buffer_type(aicpu::dump::BufferType::L1); | |||||
| op_buffer.set_address(reinterpret_cast<uintptr_t>(l1_fusion_addr_)); | |||||
| op_buffer.set_size(size); | |||||
| task.mutable_buffer()->Add(std::move(op_buffer)); | |||||
| } | |||||
| Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info) { | Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info) { | ||||
| std::string proto_str; | std::string proto_str; | ||||
| size_t proto_size = op_mapping_info.ByteSizeLong(); | size_t proto_size = op_mapping_info.ByteSizeLong(); | ||||
| @@ -720,7 +783,7 @@ void DataDumper::PrintCheckLog(string &dump_list_key) { | |||||
| bool not_find_by_omname = model_list.find(om_name_) == model_list.end(); | bool not_find_by_omname = model_list.find(om_name_) == model_list.end(); | ||||
| bool not_find_by_modelname = model_list.find(model_name_) == model_list.end(); | bool not_find_by_modelname = model_list.find(model_name_) == model_list.end(); | ||||
| dump_list_key = not_find_by_omname ? model_name_ : om_name_; | dump_list_key = not_find_by_omname ? model_name_ : om_name_; | ||||
| GELOGI("%zu op need dump in %s.", op_list_.size(), dump_list_key.c_str()); | |||||
| GELOGI("%zu op need dump in known shape model %s.", op_list_.size(), dump_list_key.c_str()); | |||||
| if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) { | if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) { | ||||
| if (not_find_by_omname && not_find_by_modelname) { | if (not_find_by_omname && not_find_by_modelname) { | ||||
| @@ -30,6 +30,7 @@ | |||||
| #include "proto/op_mapping_info.pb.h" | #include "proto/op_mapping_info.pb.h" | ||||
| #include "runtime/mem.h" | #include "runtime/mem.h" | ||||
| #include "task_info/task_info.h" | #include "task_info/task_info.h" | ||||
| #include "framework/common/ge_types.h" | |||||
| namespace ge { | namespace ge { | ||||
| class DataDumper { | class DataDumper { | ||||
| @@ -64,10 +65,14 @@ class DataDumper { | |||||
| void SetRefInfo(const std::map<OpDescPtr, void *> &ref_info) { ref_info_ = ref_info; }; | void SetRefInfo(const std::map<OpDescPtr, void *> &ref_info) { ref_info_ = ref_info; }; | ||||
| void SetL1FusionAddr(void *addr) { l1_fusion_addr_ = addr; }; | |||||
| void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); | void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); | ||||
| void SaveDumpInput(const std::shared_ptr<Node> &node); | void SaveDumpInput(const std::shared_ptr<Node> &node); | ||||
| void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id); | |||||
| // args is device memory stored first output addr | // args is device memory stored first output addr | ||||
| void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args); | void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args); | ||||
| void SaveEndGraphId(uint32_t task_id, uint32_t stream_id); | void SaveEndGraphId(uint32_t task_id, uint32_t stream_id); | ||||
| @@ -81,6 +86,7 @@ class DataDumper { | |||||
| void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } | void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } | ||||
| const DumpProperties &GetDumpProperties() const { return dump_properties_; } | const DumpProperties &GetDumpProperties() const { return dump_properties_; } | ||||
| bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const; | |||||
| private: | private: | ||||
| void ReleaseDevMem(void **ptr) noexcept; | void ReleaseDevMem(void **ptr) noexcept; | ||||
| @@ -100,6 +106,7 @@ class DataDumper { | |||||
| struct InnerDumpInfo; | struct InnerDumpInfo; | ||||
| struct InnerInputMapping; | struct InnerInputMapping; | ||||
| std::vector<OpDescInfo> op_desc_info_; | |||||
| std::vector<InnerDumpInfo> op_list_; | std::vector<InnerDumpInfo> op_list_; | ||||
| uint32_t end_graph_task_id_ = 0; | uint32_t end_graph_task_id_ = 0; | ||||
| uint32_t end_graph_stream_id_ = 0; | uint32_t end_graph_stream_id_ = 0; | ||||
| @@ -111,6 +118,7 @@ class DataDumper { | |||||
| uintptr_t loop_cond_; | uintptr_t loop_cond_; | ||||
| ComputeGraphPtr compute_graph_; | ComputeGraphPtr compute_graph_; | ||||
| std::map<OpDescPtr, void *> ref_info_; | std::map<OpDescPtr, void *> ref_info_; | ||||
| void *l1_fusion_addr_ = nullptr; | |||||
| uint32_t op_debug_task_id_ = 0; | uint32_t op_debug_task_id_ = 0; | ||||
| uint32_t op_debug_stream_id_ = 0; | uint32_t op_debug_stream_id_ = 0; | ||||
| @@ -135,6 +143,7 @@ class DataDumper { | |||||
| const uintptr_t &addr, size_t index); | const uintptr_t &addr, size_t index); | ||||
| Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | ||||
| const uintptr_t &addr, size_t index); | const uintptr_t &addr, size_t index); | ||||
| void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task); | |||||
| }; | }; | ||||
| struct DataDumper::InnerDumpInfo { | struct DataDumper::InnerDumpInfo { | ||||
| uint32_t task_id; | uint32_t task_id; | ||||
| @@ -84,6 +84,8 @@ const uint32_t kAddrLen = sizeof(void *); | |||||
| const int kDecimal = 10; | const int kDecimal = 10; | ||||
| const int kBytes = 8; | const int kBytes = 8; | ||||
| const uint32_t kDataMemAlignSizeCompare = 64; | const uint32_t kDataMemAlignSizeCompare = 64; | ||||
| const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024; | |||||
| const uint32_t kDumpFlagOfL1Fusion = 0; | |||||
| const char *const kDefaultBatchLable = "Batch_default"; | const char *const kDefaultBatchLable = "Batch_default"; | ||||
| inline bool IsDataOp(const std::string &node_type) { | inline bool IsDataOp(const std::string &node_type) { | ||||
| @@ -97,7 +99,6 @@ inline bool IsNoTaskAndDumpNeeded(const OpDescPtr &op_desc) { | |||||
| } // namespace | } // namespace | ||||
| std::mutex DavinciModel::tvm_bin_mutex_; | std::mutex DavinciModel::tvm_bin_mutex_; | ||||
| std::set<std::string> DavinciModel::tvm_bin_kernel_; | |||||
| DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener) | DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener) | ||||
| : weights_mem_base_(nullptr), | : weights_mem_base_(nullptr), | ||||
| @@ -179,6 +180,10 @@ DavinciModel::~DavinciModel() { | |||||
| FreeFeatureMapMem(); | FreeFeatureMapMem(); | ||||
| if (l1_fusion_addr_ != nullptr) { | |||||
| GE_CHK_RT(rtFree(l1_fusion_addr_)); | |||||
| } | |||||
| if (rt_model_handle_ != nullptr) { | if (rt_model_handle_ != nullptr) { | ||||
| GE_CHK_RT(rtModelDestroy(rt_model_handle_)); | GE_CHK_RT(rtModelDestroy(rt_model_handle_)); | ||||
| rt_model_handle_ = nullptr; | rt_model_handle_ = nullptr; | ||||
| @@ -305,7 +310,7 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p | |||||
| if (weight_ptr == nullptr) { | if (weight_ptr == nullptr) { | ||||
| weights_mem_base_ = MallocWeightsMem(weights_size); | weights_mem_base_ = MallocWeightsMem(weights_size); | ||||
| if (weights_mem_base_ == nullptr) { | if (weights_mem_base_ == nullptr) { | ||||
| GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); | |||||
| GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); | |||||
| return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; | return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; | ||||
| } | } | ||||
| is_inner_weight_base_ = true; | is_inner_weight_base_ = true; | ||||
| @@ -367,7 +372,7 @@ void DavinciModel::InitRuntimeParams() { | |||||
| session_id_ = runtime_param_.session_id; | session_id_ = runtime_param_.session_id; | ||||
| GELOGI( | GELOGI( | ||||
| "InitRuntimeParams(), session_id:%u, stream_num:%lu, event_num:%u, label_num:%u, " | |||||
| "InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, " | |||||
| "logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, " | "logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, " | ||||
| "memory_size:%lu, weight_size:%lu, var_size:%lu", | "memory_size:%lu, weight_size:%lu, var_size:%lu", | ||||
| runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num, | runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num, | ||||
| @@ -401,6 +406,7 @@ void DavinciModel::CheckHasHcomOp() { | |||||
| /// | /// | ||||
| Status DavinciModel::BindModelStream() { | Status DavinciModel::BindModelStream() { | ||||
| // Stream not in active_stream_indication_ is active stream. | // Stream not in active_stream_indication_ is active stream. | ||||
| is_stream_list_bind_ = false; | |||||
| if ((!input_queue_ids_.empty() || !output_queue_ids_.empty()) || (deploy_type_ == AICPU_DEPLOY_CROSS_THREAD)) { | if ((!input_queue_ids_.empty() || !output_queue_ids_.empty()) || (deploy_type_ == AICPU_DEPLOY_CROSS_THREAD)) { | ||||
| for (size_t i = 0; i < stream_list_.size(); ++i) { | for (size_t i = 0; i < stream_list_.size(); ++i) { | ||||
| if (active_stream_indication_.count(i) == 0) { | if (active_stream_indication_.count(i) == 0) { | ||||
| @@ -419,7 +425,7 @@ Status DavinciModel::BindModelStream() { | |||||
| GE_CHK_RT_RET(rtModelBindStream(rt_model_handle_, stream_list_[i], RT_HEAD_STREAM)); | GE_CHK_RT_RET(rtModelBindStream(rt_model_handle_, stream_list_[i], RT_HEAD_STREAM)); | ||||
| } | } | ||||
| } | } | ||||
| is_stream_list_bind_ = true; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -600,6 +606,12 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
| // create model_handle to load model | // create model_handle to load model | ||||
| GE_CHK_RT_RET(rtModelCreate(&rt_model_handle_, 0)); | GE_CHK_RT_RET(rtModelCreate(&rt_model_handle_, 0)); | ||||
| GE_CHK_RT_RET(rtModelGetId(rt_model_handle_, &runtime_model_id_)); | GE_CHK_RT_RET(rtModelGetId(rt_model_handle_, &runtime_model_id_)); | ||||
| // malloc 2M for dump l1fusion op | |||||
| GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); | |||||
| // send l1fusion dump addr to rts | |||||
| GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion)); | |||||
| // inference will use default graph_id 0; | // inference will use default graph_id 0; | ||||
| runtime_param_.graph_id = compute_graph->GetGraphID(); | runtime_param_.graph_id = compute_graph->GetGraphID(); | ||||
| @@ -748,11 +760,18 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| typedef Status (DavinciModel::*OpDescCall)(const OpDescPtr &); | typedef Status (DavinciModel::*OpDescCall)(const OpDescPtr &); | ||||
| static std::map<std::string, OpDescCall> op_desc_handle = { | static std::map<std::string, OpDescCall> op_desc_handle = { | ||||
| {VARIABLE, &DavinciModel::InitVariable}, {CONSTANTOP, &DavinciModel::InitConstant}, | |||||
| {STREAMACTIVE, &DavinciModel::InitStreamActive}, {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, | |||||
| {STREAMSWITCHN, &DavinciModel::InitStreamSwitchN}, {LABELSET, &DavinciModel::InitLabelSet}, | |||||
| {VARIABLE, &DavinciModel::InitVariable}, | |||||
| {CONSTANTOP, &DavinciModel::InitConstant}, | |||||
| {STREAMACTIVE, &DavinciModel::InitStreamActive}, | |||||
| {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, | |||||
| {STREAMSWITCHN, &DavinciModel::InitStreamSwitchN}, | |||||
| {LABELSET, &DavinciModel::InitLabelSet}, | |||||
| {CASE, &DavinciModel::InitCase}, | |||||
| }; | }; | ||||
| GE_CHK_STATUS_RET(InitInputOutputForDynamic(compute_graph), "InitInputOutputForDynamic failed."); | |||||
| map<uint32_t, OpDescPtr> data_by_index; | |||||
| auto nodes = compute_graph->GetAllNodes(); | auto nodes = compute_graph->GetAllNodes(); | ||||
| const TBEKernelStore &tbekernel_store = ge_model_->GetTBEKernelStore(); | const TBEKernelStore &tbekernel_store = ge_model_->GetTBEKernelStore(); | ||||
| for (size_t i = 0; i < nodes.size(); i++) { | for (size_t i = 0; i < nodes.size(); i++) { | ||||
| @@ -770,7 +789,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | ||||
| if (IsDataOp(op_desc->GetType())) { | if (IsDataOp(op_desc->GetType())) { | ||||
| if (InitDataOp(node, data_op_index) != SUCCESS) { | |||||
| if (InitDataOp(node, data_op_index, data_by_index) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| @@ -839,21 +858,44 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| GE_TIMESTAMP_ADD(InitTbeHandle); | GE_TIMESTAMP_ADD(InitTbeHandle); | ||||
| } | } | ||||
| AdjustDataOpList(data_by_index); | |||||
| GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | ||||
| GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief When known_node_ is set, collect the Data and NetOutput ops found among the graph's direct nodes. | |||||
| /// @param [in] compute_graph: graph whose direct nodes are scanned. | |||||
| /// @return SUCCESS, or PARAM_INVALID when a direct node has no op_desc. | |||||
| /// | |||||
| Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph) { | |||||
| if (!known_node_) { | |||||
| return SUCCESS; | |||||
| } | |||||
| // for dynamic shape | |||||
| auto direct_nodes = compute_graph->GetDirectNode(); | |||||
| for (const auto &node : direct_nodes) { | |||||
| const auto op_desc = node->GetOpDesc(); | |||||
| if (op_desc == nullptr) { | |||||
| GELOGE(PARAM_INVALID, "op_desc is null."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| const auto &op_type = op_desc->GetType(); | |||||
| // Data ops feed data_op_list_, NetOutput ops feed output_op_list_; every other type is ignored here. | |||||
| if (IsDataOp(op_type)) { | |||||
| GELOGD("init data op %s", op_desc->GetName().c_str()); | |||||
| data_op_list_.push_back(op_desc); | |||||
| } | |||||
| if (op_type == NETOUTPUT) { | |||||
| GELOGD("init netouput op %s", op_desc->GetName().c_str()); | |||||
| output_op_list_.push_back(op_desc); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Data Op Initialize. | /// @brief Data Op Initialize. | ||||
| /// @param [in] NodePtr: Data Op. | /// @param [in] NodePtr: Data Op. | ||||
| /// @param [in/out] data_op_index: NetOutput addr size info. | /// @param [in/out] data_op_index: NetOutput addr size info. | ||||
| /// @return Status | /// @return Status | ||||
| Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index) { | |||||
| Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index) { | |||||
| // op_desc Checked by Init: Data, valid. | // op_desc Checked by Init: Data, valid. | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| if (known_node_) { | if (known_node_) { | ||||
| data_op_list_.push_back(op_desc); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| uint32_t parent_index = 0; // Ignore subgraph Data Node. | uint32_t parent_index = 0; // Ignore subgraph Data Node. | ||||
| @@ -885,6 +927,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index) { | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| new_input_data_info_[data_index] = zero_copy_offset; | new_input_data_info_[data_index] = zero_copy_offset; | ||||
| data_by_index[data_index] = op_desc; | |||||
| for (size_t index = 0; index < virtual_addr_list.size(); ++index) { | for (size_t index = 0; index < virtual_addr_list.size(); ++index) { | ||||
| void *addr = virtual_addr_list.at(index); | void *addr = virtual_addr_list.at(index); | ||||
| @@ -904,6 +947,24 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Rebuild data_op_list_ from data_by_index, following the map's iteration order. | |||||
| /// @param [in] data_by_index: Data ops keyed by their index. | |||||
| /// @return None. When the map and the list differ in size, only a warning is logged and the list is kept as is. | |||||
| /// | |||||
| void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index) { | |||||
| const size_t map_size = data_by_index.size(); | |||||
| const size_t list_size = data_op_list_.size(); | |||||
| if (map_size == list_size) { | |||||
| // Assuming `map` is std::map, iteration is in ascending key order, so the list ends up sorted by index. | |||||
| data_op_list_.clear(); | |||||
| for (auto iter = data_by_index.begin(); iter != data_by_index.end(); ++iter) { | |||||
| data_op_list_.emplace_back(iter->second); | |||||
| } | |||||
| return; | |||||
| } | |||||
| GELOGW("Data map size: %zu, Data list size: %zu.", map_size, list_size); | |||||
| } | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief input zero copy node Initialize. | /// @brief input zero copy node Initialize. | ||||
| @@ -946,7 +1007,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| // excludes the function op sub graph, e.g. case,if | // excludes the function op sub graph, e.g. case,if | ||||
| if (known_node_) { | if (known_node_) { | ||||
| output_op_list_.push_back(op_desc); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); | ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); | ||||
| @@ -989,9 +1049,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||||
| new_output_data_info_[num + idx] = zero_copy_offset; | new_output_data_info_[num + idx] = zero_copy_offset; | ||||
| void *addr = virtual_addr_list.at(idx); | void *addr = virtual_addr_list.at(idx); | ||||
| int64_t input_offset = input_offset_list.at(idx); | int64_t input_offset = input_offset_list.at(idx); | ||||
| if (new_output_outside_addrs_.find(addr) != new_output_outside_addrs_.end()) { | |||||
| continue; | |||||
| } | |||||
| vector<void *> tensor_addrs; | vector<void *> tensor_addrs; | ||||
| zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); | zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); | ||||
| auto rslt = new_output_outside_addrs_.insert(std::pair<void *, ZeroCopyOffset>(addr, zero_copy_offset)); | auto rslt = new_output_outside_addrs_.insert(std::pair<void *, ZeroCopyOffset>(addr, zero_copy_offset)); | ||||
| @@ -1464,6 +1521,17 @@ void DavinciModel::GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &bat | |||||
| batch_info = combined_batch_info_; | batch_info = combined_batch_info_; | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get the user designated shape order | |||||
| /// @param [out] user_input_shape_order: overwritten with a copy of user_designate_shape_order_ | |||||
| /// @return None | |||||
| /// | |||||
| void DavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) const { | |||||
| // Copy assignment already replaces the previous contents, so no clear() is done first; | |||||
| // clearing beforehand would also wipe the data if the argument ever referred to the member itself. | |||||
| user_input_shape_order = user_designate_shape_order_; | |||||
| } | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get AIPP input info | /// @brief Get AIPP input info | ||||
| @@ -1475,7 +1543,7 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { | |||||
| GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | ||||
| OpDescPtr data_op = data_op_list_[index]; | OpDescPtr data_op = data_op_list_[index]; | ||||
| if (!data_op->HasAttr(ATTR_NAME_AIPP)) { | if (!data_op->HasAttr(ATTR_NAME_AIPP)) { | ||||
| GELOGE(GE_AIPP_NOT_EXIST, "GetAIPPInfo: there is not AIPP related with index %u.", index); | |||||
| GELOGW("GetAIPPInfo: there is not AIPP related with index %u.", index); | |||||
| return GE_AIPP_NOT_EXIST; | return GE_AIPP_NOT_EXIST; | ||||
| } | } | ||||
| @@ -1488,10 +1556,6 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { | |||||
| GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | ||||
| GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u", | GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u", | ||||
| data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank()); | data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank()); | ||||
| if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) { | |||||
| GELOGI("GetAIPPInfo, dynamic Aipp is not support to query temporarily."); | |||||
| return GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY; | |||||
| } | |||||
| GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info), | GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info), | ||||
| "convert aipp params to aipp config info failed"); | "convert aipp params to aipp config info failed"); | ||||
| @@ -1563,51 +1627,51 @@ Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInf | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) { | |||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Fill the shape info of an input description from a list of dims. | |||||
| /// @param [in] model_input_dims: dims to record. | |||||
| /// @param [in] format: FORMAT_NHWC selects the NHWC dim positions, any other value the NCHW ones. | |||||
| /// @param [out] input: description whose shape_info is updated. | |||||
| /// @return None | |||||
| /// | |||||
| void DavinciModel::SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, | |||||
| InputOutputDescInfo &input) { | |||||
| uint32_t n, c, h, w; | uint32_t n, c, h, w; | ||||
| n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N; | n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N; | ||||
| c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C; | c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C; | ||||
| h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H; | h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H; | ||||
| w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W; | w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W; | ||||
| // The named num/channel/height/width fields are only set when the dim count equals NORMAL_TENSOR_SIZE. | |||||
| const bool is_normal_tensor = (model_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)); | |||||
| if (is_normal_tensor) { | |||||
| input.shape_info.num = model_input_dims[n]; | |||||
| input.shape_info.channel = model_input_dims[c]; | |||||
| input.shape_info.height = model_input_dims[h]; | |||||
| input.shape_info.width = model_input_dims[w]; | |||||
| } | |||||
| // The raw dims are appended in every case, whatever their count. | |||||
| for (const auto &dim : model_input_dims) { | |||||
| input.shape_info.dims.push_back(dim); | |||||
| } | |||||
| } | |||||
| void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) { | |||||
| if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) { | if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) { | ||||
| // When static aipp is set, need to get the model input dims which processed by aipp | // When static aipp is set, need to get the model input dims which processed by aipp | ||||
| vector<int64_t> model_input_dims; | vector<int64_t> model_input_dims; | ||||
| (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims); | (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims); | ||||
| if (model_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||||
| input.shape_info.num = model_input_dims[n]; | |||||
| input.shape_info.height = model_input_dims[h]; | |||||
| input.shape_info.width = model_input_dims[w]; | |||||
| input.shape_info.channel = model_input_dims[c]; | |||||
| } | |||||
| for (size_t k = 0; k < model_input_dims.size(); ++k) { | |||||
| input.shape_info.dims.push_back(model_input_dims[k]); | |||||
| } | |||||
| is_new_model_desc_ = false; | |||||
| SetInputDimsInfo(model_input_dims, format, input); | |||||
| return; | return; | ||||
| } | } | ||||
| if (!op_desc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||||
| if (op_desc->GetInputDescPtr(0)->GetShape().GetDimNum() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||||
| input.shape_info.num = op_desc->GetInputDescPtr(0)->GetShape().GetDim(n); | |||||
| input.shape_info.height = op_desc->GetInputDescPtr(0)->GetShape().GetDim(h); | |||||
| input.shape_info.width = op_desc->GetInputDescPtr(0)->GetShape().GetDim(w); | |||||
| input.shape_info.channel = op_desc->GetInputDescPtr(0)->GetShape().GetDim(c); | |||||
| } | |||||
| for (size_t k = 0; k < op_desc->GetInputDescPtr(0)->GetShape().GetDimNum(); k++) { | |||||
| input.shape_info.dims.push_back(op_desc->GetInputDescPtr(0)->GetShape().GetDim(k)); | |||||
| } | |||||
| // judge if this data is linked dynamic aipp first, multiply batch has been considered | |||||
| if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { | |||||
| vector<int64_t> dynamic_aipp_input_dims; | |||||
| (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); | |||||
| SetInputDimsInfo(dynamic_aipp_input_dims, format, input); | |||||
| return; | |||||
| } else { | } else { | ||||
| vector<int64_t> origin_input_dims; | |||||
| (void)AttrUtils::GetListInt(op_desc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||||
| if (origin_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||||
| input.shape_info.num = origin_input_dims[n]; | |||||
| input.shape_info.height = origin_input_dims[h]; | |||||
| input.shape_info.width = origin_input_dims[w]; | |||||
| input.shape_info.channel = origin_input_dims[c]; | |||||
| } | |||||
| for (size_t k = 0; k < origin_input_dims.size(); ++k) { | |||||
| input.shape_info.dims.push_back(origin_input_dims[k]); | |||||
| // judge if this data is multiply batch | |||||
| if (!op_desc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||||
| vector<int64_t> input_dims = op_desc->GetInputDescPtr(0)->GetShape().GetDims(); | |||||
| SetInputDimsInfo(input_dims, format, input); | |||||
| return; | |||||
| } else { | |||||
| vector<int64_t> origin_input_dims; | |||||
| (void)AttrUtils::GetListInt(op_desc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||||
| SetInputDimsInfo(origin_input_dims, format, input); | |||||
| return; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -1630,6 +1694,8 @@ Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, s | |||||
| formats.push_back(format); | formats.push_back(format); | ||||
| input_desc.push_back(input); | input_desc.push_back(input); | ||||
| } | } | ||||
| // GetInputDescInfo may be called more than once, so set is_new_model_desc_ to false only after the model input dims have been calculated | |||||
| is_new_model_desc_ = false; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -2106,22 +2172,24 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| if ((kind == RT_MEMCPY_DEVICE_TO_DEVICE) && (copy_only_addrs_.count(output.second.GetBasicAddr()) == 0)) { | |||||
| continue; // Skip: Feed by zero copy. | |||||
| } | |||||
| DataBuffer &buffer = blobs[output.first]; | DataBuffer &buffer = blobs[output.first]; | ||||
| uint64_t mem_size = static_cast<uint64_t>(output.second.GetDataSize()); | uint64_t mem_size = static_cast<uint64_t>(output.second.GetDataSize()); | ||||
| if ((buffer.length == 0) || (mem_size == 0)) { | if ((buffer.length == 0) || (mem_size == 0)) { | ||||
| GELOGI("Length of data is zero, No need copy. output tensor index=%u", output.first); | GELOGI("Length of data is zero, No need copy. output tensor index=%u", output.first); | ||||
| continue; | continue; | ||||
| } | } | ||||
| if (buffer.length < mem_size) { | |||||
| if (is_dynamic_) { | |||||
| GELOGI("No need to check output data size."); | |||||
| } else if (buffer.length < mem_size) { | |||||
| GELOGE(FAILED, "Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); | GELOGE(FAILED, "Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); | ||||
| return FAILED; | return FAILED; | ||||
| } else if (buffer.length > mem_size) { | } else if (buffer.length > mem_size) { | ||||
| GELOGW("Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); | GELOGW("Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); | ||||
| } | } | ||||
| if ((kind == RT_MEMCPY_DEVICE_TO_DEVICE) && (copy_only_addrs_.count(output.second.GetBasicAddr()) == 0)) { | |||||
| continue; // Skip: Feed by zero copy. | |||||
| } | |||||
| uint64_t data_size = output.second.GetDataSize(); | uint64_t data_size = output.second.GetDataSize(); | ||||
| uint64_t buffer_length = buffer.length; | uint64_t buffer_length = buffer.length; | ||||
| void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data)); | void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data)); | ||||
| @@ -2564,10 +2632,12 @@ Status DavinciModel::ModelRunStop() { | |||||
| void DavinciModel::UnbindTaskSinkStream() { | void DavinciModel::UnbindTaskSinkStream() { | ||||
| // unbinding hcom stream | // unbinding hcom stream | ||||
| UnbindHcomStream(); | UnbindHcomStream(); | ||||
| for (size_t i = 0; i < stream_list_.size(); i++) { | |||||
| // unbind rt_model_handle and streams | |||||
| GE_LOGW_IF(rtModelUnbindStream(rt_model_handle_, stream_list_[i]) != RT_ERROR_NONE, | |||||
| "Unbind stream from model failed! Index: %zu", i); | |||||
| if (is_stream_list_bind_) { | |||||
| for (size_t i = 0; i < stream_list_.size(); i++) { | |||||
| // unbind rt_model_handle and streams | |||||
| GE_LOGW_IF(rtModelUnbindStream(rt_model_handle_, stream_list_[i]) != RT_ERROR_NONE, | |||||
| "Unbind stream from model failed! Index: %zu", i); | |||||
| } | |||||
| } | } | ||||
| if (is_inner_model_stream_) { | if (is_inner_model_stream_) { | ||||
| @@ -2610,11 +2680,7 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| const vector<void *> addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]); | const vector<void *> addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]); | ||||
| if (outputs.size() > addr_list.size()) { | |||||
| GELOGE(FAILED, "output data addr %u should less than output op number %u.", outputs.size(), addr_list.size()); | |||||
| return FAILED; | |||||
| } | |||||
| for (size_t i = 0; i < addr_list.size(); ++i) { | |||||
| for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) { | |||||
| knonw_output_data_info_[addr_list[i]] = outputs[i]; | knonw_output_data_info_[addr_list[i]] = outputs[i]; | ||||
| GELOGI("DavinciModel::CreateKnownZeroCopyMap output %d,v addr %p,p addr %p .", i, addr_list[i], outputs[i]); | GELOGI("DavinciModel::CreateKnownZeroCopyMap output %d,v addr %p,p addr %p .", i, addr_list[i], outputs[i]); | ||||
| } | } | ||||
| @@ -2755,19 +2821,21 @@ Status DavinciModel::DistributeTask() { | |||||
| } | } | ||||
| const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | ||||
| GELOGI("there are %zu task need to save.", task_list_.size()); | |||||
| for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | ||||
| auto &task = task_list_.at(task_index); | auto &task = task_list_.at(task_index); | ||||
| GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | ||||
| // for data dump | // for data dump | ||||
| if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) { | |||||
| auto op_index = std::max(model_task_def->task(task_index).kernel().context().op_index(), | |||||
| model_task_def->task(task_index).kernel_ex().op_index()); | |||||
| OpDescPtr op = GetOpByIndex(op_index); | |||||
| if (op == nullptr) { | |||||
| GELOGE(PARAM_INVALID, "Op index %u is null, op list size %zu.", op_index, op_list_.size()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| auto op_index = std::max(model_task_def->task(task_index).kernel().context().op_index(), | |||||
| model_task_def->task(task_index).kernel_ex().op_index()); | |||||
| OpDescPtr op = GetOpByIndex(op_index); | |||||
| if (op == nullptr) { | |||||
| GELOGE(PARAM_INVALID, "Op index %u is null, op list size %zu.", op_index, op_list_.size()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | |||||
| if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) { | |||||
| bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo(); | bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo(); | ||||
| if (call_dump) { | if (call_dump) { | ||||
| SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); | SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); | ||||
| @@ -2873,7 +2941,7 @@ void DavinciModel::DisableZeroCopy(const void *addr) { | |||||
| void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, | void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, | ||||
| void *args, size_t size, size_t offset) { | void *args, size_t size, size_t offset) { | ||||
| // Internal call has ensured that op_desc is not nullptr | // Internal call has ensured that op_desc is not nullptr | ||||
| GELOGI("[ZCPY] SetZeroCopyAddr for %s.", op_desc->GetName().c_str()); | |||||
| GELOGD("[ZCPY] SetZeroCopyAddr for %s.", op_desc->GetName().c_str()); | |||||
| size_t nums = outside_addrs.size(); | size_t nums = outside_addrs.size(); | ||||
| ZeroCopyTask zero_copy_task(op_desc->GetName(), static_cast<uint8_t *>(args), size); | ZeroCopyTask zero_copy_task(op_desc->GetName(), static_cast<uint8_t *>(args), size); | ||||
| for (size_t i = 0; i < nums; ++i) { | for (size_t i = 0; i < nums; ++i) { | ||||
| @@ -2994,7 +3062,7 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp | |||||
| } | } | ||||
| for (ZeroCopyTask &task : zero_copy_tasks_) { | for (ZeroCopyTask &task : zero_copy_tasks_) { | ||||
| GE_CHK_STATUS_RET(task.DistributeParam(is_async_mode_ ? rt_model_stream_ : nullptr), "[ZCPY] Update args failed."); | |||||
| GE_CHK_STATUS_RET(task.DistributeParam(is_async_mode_, rt_model_stream_), "[ZCPY] Update args failed."); | |||||
| } | } | ||||
| output_data.index = input_data.index; | output_data.index = input_data.index; | ||||
| @@ -3106,7 +3174,6 @@ const char *DavinciModel::GetRegisterStub(const string &binfile, const string &s | |||||
| } else { | } else { | ||||
| binfile_key = session_graph_id + "_" + binfile; | binfile_key = session_graph_id + "_" + binfile; | ||||
| } | } | ||||
| std::lock_guard<std::mutex> lock(tvm_bin_mutex_); | |||||
| auto it = tvm_bin_kernel_.find(binfile_key); | auto it = tvm_bin_kernel_.find(binfile_key); | ||||
| if (it != tvm_bin_kernel_.end()) { | if (it != tvm_bin_kernel_.end()) { | ||||
| return it->c_str(); | return it->c_str(); | ||||
| @@ -3242,7 +3309,6 @@ void DavinciModel::StoreTbeHandle(const std::string &handle_key) { | |||||
| // Online mode FE may call rtFunctionRegister. | // Online mode FE may call rtFunctionRegister. | ||||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | ||||
| // Need protection of tvm_bin_mutex_. | |||||
| auto it = used_tbe_handle_map_.find(handle_key); | auto it = used_tbe_handle_map_.find(handle_key); | ||||
| if (it != used_tbe_handle_map_.end()) { | if (it != used_tbe_handle_map_.end()) { | ||||
| // GE registered, increase reference. | // GE registered, increase reference. | ||||
| @@ -3262,9 +3328,9 @@ void DavinciModel::StoreTbeHandle(const std::string &handle_key) { | |||||
| void DavinciModel::CleanTbeHandle() { | void DavinciModel::CleanTbeHandle() { | ||||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | ||||
| std::lock_guard<std::mutex> lock(tvm_bin_mutex_); | |||||
| kernel_store.EraseTBEHandle(used_tbe_handle_map_); | kernel_store.EraseTBEHandle(used_tbe_handle_map_); | ||||
| used_tbe_handle_map_.clear(); | used_tbe_handle_map_.clear(); | ||||
| tvm_bin_kernel_.clear(); | |||||
| } | } | ||||
| /// | /// | ||||
| @@ -3315,21 +3381,26 @@ Status DavinciModel::InitStreamSwitchN(const OpDescPtr &op_desc) { | |||||
| GELOGI("StreamSwitchNOp node:%s, active_stream_id=%u.", op_desc->GetName().c_str(), active_stream_list[j]); | GELOGI("StreamSwitchNOp node:%s, active_stream_id=%u.", op_desc->GetName().c_str(), active_stream_list[j]); | ||||
| } | } | ||||
| (void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_TYPE, dynamic_type_); | |||||
| batch_info_.clear(); | |||||
| combined_batch_info_.clear(); | |||||
| uint32_t batch_num = 0; | uint32_t batch_num = 0; | ||||
| if (!AttrUtils::GetInt(op_desc, ATTR_NAME_BATCH_NUM, batch_num)) { | if (!AttrUtils::GetInt(op_desc, ATTR_NAME_BATCH_NUM, batch_num)) { | ||||
| GELOGE(FAILED, "Failed to get attr ATTR_NAME_BATCH_NUM, StreamSwitchN: %s.", op_desc->GetName().c_str()); | GELOGE(FAILED, "Failed to get attr ATTR_NAME_BATCH_NUM, StreamSwitchN: %s.", op_desc->GetName().c_str()); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| for (uint32_t i = 0; i < batch_num; i++) { | |||||
| return SetDynamicBatchInfo(op_desc, batch_num); | |||||
| } | |||||
| Status DavinciModel::SetDynamicBatchInfo(const OpDescPtr &op_desc, uint32_t batch_num) { | |||||
| batch_info_.clear(); | |||||
| combined_batch_info_.clear(); | |||||
| (void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_TYPE, dynamic_type_); | |||||
| (void)AttrUtils::GetListStr(op_desc, ATTR_USER_DESIGNEATE_SHAPE_ORDER, user_designate_shape_order_); | |||||
| for (uint32_t i = 0; i < batch_num; ++i) { | |||||
| std::vector<int64_t> batch_shape; | std::vector<int64_t> batch_shape; | ||||
| const std::string attr_name = ATTR_NAME_PRED_VALUE + "_" + std::to_string(i); | const std::string attr_name = ATTR_NAME_PRED_VALUE + "_" + std::to_string(i); | ||||
| if (!AttrUtils::GetListInt(op_desc, attr_name, batch_shape)) { | if (!AttrUtils::GetListInt(op_desc, attr_name, batch_shape)) { | ||||
| GELOGE(FAILED, "Failed to get attr ATTR_NAME_PRED_VALUE, StreamSwitchN: %s.", op_desc->GetName().c_str()); | |||||
| GELOGE(FAILED, "Get attr ATTR_NAME_PRED_VALUE failed, Node: %s", op_desc->GetName().c_str()); | |||||
| batch_info_.clear(); | batch_info_.clear(); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -3344,6 +3415,16 @@ Status DavinciModel::InitStreamSwitchN(const OpDescPtr &op_desc) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Case Op Init: when the op carries ATTR_NAME_BATCH_NUM, hand it to SetDynamicBatchInfo. | |||||
| /// @param [in] op_desc: op description the attribute is read from. | |||||
| /// @return SUCCESS when the attribute is absent, otherwise the Status returned by SetDynamicBatchInfo. | |||||
| /// | |||||
| Status DavinciModel::InitCase(const OpDescPtr &op_desc) { | |||||
| uint32_t case_batch_num = 0; | |||||
| if (AttrUtils::GetInt(op_desc, ATTR_NAME_BATCH_NUM, case_batch_num)) { | |||||
| return SetDynamicBatchInfo(op_desc, case_batch_num); | |||||
| } | |||||
| // No batch-number attribute on this node: nothing to record, and this is not an error. | |||||
| GELOGI("Not multi-batch Node: %s", op_desc->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| bool DavinciModel::IsBroadCastOpData(const ge::NodePtr &var_node) { | bool DavinciModel::IsBroadCastOpData(const ge::NodePtr &var_node) { | ||||
| for (auto out_anchor : var_node->GetAllOutDataAnchors()) { | for (auto out_anchor : var_node->GetAllOutDataAnchors()) { | ||||
| GE_RT_FALSE_CHECK_NOTNULL(out_anchor); | GE_RT_FALSE_CHECK_NOTNULL(out_anchor); | ||||
| @@ -3406,12 +3487,13 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
| GELOGI("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); | GELOGI("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); | ||||
| GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); | GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); | ||||
| if (!input_data.is_dynamic_batch) { | |||||
| is_dynamic_ = input_data.is_dynamic_batch; | |||||
| if (!is_dynamic_) { | |||||
| zero_copy_batch_label_addrs_.clear(); | zero_copy_batch_label_addrs_.clear(); | ||||
| } | } | ||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START)); | ||||
| Status ret = CopyModelData(input_data, output_data, input_data.is_dynamic_batch); | |||||
| Status ret = CopyModelData(input_data, output_data, is_dynamic_); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", | ||||
| model_id_); | model_id_); | ||||
| @@ -3587,6 +3669,7 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | |||||
| data_dumper_.SetOmName(om_name_); | data_dumper_.SetOmName(om_name_); | ||||
| data_dumper_.SetComputeGraph(compute_graph); | data_dumper_.SetComputeGraph(compute_graph); | ||||
| data_dumper_.SetRefInfo(saved_task_addrs_); | data_dumper_.SetRefInfo(saved_task_addrs_); | ||||
| data_dumper_.SetL1FusionAddr(l1_fusion_addr_); | |||||
| int32_t device_id = 0; | int32_t device_id = 0; | ||||
| rtError_t rt_ret = rtGetDevice(&device_id); | rtError_t rt_ret = rtGetDevice(&device_id); | ||||
| @@ -3627,19 +3710,9 @@ void DavinciModel::PushHcclStream(rtStream_t value) { | |||||
| all_hccl_stream_list_.push_back(value); | all_hccl_stream_list_.push_back(value); | ||||
| } | } | ||||
| void DavinciModel::CreateHcclFollowStream(rtStream_t stream, int64_t remain_cap) { | |||||
| void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream) { | |||||
| std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_); | std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_); | ||||
| capacity_of_stream_.emplace_back(make_pair(stream, remain_cap)); | |||||
| } | |||||
| void DavinciModel::ReuseHcclFollowStream(int64_t remain_cap, int64_t &index) { | |||||
| std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_); | |||||
| if (remain_cap == 0) { | |||||
| capacity_of_stream_.erase(capacity_of_stream_.begin() + index); | |||||
| } else { | |||||
| capacity_of_stream_.at(index).second = remain_cap; | |||||
| index++; | |||||
| } | |||||
| main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | |||||
| } | } | ||||
| Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info) { | Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info) { | ||||
| @@ -3756,8 +3829,7 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<Input | |||||
| (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); | (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); | ||||
| GELOGD( | GELOGD( | ||||
| "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: " | "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: " | ||||
| "%s, " | |||||
| "data_type: %s, shape: %s .", | |||||
| "%s, data_type: %s, shape: %s .", | |||||
| index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | ||||
| TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | ||||
| TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | ||||
| @@ -184,10 +184,10 @@ class DavinciModel { | |||||
| size_t TotalMemSize() const { return runtime_param_.mem_size; } | size_t TotalMemSize() const { return runtime_param_.mem_size; } | ||||
| // model name | // model name | ||||
| string Name() { return name_; } | |||||
| string Name() const { return name_; } | |||||
| // om_name | // om_name | ||||
| string OmName() { return om_name_; } | |||||
| string OmName() const { return om_name_; } | |||||
| // version | // version | ||||
| uint32_t Version() const { return version_; } | uint32_t Version() const { return version_; } | ||||
| @@ -268,7 +268,7 @@ class DavinciModel { | |||||
| /// @brief For TVM Op, avoid Addr Reuse. | /// @brief For TVM Op, avoid Addr Reuse. | ||||
| /// @return const char* | /// @return const char* | ||||
| /// | /// | ||||
| static const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = ""); | |||||
| const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = ""); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -299,6 +299,8 @@ class DavinciModel { | |||||
| /// | /// | ||||
| void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const; | void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const; | ||||
| void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) const; | |||||
| void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type); | void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type); | ||||
| void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | ||||
| @@ -440,6 +442,10 @@ class DavinciModel { | |||||
| Status SinkTimeProfile(const InputData ¤t_data); | Status SinkTimeProfile(const InputData ¤t_data); | ||||
| void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { | |||||
| data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); | |||||
| } | |||||
| void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args) { | void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args) { | ||||
| data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | ||||
| } | } | ||||
| @@ -449,9 +455,8 @@ class DavinciModel { | |||||
| DavinciModel(const DavinciModel &model) = delete; | DavinciModel(const DavinciModel &model) = delete; | ||||
| const vector<std::pair<rtStream_t, int64_t>> &GetHcclFolowStream() { return capacity_of_stream_; } | |||||
| void CreateHcclFollowStream(rtStream_t stream, int64_t remain_cap); | |||||
| void ReuseHcclFollowStream(int64_t remain_cap, int64_t &index); | |||||
| const map<int64_t, std::vector<rtStream_t>> &GetHcclFolowStream() { return main_follow_stream_mapping_; } | |||||
| void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream); | |||||
| void InitRuntimeParams(); | void InitRuntimeParams(); | ||||
| Status InitVariableMem(); | Status InitVariableMem(); | ||||
| @@ -500,6 +505,16 @@ class DavinciModel { | |||||
| void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); } | void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); } | ||||
| const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); } | const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); } | ||||
| // Merge the given entries into memcpy_4g_offset_addr_; std::map::insert leaves an entry untouched when its key already exists. | |||||
| // The argument is only read, so it is taken by const reference; callers passing a non-const map are unaffected. | |||||
| void SetMemcpyOffsetAndAddr(const map<int64_t, void *> &memcpy_4g_offset_addr) { | |||||
| memcpy_4g_offset_addr_.insert(memcpy_4g_offset_addr.begin(), memcpy_4g_offset_addr.end()); | |||||
| } | |||||
| const map<int64_t, void *> &GetMemcpyOffsetAndAddr() const { return memcpy_4g_offset_addr_; } | |||||
| bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { | |||||
| return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info); | |||||
| } | |||||
| Status InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph); | |||||
| private: | private: | ||||
| // memory address of weights | // memory address of weights | ||||
| uint8_t *weights_mem_base_; | uint8_t *weights_mem_base_; | ||||
| @@ -575,6 +590,8 @@ class DavinciModel { | |||||
| void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); | void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); | ||||
| void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input); | |||||
| Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats); | Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats); | ||||
| Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | ||||
| @@ -619,7 +636,15 @@ class DavinciModel { | |||||
| /// @param [in/out] data_op_index: NetOutput addr size info. | /// @param [in/out] data_op_index: NetOutput addr size info. | ||||
| /// @return Status | /// @return Status | ||||
| /// | /// | ||||
| Status InitDataOp(const NodePtr &node, uint32_t &data_op_index); | |||||
| Status InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index); | |||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Sort Data op list by index. | |||||
| /// @param [in] data_by_index: map of Data Op. | |||||
| /// @return | |||||
| /// | |||||
| void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| @@ -666,6 +691,15 @@ class DavinciModel { | |||||
| Status InitStreamSwitchN(const OpDescPtr &op_desc); | Status InitStreamSwitchN(const OpDescPtr &op_desc); | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Case Op Init. | |||||
| /// @return Status | |||||
| /// | |||||
| Status InitCase(const OpDescPtr &op_desc); | |||||
| Status SetDynamicBatchInfo(const OpDescPtr &op_desc, uint32_t batch_num); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief TVM Op Init. | /// @brief TVM Op Init. | ||||
| @@ -840,7 +874,7 @@ class DavinciModel { | |||||
| // for reuse hccl_follow_stream | // for reuse hccl_follow_stream | ||||
| std::mutex capacity_of_stream_mutex_; | std::mutex capacity_of_stream_mutex_; | ||||
| std::vector<std::pair<rtStream_t, int64_t>> capacity_of_stream_; | |||||
| std::map<int64_t, std::vector<rtStream_t>> main_follow_stream_mapping_; | |||||
| vector<rtEvent_t> event_list_; | vector<rtEvent_t> event_list_; | ||||
| @@ -866,6 +900,7 @@ class DavinciModel { | |||||
| bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. | bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. | ||||
| bool is_stream_list_bind_{false}; | |||||
| bool is_pure_head_stream_{false}; | bool is_pure_head_stream_{false}; | ||||
| rtStream_t rt_head_stream_{nullptr}; | rtStream_t rt_head_stream_{nullptr}; | ||||
| rtStream_t rt_entry_stream_{nullptr}; | rtStream_t rt_entry_stream_{nullptr}; | ||||
| @@ -891,8 +926,8 @@ class DavinciModel { | |||||
| std::set<uint32_t> hcom_streams_; | std::set<uint32_t> hcom_streams_; | ||||
| RuntimeParam runtime_param_; | RuntimeParam runtime_param_; | ||||
| static std::mutex tvm_bin_mutex_; // lock for tvm maps. | |||||
| static std::set<std::string> tvm_bin_kernel_; | |||||
| static std::mutex tvm_bin_mutex_; | |||||
| std::set<std::string> tvm_bin_kernel_; | |||||
| std::map<std::string, uint32_t> used_tbe_handle_map_; | std::map<std::string, uint32_t> used_tbe_handle_map_; | ||||
| @@ -906,6 +941,7 @@ class DavinciModel { | |||||
| uint64_t iterator_count_; | uint64_t iterator_count_; | ||||
| bool is_l1_fusion_enable_; | bool is_l1_fusion_enable_; | ||||
| std::map<OpDescPtr, void *> saved_task_addrs_; | std::map<OpDescPtr, void *> saved_task_addrs_; | ||||
| void *l1_fusion_addr_ = nullptr; | |||||
| bool known_node_ = false; | bool known_node_ = false; | ||||
| uint32_t total_args_size_ = 0; | uint32_t total_args_size_ = 0; | ||||
| @@ -921,7 +957,9 @@ class DavinciModel { | |||||
| vector<vector<int64_t>> batch_info_; | vector<vector<int64_t>> batch_info_; | ||||
| std::vector<std::vector<int64_t>> combined_batch_info_; | std::vector<std::vector<int64_t>> combined_batch_info_; | ||||
| vector<string> user_designate_shape_order_; | |||||
| int32_t dynamic_type_ = 0; | int32_t dynamic_type_ = 0; | ||||
| bool is_dynamic_ = false; | |||||
| vector<uint64_t> batch_size_; | vector<uint64_t> batch_size_; | ||||
| // key: input tensor name, generally rts op; | // key: input tensor name, generally rts op; | ||||
| @@ -938,6 +976,8 @@ class DavinciModel { | |||||
| void *op_debug_addr_ = nullptr; | void *op_debug_addr_ = nullptr; | ||||
| void *p2p_debug_addr_ = nullptr; | void *p2p_debug_addr_ = nullptr; | ||||
| bool is_new_model_desc_{false}; | bool is_new_model_desc_{false}; | ||||
| std::map<int64_t, void *> memcpy_4g_offset_addr_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | ||||
| @@ -20,6 +20,7 @@ | |||||
| #include "common/l2_cache_optimize.h" | #include "common/l2_cache_optimize.h" | ||||
| #include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
| #include "common/dump/dump_manager.h" | |||||
| #include "common/properties_manager.h" | #include "common/properties_manager.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/util.h" | #include "framework/common/util.h" | ||||
| @@ -172,7 +173,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||||
| return GE_EXEC_MODEL_ID_INVALID; | return GE_EXEC_MODEL_ID_INVALID; | ||||
| } | } | ||||
| uint64_t session_id = it->second->GetSessionId(); | uint64_t session_id = it->second->GetSessionId(); | ||||
| GELOGI("Destroy aicpu session for infer, session id is %u.", session_id); | |||||
| GELOGI("Destroy aicpu session for infer, session id is %lu.", session_id); | |||||
| DestroyAicpuSession(session_id); | DestroyAicpuSession(session_id); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -259,7 +260,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||||
| bool is_shape_unknown = false; | bool is_shape_unknown = false; | ||||
| GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_shape_unknown), "CheckIsUnknownShape failed, model id:%u", | GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_shape_unknown), "CheckIsUnknownShape failed, model id:%u", | ||||
| model_id); | model_id); | ||||
| if (is_shape_unknown) { | |||||
| if (is_shape_unknown || GetContext().GetHostExecFlag()) { | |||||
| return DoLoadHybridModelOnline(model_id, ge_root_model, listener); | return DoLoadHybridModelOnline(model_id, ge_root_model, listener); | ||||
| } | } | ||||
| @@ -729,6 +730,22 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vect | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Query the user designated shape order of the model registered under model_id | |||||
| /// @param [in] model_id: id passed to GetModel to find the model | |||||
| /// @param [out] user_input_shape_order: handed to DavinciModel::GetUserDesignateShapeOrder | |||||
| /// @return SUCCESS after the model has been queried; a missing model is reported through GE_CHK_BOOL_RET_STATUS with PARAM_INVALID | |||||
| /// | |||||
| Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, | |||||
| std::vector<std::string> &user_input_shape_order) { | |||||
| std::shared_ptr<DavinciModel> target_model = GetModel(model_id); | |||||
| const bool model_found = (target_model != nullptr); | |||||
| GE_CHK_BOOL_RET_STATUS(model_found, PARAM_INVALID, | |||||
| "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id) | |||||
| target_model->GetUserDesignateShapeOrder(user_input_shape_order); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | ||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
| GE_CHECK_NOTNULL(davinci_model); | GE_CHECK_NOTNULL(davinci_model); | ||||
| @@ -831,7 +848,11 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
| } | } | ||||
| davinci_model->SetDeviceId(device_id); | davinci_model->SetDeviceId(device_id); | ||||
| davinci_model->SetOmName(model.om_name); | davinci_model->SetOmName(model.om_name); | ||||
| davinci_model->SetDumpProperties(dump_properties_); | |||||
| if (DumpManager::GetInstance().IsDumpOpen()) { | |||||
| davinci_model->SetDumpProperties(DumpManager::GetInstance().GetDumpProperties()); | |||||
| } else { | |||||
| davinci_model->SetDumpProperties(dump_properties_); | |||||
| } | |||||
| /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. | /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. | ||||
| /// These session_ids come from the same model, so the values of session_id are the same. | /// These session_ids come from the same model, so the values of session_id are the same. | ||||
| @@ -1070,4 +1091,19 @@ ge::Status ModelManager::SyncExecuteModel(uint32_t model_id, const vector<GeTens | |||||
| return model->Execute(inputs, outputs); | return model->Execute(inputs, outputs); | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Ask every loaded model on the given device for the op description of a task | |||||
| /// @param [in] device_id: only models whose GetDeviceId() equals this value are queried | |||||
| /// @param [in] stream_id: forwarded to DavinciModel::GetOpDescInfo | |||||
| /// @param [in] task_id: forwarded to DavinciModel::GetOpDescInfo | |||||
| /// @param [out] op_desc_info: forwarded to DavinciModel::GetOpDescInfo (presumably filled on a match - confirm) | |||||
| /// @return SUCCESS as soon as one model reports a match, FAILED when none does | |||||
| /// | |||||
| Status ModelManager::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) { | |||||
| // NOTE(review): model_map_ is walked without taking a lock here - confirm that callers serialize access. | |||||
| for (const auto &model : model_map_) { | |||||
| // Bind by reference: copying the model pointer on every iteration is unnecessary. | |||||
| const auto &davinci_model = model.second; | |||||
| // Skip empty entries instead of dereferencing a null model pointer, and skip models on other devices. | |||||
| if (davinci_model == nullptr || davinci_model->GetDeviceId() != device_id) { | |||||
| continue; | |||||
| } | |||||
| GELOGI("Start to GetOpDescInfo of device_id: %u.", device_id); | |||||
| if (davinci_model->GetOpDescInfo(stream_id, task_id, op_desc_info)) { | |||||
| GELOGI("Find specific node of stream_id: %u, task_id: %u.", stream_id, task_id); | |||||
| return SUCCESS; | |||||
| } | |||||
| } | |||||
| return FAILED; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -201,6 +201,15 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| /// | /// | ||||
| ge::Status GetCombinedDynamicDims(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | ge::Status GetCombinedDynamicDims(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get user designate shape order | |||||
| /// @param [in] model_id | |||||
| /// @param [out] user_input_shape_order | |||||
| /// @return execute result | |||||
| /// | |||||
| Status GetUserDesignateShapeOrder(const uint32_t model_id, std::vector<std::string> &user_input_shape_order); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get AIPP info | /// @brief Get AIPP info | ||||
| @@ -263,6 +272,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| std::vector<InputOutputDims> &output_dims); | std::vector<InputOutputDims> &output_dims); | ||||
| bool IsDynamicShape(uint32_t model_id); | bool IsDynamicShape(uint32_t model_id); | ||||
| ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||||
| private: | private: | ||||
| /// | /// | ||||
| @@ -28,7 +28,6 @@ namespace { | |||||
| const uint32_t kMaxTaskOfStream = 200; | const uint32_t kMaxTaskOfStream = 200; | ||||
| } | } | ||||
| uint32_t HcclTaskInfo::max_node_of_hccl_stream_ = 0; | |||||
| std::mutex HcclTaskInfo::hccl_follow_stream_mutex_; | std::mutex HcclTaskInfo::hccl_follow_stream_mutex_; | ||||
| HcclTaskInfo::~HcclTaskInfo() { | HcclTaskInfo::~HcclTaskInfo() { | ||||
| @@ -41,7 +40,6 @@ HcclTaskInfo::~HcclTaskInfo() { | |||||
| } | } | ||||
| davinci_model_ = nullptr; | davinci_model_ = nullptr; | ||||
| ops_kernel_store_ = nullptr; | ops_kernel_store_ = nullptr; | ||||
| max_node_of_hccl_stream_ = 0; | |||||
| args_ = nullptr; | args_ = nullptr; | ||||
| } | } | ||||
| Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | ||||
| @@ -133,45 +131,39 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM | |||||
| } | } | ||||
| std::lock_guard<std::mutex> lock(hccl_follow_stream_mutex_); | std::lock_guard<std::mutex> lock(hccl_follow_stream_mutex_); | ||||
| if (max_node_of_hccl_stream_ == 0) { | |||||
| uint32_t max_stream_count; | |||||
| uint32_t max_task_count; | |||||
| ret = rtGetMaxStreamAndTask(RT_NORMAL_STREAM, &max_stream_count, &max_task_count); | |||||
| if (ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Get max stream and task count by rts failed."); | |||||
| return RT_ERROR_TO_GE_STATUS(ret); | |||||
| } | |||||
| max_node_of_hccl_stream_ = max_task_count / kMaxTaskOfStream; | |||||
| } | |||||
| int64_t main_stream_id = op_desc->GetStreamId(); | |||||
| const std::map<int64_t, std::vector<rtStream_t>> &main_follow_stream_mapping = davinci_model->GetHcclFolowStream(); | |||||
| if (static_cast<size_t>(hccl_stream_num) <= davinci_model->GetHcclFolowStream().size()) { | |||||
| GELOGI("capacity of follow stream is enough to be reused."); | |||||
| ReuseStream(hccl_stream_num, davinci_model); | |||||
| if (main_follow_stream_mapping.find(main_stream_id) != main_follow_stream_mapping.end()) { | |||||
| const std::vector<rtStream_t> &follow_stream_usage = main_follow_stream_mapping.at(main_stream_id); | |||||
| if (static_cast<size_t>(hccl_stream_num) <= follow_stream_usage.size()) { | |||||
| GELOGI("capacity of follow stream is enough to be reused."); | |||||
| for (int64_t i = 0; i < hccl_stream_num; i++) { | |||||
| hccl_stream_list_.emplace_back(follow_stream_usage.at(i)); | |||||
| } | |||||
| } else { | |||||
| GELOGI("need to reuse follow stream and create new follow stream."); | |||||
| size_t created_stream_num = follow_stream_usage.size(); | |||||
| hccl_stream_list_ = follow_stream_usage; | |||||
| ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model, main_stream_id); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(RT_FAILED, "Create hccl stream failed."); | |||||
| return RT_ERROR_TO_GE_STATUS(ret); | |||||
| } | |||||
| } | |||||
| GELOGI("Initialize hccl slave stream success, hcclStreamNum =%ld", hccl_stream_num); | |||||
| } else { | } else { | ||||
| GELOGI("need to reuse follow stream and create new follow stream."); | |||||
| size_t created_stream_num = davinci_model->GetHcclFolowStream().size(); | |||||
| ReuseStream(created_stream_num, davinci_model); | |||||
| ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model); | |||||
| GELOGI("need to create follow stream for %s with new mainstream %ld.", op_desc->GetName().c_str(), main_stream_id); | |||||
| ret = CreateStream(hccl_stream_num, davinci_model, main_stream_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(RT_FAILED, "Create hccl stream failed."); | GELOGE(RT_FAILED, "Create hccl stream failed."); | ||||
| return RT_ERROR_TO_GE_STATUS(ret); | return RT_ERROR_TO_GE_STATUS(ret); | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("Initialize hccl slave stream success, hcclStreamNum =%ld", hccl_stream_num); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void HcclTaskInfo::ReuseStream(int64_t stream_num, DavinciModel *davinci_model) { | |||||
| GELOGI("Start to reuse %ld follow stream.", stream_num); | |||||
| int64_t index = 0; | |||||
| for (int64_t i = 0; i < stream_num; i++) { | |||||
| hccl_stream_list_.emplace_back(davinci_model->GetHcclFolowStream().at(index).first); | |||||
| int64_t remain_cap = davinci_model->GetHcclFolowStream().at(index).second - 1; | |||||
| davinci_model->ReuseHcclFollowStream(remain_cap, index); | |||||
| } | |||||
| } | |||||
| Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_model) { | |||||
| Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_model, int64_t main_stream_id) { | |||||
| GELOGI("Start to create %ld hccl stream.", stream_num); | GELOGI("Start to create %ld hccl stream.", stream_num); | ||||
| for (int64_t i = 0; i < stream_num; ++i) { | for (int64_t i = 0; i < stream_num; ++i) { | ||||
| rtStream_t stream = nullptr; | rtStream_t stream = nullptr; | ||||
| @@ -189,8 +181,7 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| } | } | ||||
| GELOGD("hccl_stream addr is=%p", stream); | GELOGD("hccl_stream addr is=%p", stream); | ||||
| int64_t remain_cap = max_node_of_hccl_stream_ - 1; | |||||
| davinci_model->CreateHcclFollowStream(stream, remain_cap); | |||||
| davinci_model->SaveHcclFollowStream(main_stream_id, stream); | |||||
| hccl_stream_list_.emplace_back(stream); | hccl_stream_list_.emplace_back(stream); | ||||
| davinci_model->PushHcclStream(stream); | davinci_model->PushHcclStream(stream); | ||||
| @@ -60,9 +60,7 @@ class HcclTaskInfo : public TaskInfo { | |||||
| void GetPrivateDefByTaskDef(const domi::TaskDef &task); | void GetPrivateDefByTaskDef(const domi::TaskDef &task); | ||||
| void ReuseStream(int64_t stream_num, DavinciModel *davinci_model); | |||||
| ge::Status CreateStream(int64_t stream_num, DavinciModel *davinci_model); | |||||
| ge::Status CreateStream(int64_t stream_num, DavinciModel *davinci_model, int64_t main_stream_id); | |||||
| Status SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciModel *davinci_model); | Status SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciModel *davinci_model); | ||||
| @@ -77,7 +75,6 @@ class HcclTaskInfo : public TaskInfo { | |||||
| void *private_def_; | void *private_def_; | ||||
| uint32_t private_def_len_; | uint32_t private_def_len_; | ||||
| static std::mutex hccl_follow_stream_mutex_; | static std::mutex hccl_follow_stream_mutex_; | ||||
| static uint32_t max_node_of_hccl_stream_; | |||||
| vector<GETaskKernelHcclInfo> kernel_hccl_infos_; | vector<GETaskKernelHcclInfo> kernel_hccl_infos_; | ||||
| vector<void *> input_data_addrs_; | vector<void *> input_data_addrs_; | ||||
| vector<void *> output_data_addrs_; | vector<void *> output_data_addrs_; | ||||
| @@ -25,7 +25,6 @@ | |||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/l2_cache_optimize.h" | #include "framework/common/l2_cache_optimize.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/load/new_model_manager/davinci_model.h" | #include "graph/load/new_model_manager/davinci_model.h" | ||||
| #include "graph/load/new_model_manager/model_utils.h" | #include "graph/load/new_model_manager/model_utils.h" | ||||
| #include "runtime/kernel.h" | #include "runtime/kernel.h" | ||||
| @@ -92,7 +91,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||||
| string session_graph_model_id; | string session_graph_model_id; | ||||
| davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); | davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); | ||||
| // get bin_file_key | // get bin_file_key | ||||
| const char *bin_file_key = DavinciModel::GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||||
| const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||||
| // new aicpu kernel(rtCpuKernelLaunch) no need to check function | // new aicpu kernel(rtCpuKernelLaunch) no need to check function | ||||
| if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) { | if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) { | ||||
| rtError_t rt_ret; | rtError_t rt_ret; | ||||
| @@ -395,7 +394,14 @@ Status KernelTaskInfo::Distribute() { | |||||
| "stubfunc:%p blockdim:%u stream:%p", | "stubfunc:%p blockdim:%u stream:%p", | ||||
| call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | ||||
| // l1 fusion enable and env flag open (kCloseSkt for skt debug) | // l1 fusion enable and env flag open (kCloseSkt for skt debug) | ||||
| if (call_skt && (env_flag != kCloseSkt)) { | |||||
| bool open_dump = false; | |||||
| auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel(); | |||||
| if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | |||||
| all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || | |||||
| all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { | |||||
| open_dump = true; | |||||
| } | |||||
| if (call_skt && (env_flag != kCloseSkt) && !open_dump) { | |||||
| GE_RETURN_IF_ERROR(SuperKernelDistribute()); | GE_RETURN_IF_ERROR(SuperKernelDistribute()); | ||||
| } else { | } else { | ||||
| // call rtKernelLaunch for current task | // call rtKernelLaunch for current task | ||||
| @@ -577,7 +583,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
| // When inferencing, stub_func_ is different from dynamic-registration to runtime, and needs to be modified. | // When inferencing, stub_func_ is different from dynamic-registration to runtime, and needs to be modified. | ||||
| string session_graph_model_id; | string session_graph_model_id; | ||||
| davinci_model_->GetUniqueId(op_desc, session_graph_model_id); | davinci_model_->GetUniqueId(op_desc, session_graph_model_id); | ||||
| const char *bin_file_key = DavinciModel::GetRegisterStub(op_desc->GetName(), session_graph_model_id); | |||||
| const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc->GetName(), session_graph_model_id); | |||||
| rtError_t rt_ret = rtQueryFunctionRegistered(const_cast<char *>(bin_file_key)); | rtError_t rt_ret = rtQueryFunctionRegistered(const_cast<char *>(bin_file_key)); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| stub_func_ = const_cast<char *>(bin_file_key); | stub_func_ = const_cast<char *>(bin_file_key); | ||||
| @@ -634,7 +640,11 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
| skt_dump_args_ = static_cast<char *>(args_) + offset; | skt_dump_args_ = static_cast<char *>(args_) + offset; | ||||
| if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | ||||
| op_desc->GetName())) { | op_desc->GetName())) { | ||||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | |||||
| if (IsL1FusionOp(op_desc)) { | |||||
| dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | |||||
| } else { | |||||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | |||||
| } | |||||
| dump_args_ = static_cast<char *>(args_) + offset; | dump_args_ = static_cast<char *>(args_) + offset; | ||||
| } | } | ||||
| @@ -653,6 +663,25 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| bool KernelTaskInfo::IsL1FusionOp(const OpDescPtr &op_desc) { | |||||
| std::vector<int64_t> input_memory_type; | |||||
| (void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_type); | |||||
| for (size_t i = 0; i < input_memory_type.size(); ++i) { | |||||
| if (input_memory_type.at(i) == RT_MEMORY_L1) { | |||||
| return true; | |||||
| } | |||||
| } | |||||
| std::vector<int64_t> output_memory_type; | |||||
| (void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, output_memory_type); | |||||
| for (size_t i = 0; i < output_memory_type.size(); ++i) { | |||||
| if (output_memory_type.at(i) == RT_MEMORY_L1) { | |||||
| return true; | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::KernelDef &kernel_def) { | Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::KernelDef &kernel_def) { | ||||
| GELOGI("Do InitAICPUCustomTask"); | GELOGI("Do InitAICPUCustomTask"); | ||||
| OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); | OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); | ||||
| @@ -904,7 +933,11 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | ||||
| op_desc->GetName())) { | op_desc->GetName())) { | ||||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | |||||
| if (IsL1FusionOp(op_desc)) { | |||||
| dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | |||||
| } else { | |||||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | |||||
| } | |||||
| dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead); | dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead); | ||||
| } | } | ||||
| @@ -127,6 +127,7 @@ class KernelTaskInfo : public TaskInfo { | |||||
| static void FreeRtMem(void **ptr); | static void FreeRtMem(void **ptr); | ||||
| Status SuperKernelDistribute(); | Status SuperKernelDistribute(); | ||||
| bool IsL1FusionOp(const OpDescPtr &op_desc); | |||||
| // For super kernel | // For super kernel | ||||
| Status SaveSKTDumpInfo(); | Status SaveSKTDumpInfo(); | ||||
| @@ -59,7 +59,12 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel | |||||
| // malloc args memory | // malloc args memory | ||||
| size_t args_size = sizeof(void *) * io_addrs.size(); | size_t args_size = sizeof(void *) * io_addrs.size(); | ||||
| rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, RT_MEMORY_HBM); | |||||
| rtMemType_t memory_type = RT_MEMORY_HBM; | |||||
| if (op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE)) { | |||||
| memory_type = RT_MEMORY_TS_4G; | |||||
| } | |||||
| GELOGI("memory_type: %u", memory_type); | |||||
| rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, memory_type); | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| @@ -36,6 +36,12 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||||
| count_ = memcpy_async.count(); | count_ = memcpy_async.count(); | ||||
| kind_ = memcpy_async.kind(); | kind_ = memcpy_async.kind(); | ||||
| dst_max_ = memcpy_async.dst_max(); | dst_max_ = memcpy_async.dst_max(); | ||||
| OpDescPtr op_desc = davinci_model->GetOpByIndex(memcpy_async.op_index()); | |||||
| if (op_desc == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async.op_index()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (davinci_model->IsKnownNode()) { | if (davinci_model->IsKnownNode()) { | ||||
| src_ = reinterpret_cast<uint8_t *>(davinci_model_->GetCurrentArgsAddr(args_offset_)); | src_ = reinterpret_cast<uint8_t *>(davinci_model_->GetCurrentArgsAddr(args_offset_)); | ||||
| dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(src_) + sizeof(void *)); | dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(src_) + sizeof(void *)); | ||||
| @@ -49,9 +55,17 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_); | |||||
| if (ret != SUCCESS) { | |||||
| return ret; | |||||
| // dst_ needs different address for different chips | |||||
| if (op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE)) { | |||||
| ret = AllocTsMemoryForMemcpy(op_desc, davinci_model); | |||||
| if (ret != SUCCESS) { | |||||
| return ret; | |||||
| } | |||||
| } else { | |||||
| ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_); | |||||
| if (ret != SUCCESS) { | |||||
| return ret; | |||||
| } | |||||
| } | } | ||||
| GELOGI("MemcpyAsyncTaskInfo Init Success, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu", | GELOGI("MemcpyAsyncTaskInfo Init Success, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu", | ||||
| @@ -108,5 +122,33 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status MemcpyAsyncTaskInfo::AllocTsMemoryForMemcpy(const OpDescPtr &op_desc, DavinciModel *davinci_model) { | |||||
| int64_t size = 0; | |||||
| auto tensor_desc = op_desc->GetOutputDescPtr(0); | |||||
| if ((tensor_desc == nullptr) || (TensorUtils::GetTensorSizeInBytes(*tensor_desc, size) != GRAPH_SUCCESS)) { | |||||
| GELOGE(FAILED, "GetTensorSizeInBytes failed!"); | |||||
| return FAILED; | |||||
| } | |||||
| rtError_t rt_ret = rtMalloc(&memory_4g_, size, RT_MEMORY_TS_4G); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "rtMalloc failed, ret: 0x%X", rt_ret); | |||||
| return FAILED; | |||||
| } | |||||
| // map save the opdesc's offset and special address, for update the streamSwitchN's input address | |||||
| std::map<int64_t, void *> memcpy_4g_offset_addr; | |||||
| vector<int64_t> offsets = op_desc->GetOutputOffset(); | |||||
| if (offsets.empty()) { | |||||
| GELOGE(FAILED, "GetOutputOffset failed!"); | |||||
| return FAILED; | |||||
| } | |||||
| memcpy_4g_offset_addr.insert(std::pair<int64_t, void *>(offsets[0], memory_4g_)); | |||||
| davinci_model->SetMemcpyOffsetAndAddr(memcpy_4g_offset_addr); | |||||
| dst_ = reinterpret_cast<uint8_t *>(memory_4g_); | |||||
| return SUCCESS; | |||||
| } | |||||
| REGISTER_TASK_INFO(RT_MODEL_TASK_MEMCPY_ASYNC, MemcpyAsyncTaskInfo); | REGISTER_TASK_INFO(RT_MODEL_TASK_MEMCPY_ASYNC, MemcpyAsyncTaskInfo); | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -18,15 +18,24 @@ | |||||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | ||||
| #include "graph/load/new_model_manager/task_info/task_info.h" | #include "graph/load/new_model_manager/task_info/task_info.h" | ||||
| #include "graph/op_desc.h" | |||||
| namespace ge { | namespace ge { | ||||
| class MemcpyAsyncTaskInfo : public TaskInfo { | class MemcpyAsyncTaskInfo : public TaskInfo { | ||||
| public: | public: | ||||
| MemcpyAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), count_(0), kind_(0) {} | |||||
| MemcpyAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), count_(0), kind_(0), memory_4g_(nullptr) {} | |||||
| ~MemcpyAsyncTaskInfo() override { | ~MemcpyAsyncTaskInfo() override { | ||||
| src_ = nullptr; | src_ = nullptr; | ||||
| dst_ = nullptr; | dst_ = nullptr; | ||||
| if (memory_4g_ != nullptr) { | |||||
| rtError_t ret = rtFree(memory_4g_); | |||||
| if (ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret); | |||||
| } | |||||
| memory_4g_ = nullptr; | |||||
| } | |||||
| } | } | ||||
| Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | ||||
| @@ -38,6 +47,7 @@ class MemcpyAsyncTaskInfo : public TaskInfo { | |||||
| Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | ||||
| private: | private: | ||||
| Status AllocTsMemoryForMemcpy(const OpDescPtr &op_desc, DavinciModel *davinci_model); | |||||
| uint8_t *dst_; | uint8_t *dst_; | ||||
| uint64_t dst_max_; | uint64_t dst_max_; | ||||
| uint8_t *src_; | uint8_t *src_; | ||||
| @@ -46,6 +56,7 @@ class MemcpyAsyncTaskInfo : public TaskInfo { | |||||
| DavinciModel *davinci_model_ = nullptr; | DavinciModel *davinci_model_ = nullptr; | ||||
| uint32_t args_offset_ = 0; | uint32_t args_offset_ = 0; | ||||
| domi::MemcpyAsyncDef memcpy_async; | domi::MemcpyAsyncDef memcpy_async; | ||||
| void *memory_4g_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | ||||
| @@ -66,16 +66,13 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * | |||||
| GELOGE(FAILED, "Get true stream ptr of switchN op failed."); | GELOGE(FAILED, "Get true stream ptr of switchN op failed."); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| if (davinci_model->IsKnownNode()) { | |||||
| input_ptr_ = davinci_model->GetCurrentFixedAddr(args_offset_); | |||||
| } else { | |||||
| auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); | |||||
| if (input_data_addr.empty()) { | |||||
| GELOGE(FAILED, "Input data addr is nullptr."); | |||||
| return FAILED; | |||||
| } | |||||
| input_ptr_ = input_data_addr[0]; | |||||
| // update StreamSwitchN's input_ptr_ | |||||
| Status ret = InputPtrUpdate(op_desc, davinci_model); | |||||
| if (ret != SUCCESS) { | |||||
| return ret; | |||||
| } | } | ||||
| davinci_model->DisableZeroCopy(input_ptr_); | davinci_model->DisableZeroCopy(input_ptr_); | ||||
| GELOGI("StreamSwitchNTaskInfo Init Success, inputSize:%u, elementSize:%d, trueStreamID:%ld.", input_size_, | GELOGI("StreamSwitchNTaskInfo Init Success, inputSize:%u, elementSize:%d, trueStreamID:%ld.", input_size_, | ||||
| element_size_, op_desc->GetStreamId()); | element_size_, op_desc->GetStreamId()); | ||||
| @@ -154,5 +151,36 @@ Status StreamSwitchNTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davin | |||||
| GELOGI("Calculate stream switchn task args , tensor_size %ld, args_offset %ld", tensor_size, args_offset_); | GELOGI("Calculate stream switchn task args , tensor_size %ld, args_offset %ld", tensor_size, args_offset_); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status StreamSwitchNTaskInfo::InputPtrUpdate(const OpDescPtr &op_desc, DavinciModel *davinci_model) { | |||||
| bool is_4g_mem = false; | |||||
| const map<int64_t, void *> memcpy_4g_offset_addr = davinci_model->GetMemcpyOffsetAndAddr(); | |||||
| vector<int64_t> input_offset = op_desc->GetInputOffset(); | |||||
| if (input_offset.empty()) { | |||||
| GELOGE(FAILED, "Get StreamSwitchN's input offset failed."); | |||||
| return FAILED; | |||||
| } | |||||
| auto iter = memcpy_4g_offset_addr.find(input_offset[0]); | |||||
| if (iter != memcpy_4g_offset_addr.end()) { | |||||
| input_ptr_ = iter->second; | |||||
| is_4g_mem = true; | |||||
| } | |||||
| if (is_4g_mem == false) { | |||||
| if (davinci_model->IsKnownNode()) { | |||||
| input_ptr_ = davinci_model->GetCurrentFixedAddr(args_offset_); | |||||
| } else { | |||||
| auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); | |||||
| if (input_data_addr.empty()) { | |||||
| return FAILED; | |||||
| } | |||||
| input_ptr_ = input_data_addr[0]; | |||||
| } | |||||
| } | |||||
| GELOGI("StreamSwitchN's input_ptr is %p, is_4g_mem: %d", input_ptr_, is_4g_mem); | |||||
| return SUCCESS; | |||||
| } | |||||
| REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH_N, StreamSwitchNTaskInfo); | REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH_N, StreamSwitchNTaskInfo); | ||||
| } // namespace ge | |||||
| } // namespace ge | |||||
| @@ -42,6 +42,7 @@ class StreamSwitchNTaskInfo : public TaskInfo { | |||||
| private: | private: | ||||
| Status GetTrueStreamPtr(const OpDescPtr &op_desc, DavinciModel *davinci_model); | Status GetTrueStreamPtr(const OpDescPtr &op_desc, DavinciModel *davinci_model); | ||||
| Status InputPtrUpdate(const OpDescPtr &op_desc, DavinciModel *davinci_model); | |||||
| void *input_ptr_; | void *input_ptr_; | ||||
| uint32_t input_size_; | uint32_t input_size_; | ||||
| void *value_ptr_; | void *value_ptr_; | ||||
| @@ -143,10 +143,11 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const ma | |||||
| /** | /** | ||||
| * @ingroup ge | * @ingroup ge | ||||
| * @brief Update task param to device. | * @brief Update task param to device. | ||||
| * @param [in] async_mode: true for asynchronous mode. | |||||
| * @param [in] stream: Stream for asynchronous update. | * @param [in] stream: Stream for asynchronous update. | ||||
| * @return: 0 SUCCESS / others FAILED | * @return: 0 SUCCESS / others FAILED | ||||
| */ | */ | ||||
| Status ZeroCopyTask::DistributeParam(rtStream_t stream) { | |||||
| Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) { | |||||
| if (!is_updated_) { | if (!is_updated_) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -154,7 +155,7 @@ Status ZeroCopyTask::DistributeParam(rtStream_t stream) { | |||||
| is_updated_ = false; | is_updated_ = false; | ||||
| GE_CHECK_NOTNULL(args_addr_); | GE_CHECK_NOTNULL(args_addr_); | ||||
| rtError_t rt_err = RT_ERROR_NONE; | rtError_t rt_err = RT_ERROR_NONE; | ||||
| if (stream != nullptr) { | |||||
| if (async_mode) { | |||||
| rt_err = | rt_err = | ||||
| rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, stream); | rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, stream); | ||||
| } else { | } else { | ||||
| @@ -77,10 +77,11 @@ class ZeroCopyTask { | |||||
| /** | /** | ||||
| * @ingroup ge | * @ingroup ge | ||||
| * @brief Update task param to device. | * @brief Update task param to device. | ||||
| * @param [in] async_mode: true for asynchronous mode. | |||||
| * @param [in] stream: Stream for asynchronous update. | * @param [in] stream: Stream for asynchronous update. | ||||
| * @return: 0 SUCCESS / others FAILED | * @return: 0 SUCCESS / others FAILED | ||||
| */ | */ | ||||
| ge::Status DistributeParam(rtStream_t stream); | |||||
| ge::Status DistributeParam(bool async_mode, rtStream_t stream); | |||||
| protected: | protected: | ||||
| bool CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, uintptr_t addr); | bool CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, uintptr_t addr); | ||||
| @@ -97,4 +98,4 @@ class ZeroCopyTask { | |||||
| map<uintptr_t, vector<size_t>> task_addr_offset_; | map<uintptr_t, vector<size_t>> task_addr_offset_; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_TASK_H_ | |||||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_TASK_H_ | |||||
| @@ -91,7 +91,13 @@ | |||||
| #include "graph/passes/variable_ref_delete_op_pass.h" | #include "graph/passes/variable_ref_delete_op_pass.h" | ||||
| #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" | #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" | ||||
| #include "graph/passes/end_of_sequence_add_control_pass.h" | #include "graph/passes/end_of_sequence_add_control_pass.h" | ||||
| #include "graph/passes/subexpression_migration_pass.h" | |||||
| #include "graph/passes/unused_args_clean_pass.h" | |||||
| #include "graph/passes/global_step_insert_pass.h" | |||||
| #include "graph/utils/tensor_adapter.h" | #include "graph/utils/tensor_adapter.h" | ||||
| #include "graph/utils/type_utils.h" | |||||
| #include "graph/graph_util.h" | |||||
| #include "graph/types.h" | |||||
| #include "inc/pass_manager.h" | #include "inc/pass_manager.h" | ||||
| #include "init/gelib.h" | #include "init/gelib.h" | ||||
| @@ -102,6 +108,8 @@ const char *const kNetOutput = "NetOutput"; | |||||
| const char *const kVariable = "Variable"; | const char *const kVariable = "Variable"; | ||||
| const char *const kSend = "Send"; | const char *const kSend = "Send"; | ||||
| const char *const kRecv = "Recv"; | const char *const kRecv = "Recv"; | ||||
| const char *const kCheckPointForGetVar = "CheckPointGraphForGetVar"; | |||||
| const char *const kCheckPointGraph = "checkpoint_graph"; | |||||
| bool IsTailingOptimization() { | bool IsTailingOptimization() { | ||||
| string is_tailing_optimization_option; | string is_tailing_optimization_option; | ||||
| @@ -380,6 +388,11 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| GM_RUN_AND_DUMP_PERF("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner); | GM_RUN_AND_DUMP_PERF("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner); | ||||
| GM_RUN_AND_DUMP_PERF("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph); | GM_RUN_AND_DUMP_PERF("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph); | ||||
| if (std::getenv("AnalyzeMode")) { | |||||
| GELOGI("Do return failed after refine_running_format when in analyze mode!"); | |||||
| return FAILED; | |||||
| } | |||||
| GM_RUN_AND_DUMP_PERF("SubexpressionMigration", SubexpressionMigration, compute_graph); | |||||
| GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph); | GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph); | ||||
| if (IsTailingOptimization()) { | if (IsTailingOptimization()) { | ||||
| GM_RUN_AND_DUMP_PERF("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph); | GM_RUN_AND_DUMP_PERF("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph); | ||||
| @@ -392,9 +405,11 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) | GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) | ||||
| GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); | GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); | ||||
| GM_RUN_AND_DUMP_PERF("OptimizeSubgraph", OptimizeSubgraph, graph_node, compute_graph, session_id); | GM_RUN_AND_DUMP_PERF("OptimizeSubgraph", OptimizeSubgraph, graph_node, compute_graph, session_id); | ||||
| GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | ||||
| GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", graph_optimize_.OptimizeGraphBeforeBuildForRts, compute_graph); | |||||
| GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); | GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); | ||||
| // when set incre build, save om model and var manager | // when set incre build, save om model and var manager | ||||
| @@ -403,12 +418,25 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| if (save_ret != SUCCESS) { | if (save_ret != SUCCESS) { | ||||
| GELOGW("Fail to save cache."); | GELOGW("Fail to save cache."); | ||||
| } | } | ||||
| // release rts generate context | |||||
| RtContextUtil::GetInstance().DestroyRtContexts(session_id); | |||||
| GEEVENT("[GEPERFTRACE] GE PreRun End"); | GEEVENT("[GEPERFTRACE] GE PreRun End"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| #undef RUN_AND_DUMP | |||||
| Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { | |||||
| PassManager pass_manager; | |||||
| GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass)); | |||||
| GE_CHK_STATUS_RET(pass_manager.AddPass("UnusedArgsCleanPass", new (std::nothrow) UnusedArgsCleanPass)); | |||||
| GE_TIMESTAMP_START(SubexpressionMigrationPass); | |||||
| auto ret = pass_manager.Run(compute_graph); | |||||
| GE_TIMESTAMP_END(SubexpressionMigrationPass, "GraphManager::OptimizeStage1_1"); | |||||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||||
| GELOGE(ret, "Run SubexpressionMigrationPass failed, ret:%u.", ret); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, | Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, | ||||
| GeRootModelPtr &ge_root_model, uint64_t session_id) { | GeRootModelPtr &ge_root_model, uint64_t session_id) { | ||||
| @@ -427,6 +455,8 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||||
| ret = IncreBuild(graph_node, ge_model); | ret = IncreBuild(graph_node, ge_model); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| ret = PreRun(graph_node, inputs, ge_root_model, session_id); | ret = PreRun(graph_node, inputs, ge_root_model, session_id); | ||||
| // release rts generate context | |||||
| RtContextUtil::GetInstance().DestroyRtContexts(session_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "PreRun Failed."); | GELOGE(ret, "PreRun Failed."); | ||||
| return ret; | return ret; | ||||
| @@ -1388,6 +1418,9 @@ bool GraphManager::CheckNetOutputForCheckpointGraph(NodePtr &node) { | |||||
| } | } | ||||
| bool GraphManager::CheckVariableForCheckpointGraph(NodePtr &node) { | bool GraphManager::CheckVariableForCheckpointGraph(NodePtr &node) { | ||||
| if (node->GetOpDesc()->HasAttr(kCheckPointForGetVar)) { | |||||
| return false; | |||||
| } | |||||
| auto out = node->GetOutDataAnchor(0); | auto out = node->GetOutDataAnchor(0); | ||||
| if (out == nullptr) { | if (out == nullptr) { | ||||
| GELOGE(GE_GRAPH_PARAM_NULLPTR, "out is nullptr."); | GELOGE(GE_GRAPH_PARAM_NULLPTR, "out is nullptr."); | ||||
| @@ -1573,48 +1606,6 @@ Status GraphManager::RemoveIsolatedConst(ge::ComputeGraphPtr &compute_graph) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphManager::NewOptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_graph) { | |||||
| GELOGD("NewOptimizeAfterMergeSubGraph in"); | |||||
| GEPass ge_passes(compute_graph); | |||||
| NamesToPass names_to_passes; | |||||
| ConstantFoldingPass constant_folding_pass; | |||||
| names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | |||||
| GE_TIMESTAMP_START(names_to_passes); | |||||
| auto ret = ge_passes.Run(names_to_passes); | |||||
| GE_TIMESTAMP_END(names_to_passes, "GraphManager::ge_passes"); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Run ge_passes optimize for OptimizeAfterMergeSubGraph failed, ret:%d.", ret); | |||||
| return ret; | |||||
| } | |||||
| ret = RemoveIsolatedConst(compute_graph); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Remove isolated Constant failed, ret:%d.", ret); | |||||
| return ret; | |||||
| } | |||||
| PassManager passes; | |||||
| GE_CHK_STATUS_RET(passes.AddPass("MultiBatchPass", new (std::nothrow) MultiBatchPass)); | |||||
| GE_CHK_STATUS_RET(passes.AddPass("CompileNodesPass", new (std::nothrow) CompileNodesPass)); | |||||
| GE_CHK_STATUS_RET(passes.AddPass("AtomicAddrCleanPass", new (std::nothrow) AtomicAddrCleanPass)); | |||||
| GE_TIMESTAMP_START(passes); | |||||
| ret = passes.Run(compute_graph); | |||||
| GE_TIMESTAMP_END(passes, "GraphManager::passes"); | |||||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||||
| GELOGE(ret, "Run passes optimize for OptimizeAfterMergeSubGraph failed"); | |||||
| return ret; | |||||
| } | |||||
| ret = compute_graph->TopologicalSorting(); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Graph topological sort failed, ret:%d.", ret); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | ||||
| string options = "default"; | string options = "default"; | ||||
| if (GetContext().GetOption("ge.exec.variable_acc", options) != SUCCESS) { | if (GetContext().GetOption("ge.exec.variable_acc", options) != SUCCESS) { | ||||
| @@ -1721,10 +1712,17 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||||
| graph_pass.AddPass("OptimizeStage1_3::SwitchToStreamSwitchPass", new (std::nothrow) SwitchToStreamSwitchPass)) | graph_pass.AddPass("OptimizeStage1_3::SwitchToStreamSwitchPass", new (std::nothrow) SwitchToStreamSwitchPass)) | ||||
| GE_CHK_STATUS_RET( | GE_CHK_STATUS_RET( | ||||
| graph_pass.AddPass("OptimizeStage1_3::AttachStreamLabelPass", new (std::nothrow) AttachStreamLabelPass)) | graph_pass.AddPass("OptimizeStage1_3::AttachStreamLabelPass", new (std::nothrow) AttachStreamLabelPass)) | ||||
| GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::MultiBatchPass", new (std::nothrow) MultiBatchPass(true))) | |||||
| GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::IteratorOpPass", new (std::nothrow) IteratorOpPass)) | GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::IteratorOpPass", new (std::nothrow) IteratorOpPass)) | ||||
| GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", | GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", | ||||
| new (std::nothrow) VariableRefUselessControlOutDeletePass)) | new (std::nothrow) VariableRefUselessControlOutDeletePass)) | ||||
| GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ReshapeRecoveryPass", new (std::nothrow) ReshapeRecoveryPass)) | GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ReshapeRecoveryPass", new (std::nothrow) ReshapeRecoveryPass)) | ||||
| if (options_.train_graph_flag) { | |||||
| // Priority: The GlobalStepInsertPass should work before graph partitioner. | |||||
| // Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory | |||||
| GE_CHK_STATUS_RET( | |||||
| graph_pass.AddPass("OptimizeStage1_3::GlobalStepInsertPass", new (std::nothrow) GlobalStepInsertPass)) | |||||
| } | |||||
| GE_TIMESTAMP_START(graph_pass); | GE_TIMESTAMP_START(graph_pass); | ||||
| ret = graph_pass.Run(compute_graph); | ret = graph_pass.Run(compute_graph); | ||||
| GE_TIMESTAMP_END(graph_pass, "GraphManager::OptimizeStage1_3"); | GE_TIMESTAMP_END(graph_pass, "GraphManager::OptimizeStage1_3"); | ||||
| @@ -1787,11 +1785,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
| PassManager pass_for_control_attr_optimize; | PassManager pass_for_control_attr_optimize; | ||||
| if (options_.train_graph_flag) { | if (options_.train_graph_flag) { | ||||
| const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||||
| if (unknown_shape_skip == nullptr) { | |||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::FlowCtrlPass", | |||||
| new (std::nothrow) FlowCtrlPass)) | |||||
| } | |||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::FlowCtrlPass", | |||||
| new (std::nothrow) FlowCtrlPass)) | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass", | GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass", | ||||
| @@ -1821,14 +1816,10 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
| pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::" | pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::" | ||||
| "EndOfSequenceAddControlPass", | "EndOfSequenceAddControlPass", | ||||
| new (std::nothrow) EndOfSequenceAddControlPass)) | new (std::nothrow) EndOfSequenceAddControlPass)) | ||||
| const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||||
| if (unknown_shape_skip == nullptr) { | |||||
| // SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and | |||||
| // graph-structure. So try not to add new pass after SubgraphPass. | |||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", | |||||
| new (std::nothrow) SubgraphPass)) | |||||
| } | |||||
| // SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and | |||||
| // graph-structure. So try not to add new pass after SubgraphPass. | |||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", | |||||
| new (std::nothrow) SubgraphPass)) | |||||
| // AttachStreamLabelPass modifies attr without changing structure of compute_graph | // AttachStreamLabelPass modifies attr without changing structure of compute_graph | ||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::AttachStreamLabelPass", | GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::AttachStreamLabelPass", | ||||
| new (std::nothrow) AttachStreamLabelPass)) | new (std::nothrow) AttachStreamLabelPass)) | ||||
| @@ -1870,120 +1861,6 @@ void GraphManager::ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_gr | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| Status GraphManager::OptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_graph) { | |||||
| GELOGI("Start optimize after merge sub graph."); | |||||
| GEPass ge_passes_for_shape(compute_graph); | |||||
| NamesToPass names_to_passes_for_shape; | |||||
| CastRemovePass cast_remove_pass; | |||||
| names_to_passes_for_shape.emplace_back("CastRemovePass", &cast_remove_pass); | |||||
| TransposeTransDataPass transpose_transdata_pass; | |||||
| names_to_passes_for_shape.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); | |||||
| GE_TIMESTAMP_START(ge_passes_for_shape); | |||||
| Status ret = ge_passes_for_shape.Run(names_to_passes_for_shape); | |||||
| GE_TIMESTAMP_END(ge_passes_for_shape, "GraphManager::GePassesForShape"); | |||||
| GE_CHK_STATUS_RET(ret, "Run ge_passes_for_shape optimize for OptimizeAfterMergeSubGraph failed, ret:%d.", ret); | |||||
| string options = "default"; | |||||
| if (GetContext().GetOption("ge.exec.variable_acc", options) != SUCCESS) { | |||||
| GELOGI("get ge.exec.variable_acc failed. set default value."); | |||||
| } | |||||
| PassManager after_merge_passes; | |||||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("PermutePass", new (std::nothrow) PermutePass)); | |||||
| GE_IF_BOOL_EXEC(options == "default" || options == "1", GELOGI("turn on variable accelerator"); GE_CHK_STATUS_RET( | |||||
| after_merge_passes.AddPass("VariableOpPass", new (std::nothrow) VariableOpPass(&var_acc_ctrl_)))); | |||||
| ret = after_merge_passes.Run(compute_graph); | |||||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||||
| GELOGE(ret, "Run passes after merge sub graph failed, ret:%d.", ret); | |||||
| return ret; | |||||
| } | |||||
| // reshape remove + symmetry_elimination_pass to replace transop depth fusion pass | |||||
| GEPass ge_passes_symmetry(compute_graph); | |||||
| NamesToPass names_to_passes_for_symmetry; | |||||
| ReshapeRemovePass reshape_remove_pass; | |||||
| names_to_passes_for_symmetry.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||||
| TransOpSymmetryEliminationPass symmetry_elimination_pass; | |||||
| names_to_passes_for_symmetry.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); | |||||
| ret = ge_passes_symmetry.Run(names_to_passes_for_symmetry); | |||||
| GE_CHK_STATUS_RET(ret, "Run ge_passes optimize for OptimizeAfterMergeSubGraph failed, ret:%d.", ret); | |||||
| PassManager after_merge_fusion_passes; | |||||
| GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass("TransOpWithoutReshapeFusionPass", | |||||
| new (std::nothrow) TransOpWithoutReshapeFusionPass)); | |||||
| GE_CHK_STATUS_RET( | |||||
| after_merge_fusion_passes.AddPass("TransOpBreadthFusionPass", new (std::nothrow) TransOpBreadthFusionPass)); | |||||
| GE_CHK_STATUS_RET( | |||||
| after_merge_fusion_passes.AddPass("VariableRefDeleteOpPass", new (std::nothrow) VariableRefDeleteOpPass)); | |||||
| GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass("SameTransdataBreadthFusionPass", | |||||
| new (std::nothrow) SameTransdataBreadthFusionPass)); | |||||
| GE_CHK_STATUS_RET( | |||||
| after_merge_fusion_passes.AddPass("MarkGraphUnknownStatusPass", new (std::nothrow) MarkGraphUnknownStatusPass)); | |||||
| GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass("AtomicAddrCleanPass", new (std::nothrow) AtomicAddrCleanPass)); | |||||
| GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass( | |||||
| "LinkGenMaskNodesPass", new (std::nothrow) LinkGenMaskNodesPass(options_.stream_max_parallel_num))); | |||||
| GE_TIMESTAMP_START(after_merge_fusion_passes); | |||||
| ret = after_merge_fusion_passes.Run(compute_graph); | |||||
| GE_TIMESTAMP_END(after_merge_fusion_passes, "GraphManager::AfterMergePasses"); | |||||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||||
| GELOGE(ret, "Run passes after merge sub graph failed, ret:%d.", ret); | |||||
| return ret; | |||||
| } | |||||
| // add variable attr for hccl broadcast,need to be removed after variable pass online | |||||
| for (const ge::NodePtr &node : compute_graph->GetDirectNode()) { | |||||
| if (node->GetOpDesc()->GetType() != VARIABLE) { | |||||
| continue; | |||||
| } | |||||
| if (IsBroadCastOpData(node)) { | |||||
| AdjustBroadCastOpData(node); | |||||
| } | |||||
| if (IsAssignOpData(node)) { | |||||
| AdjustAssignOpData(node); | |||||
| } | |||||
| } | |||||
| GEPass ge_passes(compute_graph); | |||||
| NamesToPass names_to_passes; | |||||
| TransOpNearbyAllreduceFusionPass trans_op_nearby_allreduce_fusion_pass; | |||||
| names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass); | |||||
| names_to_passes_for_shape.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||||
| ConstantFoldingPass constant_folding_pass; | |||||
| names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | |||||
| DimensionAdjustPass dimension_adjust_pass; | |||||
| names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); | |||||
| CondRemovePass condition_remove_pass; | |||||
| names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); | |||||
| GE_TIMESTAMP_START(names_to_passes); | |||||
| ret = ge_passes.Run(names_to_passes); | |||||
| GE_TIMESTAMP_END(names_to_passes, "GraphManager::MergedGraphNameToPasses"); | |||||
| GE_CHK_STATUS_RET(ret, "Run ge_passes optimize for OptimizeAfterMergeSubGraph failed, ret:%d.", ret); | |||||
| ret = RemoveIsolatedConst(compute_graph); | |||||
| GE_CHK_STATUS_RET(ret, "Remove isolated Constant failed, ret:%d.", ret); | |||||
| PassManager pass_for_optimize; | |||||
| const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||||
| if (unknown_shape_skip == nullptr) { | |||||
| GE_CHK_STATUS_RET(pass_for_optimize.AddPass("SubgraphPass", new (std::nothrow) SubgraphPass)); | |||||
| } | |||||
| GE_CHK_STATUS_RET(pass_for_optimize.AddPass("MultiBatchPass", new (std::nothrow) MultiBatchPass)); | |||||
| GE_CHK_STATUS_RET(pass_for_optimize.AddPass("CompileNodesPass", new (std::nothrow) CompileNodesPass)); | |||||
| GE_TIMESTAMP_START(pass_for_optimize); | |||||
| ret = pass_for_optimize.Run(compute_graph); | |||||
| GE_TIMESTAMP_END(pass_for_optimize, "GraphManager::OptimizePass"); | |||||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||||
| GELOGE(ret, "Run optimize pass failed"); | |||||
| return ret; | |||||
| } | |||||
| ret = compute_graph->TopologicalSorting(); | |||||
| GE_CHK_STATUS_RET(ret, "Graph topological sort failed, ret:%d.", ret); | |||||
| GELOGI("End optimize after merge sub graph."); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { | Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { | ||||
| GELOGI("[LoadGraphAsync] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId()); | GELOGI("[LoadGraphAsync] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId()); | ||||
| @@ -2185,6 +2062,19 @@ Status GraphManager::IncreBuild(const GraphNodePtr &graph_node, GeModelPtr &ge_m | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| void GraphManager::ConstructGeInput(std::vector<ge::GeTensor> &ge_inputs, PreRunArgs &args) { | |||||
| for (auto const &input : args.input_tensor) { | |||||
| std::vector<int64_t> input_dims; | |||||
| std::transform(input.dims.begin(), input.dims.end(), std::back_inserter(input_dims), | |||||
| [](int64_t x) -> int64_t { return x; }); | |||||
| GeShape input_shape(input_dims); | |||||
| GeTensorDesc input_tensor_desc; | |||||
| input_tensor_desc.SetShape(input_shape); | |||||
| input_tensor_desc.SetDataType(static_cast<ge::DataType>(input.data_type)); | |||||
| ge_inputs.emplace_back(input_tensor_desc); | |||||
| } | |||||
| } | |||||
| void GraphManager::PreRunThread(GraphManager *graph_manager) { | void GraphManager::PreRunThread(GraphManager *graph_manager) { | ||||
| if (prctl(PR_SET_NAME, ("GE_PreRun")) != 0) { | if (prctl(PR_SET_NAME, ("GE_PreRun")) != 0) { | ||||
| GELOGW("Set thread name failed."); | GELOGW("Set thread name failed."); | ||||
| @@ -2198,16 +2088,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||||
| GetThreadLocalContext() = args.context; | GetThreadLocalContext() = args.context; | ||||
| GELOGI("A new loop start."); | GELOGI("A new loop start."); | ||||
| std::vector<ge::GeTensor> ge_inputs; | std::vector<ge::GeTensor> ge_inputs; | ||||
| for (auto const &input : args.input_tensor) { | |||||
| std::vector<int64_t> input_dims; | |||||
| std::transform(input.dims.begin(), input.dims.end(), std::back_inserter(input_dims), | |||||
| [](int64_t x) -> int64_t { return x; }); | |||||
| GeShape input_shape(input_dims); | |||||
| GeTensorDesc input_tensor_desc; | |||||
| input_tensor_desc.SetShape(input_shape); | |||||
| input_tensor_desc.SetDataType(static_cast<ge::DataType>(input.data_type)); | |||||
| ge_inputs.emplace_back(input_tensor_desc); | |||||
| } | |||||
| ConstructGeInput(ge_inputs, args); | |||||
| // find graph | // find graph | ||||
| GraphNodePtr graph_node = nullptr; | GraphNodePtr graph_node = nullptr; | ||||
| Status ret = graph_manager->GetGraphNode(args.graph_id, graph_node); | Status ret = graph_manager->GetGraphNode(args.graph_id, graph_node); | ||||
| @@ -2229,14 +2111,11 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||||
| graph_node->SetRunFlag(true); | graph_node->SetRunFlag(true); | ||||
| ComputeGraphPtr compute_graph_tmp = GraphUtils::GetComputeGraph(*(graph_node->GetGraph())); | ComputeGraphPtr compute_graph_tmp = GraphUtils::GetComputeGraph(*(graph_node->GetGraph())); | ||||
| if (graph_manager->GetTrainFlag()) { | |||||
| if (compute_graph_tmp == nullptr) { | |||||
| ReturnError(graph_manager, args.callback, GE_GRAPH_GRAPH_NODE_NULL, | |||||
| "[RunGraph] compute_graph_tmp is NULL, graph id = %u."); | |||||
| graph_node->Unlock(); | |||||
| return; | |||||
| } | |||||
| if (compute_graph_tmp == nullptr) { | |||||
| ReturnError(graph_manager, args.callback, GE_GRAPH_GRAPH_NODE_NULL, | |||||
| "[RunGraph] compute_graph_tmp is NULL, graph id = %u."); | |||||
| graph_node->Unlock(); | |||||
| return; | |||||
| } | } | ||||
| // when set incre build, save cache helper. | // when set incre build, save cache helper. | ||||
| graph_manager->AddModelCacheHelperToMap(args.graph_id, args.session_id, compute_graph_tmp); | graph_manager->AddModelCacheHelperToMap(args.graph_id, args.session_id, compute_graph_tmp); | ||||
| @@ -2266,11 +2145,19 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||||
| GeModelPtr ge_model = nullptr; | GeModelPtr ge_model = nullptr; | ||||
| if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { | if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { | ||||
| ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); | ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); | ||||
| // release rts generate context | |||||
| RtContextUtil::GetInstance().DestroyRtContexts(args.session_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| graph_node->SetRunFlag(false); | graph_node->SetRunFlag(false); | ||||
| ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit.."); | |||||
| graph_node->Unlock(); | |||||
| return; | |||||
| if (!std::getenv("AnalyzeMode")) { | |||||
| ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit.."); | |||||
| graph_node->Unlock(); | |||||
| return; | |||||
| } else { | |||||
| ReturnError(graph_manager, graph_node, args.callback, ret, "PreRun Failed, keep geop continue!"); | |||||
| graph_node->Unlock(); | |||||
| continue; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| graph_node->SetBuildFlag(true); | graph_node->SetBuildFlag(true); | ||||
| @@ -2350,13 +2237,74 @@ void GraphManager::ReturnError(GraphManager *graph_manager, RunAsyncCallback cal | |||||
| if (graph_manager == nullptr) { | if (graph_manager == nullptr) { | ||||
| return; | return; | ||||
| } | } | ||||
| GELOGE(ret, "%s.", log.c_str()); | |||||
| StopQueue(graph_manager); | StopQueue(graph_manager); | ||||
| GELOGE(ret, "%s.", log.c_str()); | |||||
| std::vector<ge::OutputTensorInfo> outputs; | std::vector<ge::OutputTensorInfo> outputs; | ||||
| callback(ret, outputs); | callback(ret, outputs); | ||||
| } | } | ||||
| void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, | |||||
| Status ret, const string &log) { | |||||
| std::vector<ge::OutputTensorInfo> outputs; | |||||
| auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); | |||||
| if (graph_manager == nullptr || compute_graph == nullptr) { | |||||
| GELOGE(GRAPH_FAILED, "[Analyze Mode] compute graph is null!"); | |||||
| callback(GRAPH_FAILED, outputs); | |||||
| return; | |||||
| } | |||||
| for (const auto &node : compute_graph->GetAllNodes()) { | |||||
| if (node->GetType() != "NetOutput") { | |||||
| continue; | |||||
| } | |||||
| for (size_t i = 0; i < node->GetAllInDataAnchorsSize(); i++) { | |||||
| auto input_desc = node->GetOpDesc()->MutableInputDesc(i); | |||||
| ge::OutputTensorInfo tensor; | |||||
| tensor.dims = input_desc->GetShape().GetDims(); | |||||
| tensor.data_type = static_cast<uint32_t>(input_desc->GetDataType()); | |||||
| int64_t len = 1; | |||||
| if (input_desc->GetShape().GetDims() != std::vector<int64_t>({})) { | |||||
| len = input_desc->GetShape().GetShapeSize(); | |||||
| } | |||||
| if (len < 0) { | |||||
| GELOGE(GRAPH_FAILED, "Analyze Mode does not support GEOP output unknown shape!"); | |||||
| callback(GRAPH_FAILED, outputs); | |||||
| return; | |||||
| } else if (len == 0) { | |||||
| GELOGI("getted shape size is 0.Do process as empty tensor!"); | |||||
| len = 1; | |||||
| } | |||||
| auto size = GetSizeByDataType(input_desc->GetDataType()); | |||||
| if (size <= 0) { | |||||
| GELOGE(PARAM_INVALID, "Failed to get cube size, the data type %s is invalid", | |||||
| ge::TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str()); | |||||
| callback(GRAPH_FAILED, outputs); | |||||
| return; | |||||
| } | |||||
| if (CheckInt64MulOverflow(len, static_cast<int64_t>(size)) != true) { | |||||
| GELOGE(MEMALLOC_FAILED, "int64 multiply happens overflow! a:%ld b:%d", len, size); | |||||
| callback(GRAPH_FAILED, outputs); | |||||
| return; | |||||
| } | |||||
| tensor.length = len * size; | |||||
| auto pbuff = new (std::nothrow) uint8_t[tensor.length]; | |||||
| if (!pbuff) { | |||||
| GELOGE(MEMALLOC_FAILED, "new buff failed!"); | |||||
| callback(GRAPH_FAILED, outputs); | |||||
| return; | |||||
| } | |||||
| // To avoid global step too small and can not stop, totally set a bigger value | |||||
| for (int64_t i = 0; i < tensor.length; i++) { | |||||
| *(pbuff + i) = 0x7F; // here stands for a positive max value | |||||
| } | |||||
| tensor.data.reset(pbuff); | |||||
| outputs.emplace_back(std::move(tensor)); | |||||
| } | |||||
| } | |||||
| callback(SUCCESS, outputs); | |||||
| return; | |||||
| } | |||||
| bool GraphManager::IsGraphNeedRebuild(uint32_t graph_id) { | bool GraphManager::IsGraphNeedRebuild(uint32_t graph_id) { | ||||
| // find graph | // find graph | ||||
| GraphNodePtr graph_node = nullptr; | GraphNodePtr graph_node = nullptr; | ||||
| @@ -2479,4 +2427,99 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp | |||||
| graph_node->SetGeRootModel(ge_root_model); | graph_node->SetGeRootModel(ge_root_model); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphManager::GenCheckPointGraph(const std::map<std::string, GeTensorDesc> &all_variables, Graph &graph) { | |||||
| ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(kCheckPointGraph); | |||||
| GE_CHECK_NOTNULL(compute_graph); | |||||
| OpDescPtr save_desc = MakeShared<ge::OpDesc>(compute_graph->GetName() + "_" + kSave, kSave); | |||||
| GE_CHECK_NOTNULL(save_desc); | |||||
| uint32_t save_index = 0; | |||||
| for (auto iter = all_variables.begin(); iter != all_variables.end(); ++iter) { | |||||
| GE_CHK_GRAPH_STATUS_RET(save_desc->AddInputDesc(save_index, iter->second)); | |||||
| save_index++; | |||||
| } | |||||
| NodePtr save_node = compute_graph->AddNode(save_desc); | |||||
| uint32_t index = 0; | |||||
| for (auto iter = all_variables.begin(); iter != all_variables.end(); ++iter) { | |||||
| OpDescPtr var_desc = MakeShared<ge::OpDesc>(iter->first, VARIABLE); | |||||
| GE_CHECK_NOTNULL(var_desc); | |||||
| if (!AttrUtils::SetBool(var_desc, kCheckPointForGetVar, true)) { | |||||
| GELOGW("Set check point graph attr failed."); | |||||
| } | |||||
| GE_CHK_GRAPH_STATUS_RET(var_desc->AddOutputDesc(iter->second)); | |||||
| NodePtr var_node = compute_graph->AddNode(var_desc); | |||||
| GE_CHK_STATUS(GraphUtils::AddEdge(var_node->GetOutDataAnchor(0), save_node->GetInDataAnchor(index)), | |||||
| "Add edge[%s->%s] fail.", var_node->GetName().c_str(), save_node->GetName().c_str()); | |||||
| index++; | |||||
| } | |||||
| compute_graph->Dump(); | |||||
| graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphManager::SaveVariables(const Graph &graph, const std::vector<std::string> &var_names, | |||||
| const std::vector<Tensor> &outputs, std::vector<Tensor> &var_values) { | |||||
| map<string, Tensor> var_results; | |||||
| GE_CHK_STATUS_RET(SaveCheckPointResult(graph, outputs, var_results), "Save check point result failed."); | |||||
| if (!var_names.empty()) { | |||||
| for (const auto &var_name : var_names) { | |||||
| if (var_results.count(var_name) == 0) { | |||||
| GELOGE(FAILED, "Fetch var[%s] value failed.", var_name.c_str()); | |||||
| return FAILED; | |||||
| } else { | |||||
| var_values.emplace_back(var_results[var_name]); | |||||
| } | |||||
| } | |||||
| } else { | |||||
| for (auto iter = var_results.begin(); iter != var_results.end(); ++iter) { | |||||
| var_values.emplace_back(iter->second); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphManager::SaveCheckPointResult(const Graph &graph, const std::vector<Tensor> &outputs, | |||||
| map<string, Tensor> &var_results) { | |||||
| auto compute_graph = GraphUtils::GetComputeGraph(graph); | |||||
| NodePtr netoutput_node = nullptr; | |||||
| for (const auto &node : compute_graph->GetAllNodes()) { | |||||
| if (node->GetType() == NETOUTPUT) { | |||||
| netoutput_node = node; | |||||
| break; | |||||
| } | |||||
| } | |||||
| GE_CHECK_NOTNULL(netoutput_node); | |||||
| for (const auto &in : netoutput_node->GetAllInDataAnchors()) { | |||||
| auto out_anchor = in->GetPeerOutAnchor(); | |||||
| GE_CHECK_NOTNULL(out_anchor); | |||||
| auto peer_node = out_anchor->GetOwnerNode(); | |||||
| while (peer_node->GetType() != VARIABLE) { | |||||
| if (peer_node->GetAllInDataAnchors().size() != 1) { | |||||
| GELOGE(FAILED, "peer_node [%s] has more than 1 input in checkpoint Graph.", peer_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| auto peer_node_in_anchor = peer_node->GetAllInDataAnchors().at(0); | |||||
| auto peer_node_out_anchor = peer_node_in_anchor->GetPeerOutAnchor(); | |||||
| if (peer_node_out_anchor != nullptr) { | |||||
| peer_node = peer_node_out_anchor->GetOwnerNode(); | |||||
| if (peer_node->GetType() == VARIABLE) { | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (peer_node->GetType() != VARIABLE) { | |||||
| GELOGE(FAILED, " peer_node %s is not variable in checkpoint Graph.", peer_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| auto var_name = peer_node->GetName(); | |||||
| GELOGI("[GraphManager] SaveVariables, varName is %s.", var_name.c_str()); | |||||
| if (in->GetIdx() >= static_cast<int>(outputs.size())) { | |||||
| GELOGE(FAILED, "variable index[%d] out of range[%zu].", in->GetIdx(), outputs.size()); | |||||
| return FAILED; | |||||
| } | |||||
| var_results.emplace(var_name, outputs.at(in->GetIdx())); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -159,6 +159,13 @@ class GraphManager { | |||||
| void SetOptionsRunGraphFlag(bool run_graph_flag); | void SetOptionsRunGraphFlag(bool run_graph_flag); | ||||
| Status GenCheckPointGraph(const std::map<std::string, GeTensorDesc> &all_variables, Graph &graph); | |||||
| Status SaveVariables(const Graph &graph, const std::vector<std::string> &var_names, | |||||
| const std::vector<Tensor> &outputs, std::vector<Tensor> &var_values); | |||||
| Status SaveCheckPointResult(const Graph &graph, const std::vector<Tensor> &outputs, map<string, Tensor> &var_results); | |||||
| private: | private: | ||||
| struct PreRunArgs { | struct PreRunArgs { | ||||
| GraphId graph_id; | GraphId graph_id; | ||||
| @@ -267,9 +274,8 @@ class GraphManager { | |||||
| Status OptimizeStage1(ComputeGraphPtr &compute_graph); | Status OptimizeStage1(ComputeGraphPtr &compute_graph); | ||||
| Status OptimizeStage2(ComputeGraphPtr &compute_graph); | Status OptimizeStage2(ComputeGraphPtr &compute_graph); | ||||
| Status OptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_graph); | |||||
| Status NewOptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_graph); | |||||
| Status SubexpressionMigration(ComputeGraphPtr &compute_graph); | |||||
| Status LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node); | Status LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node); | ||||
| @@ -288,10 +294,13 @@ class GraphManager { | |||||
| Status IncreBuild(const GraphNodePtr &graph_node, GeModelPtr &ge_model); | Status IncreBuild(const GraphNodePtr &graph_node, GeModelPtr &ge_model); | ||||
| void RemoveModelCacheHelper(const GraphId &graph_id); | void RemoveModelCacheHelper(const GraphId &graph_id); | ||||
| static void ConstructGeInput(std::vector<ge::GeTensor> &ge_inputs, PreRunArgs &args); | |||||
| static void PreRunThread(GraphManager *graph_manager); | static void PreRunThread(GraphManager *graph_manager); | ||||
| static void RunThread(GraphManager *graph_manager); | static void RunThread(GraphManager *graph_manager); | ||||
| static void StopQueue(GraphManager *graph_manager); | static void StopQueue(GraphManager *graph_manager); | ||||
| static void ReturnError(GraphManager *graph_manager, RunAsyncCallback callback, Status ret, const string &log); | static void ReturnError(GraphManager *graph_manager, RunAsyncCallback callback, Status ret, const string &log); | ||||
| static void ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, Status ret, | |||||
| const string &log); | |||||
| void ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph); | void ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph); | ||||
| @@ -855,6 +855,32 @@ void VarManager::RemoveAllocatedGraphId(const std::string &var_name) { | |||||
| var_resource_->RemoveAllocatedGraphId(var_name); | var_resource_->RemoveAllocatedGraphId(var_name); | ||||
| } | } | ||||
| Status VarManager::GetAllVariables(std::map<std::string, GeTensorDesc> &all_variables) { | |||||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||||
| if (var_resource_ == nullptr) { | |||||
| GELOGW("VarManager has not been inited."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| auto new_variable_desc = var_resource_->GetAllVarDesc(); | |||||
| if (new_variable_desc.size() == 0) { | |||||
| GELOGW("VarManager don't have variables."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| for (auto iter = new_variable_desc.begin(); iter != new_variable_desc.end(); ++iter) { | |||||
| auto trans_road = var_resource_->GetTransRoad(iter->first); | |||||
| if (trans_road == nullptr || trans_road->empty()) { | |||||
| GELOGI("The variable %s does not have any trans road", iter->first.c_str()); | |||||
| all_variables[iter->first] = iter->second; | |||||
| continue; | |||||
| } | |||||
| // get origin trans info : the first trans node info | |||||
| auto origin_trans_node_info = trans_road->at(0); | |||||
| all_variables[iter->first] = origin_trans_node_info.input; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| VarManagerPool::~VarManagerPool() { Destory(); } | VarManagerPool::~VarManagerPool() { Destory(); } | ||||
| VarManagerPool &VarManagerPool::Instance() { | VarManagerPool &VarManagerPool::Instance() { | ||||
| @@ -897,4 +923,22 @@ VarManager *VarManagerPool::GetVarManager(uint64_t session_id) { | |||||
| var_manager_map_[session_id] = var_manager; | var_manager_map_[session_id] = var_manager; | ||||
| return var_manager; | return var_manager; | ||||
| } | } | ||||
| void VarManagerPool::RemoveVarManager(uint64_t session_id) { | |||||
| VarManager *var_manager = nullptr; | |||||
| { | |||||
| std::lock_guard<std::mutex> lock(var_manager_mutex_); | |||||
| auto it = var_manager_map_.find(session_id); | |||||
| if (it != var_manager_map_.end()) { | |||||
| var_manager = it->second; | |||||
| var_manager_map_.erase(it); | |||||
| } | |||||
| } | |||||
| if (var_manager != nullptr) { | |||||
| var_manager->Destory(); | |||||
| delete var_manager; | |||||
| var_manager = nullptr; | |||||
| } | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -157,6 +157,8 @@ class VarResource { | |||||
| bool IsVarAddr(const int64_t &offset); | bool IsVarAddr(const int64_t &offset); | ||||
| std::unordered_map<std::string, ge::GeTensorDesc> GetAllVarDesc() const { return cur_var_tensor_desc_map_; } | |||||
| private: | private: | ||||
| std::string VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc); | std::string VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc); | ||||
| @@ -276,6 +278,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { | |||||
| uint8_t *GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type); | uint8_t *GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type); | ||||
| Status GetAllVariables(std::map<std::string, GeTensorDesc> &all_variables); | |||||
| private: | private: | ||||
| uint32_t version_; | uint32_t version_; | ||||
| uint64_t session_id_; | uint64_t session_id_; | ||||
| @@ -300,6 +304,8 @@ class VarManagerPool { | |||||
| VarManager *GetVarManager(uint64_t session_id); | VarManager *GetVarManager(uint64_t session_id); | ||||
| void RemoveVarManager(uint64_t session_id); | |||||
| void Destory() noexcept; | void Destory() noexcept; | ||||
| ge::Status Init() const; | ge::Status Init() const; | ||||
| @@ -16,7 +16,6 @@ | |||||
| #include "graph/manager/rdma_pool_allocator.h" | #include "graph/manager/rdma_pool_allocator.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "graph/manager/graph_mem_allocator.h" | |||||
| namespace { | namespace { | ||||
| const size_t kAlignedSize = 512; | const size_t kAlignedSize = 512; | ||||
| @@ -27,12 +27,11 @@ | |||||
| #include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
| #include "graph/manager/block_memory.h" | #include "graph/manager/block_memory.h" | ||||
| #include "graph/manager/graph_mem_allocator.h" | |||||
| #include "graph/node.h" | #include "graph/node.h" | ||||
| #include "runtime/mem.h" | #include "runtime/mem.h" | ||||
| namespace ge { | namespace ge { | ||||
| class MemoryAllocator; | |||||
| class RdmaPoolAllocator { | class RdmaPoolAllocator { | ||||
| public: | public: | ||||
| explicit RdmaPoolAllocator(rtMemType_t memory_type); | explicit RdmaPoolAllocator(rtMemType_t memory_type); | ||||