| @@ -42,6 +42,9 @@ class GraphOptimizer { | |||
| // optimize original graph for FE quant optimize | |||
| virtual Status OptimizeGraphPrepare(ComputeGraph &graph) { return SUCCESS; } | |||
| // optimize graph before build for RTS | |||
| virtual Status OptimizeGraphBeforeBuild(ComputeGraph &graph) { return SUCCESS; } | |||
| // optimize original graph, using in graph preparation stage | |||
| virtual Status OptimizeOriginalGraph(ComputeGraph &graph) = 0; | |||
| @@ -0,0 +1,39 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef INC_COMMON_UTILS_AI_CORE_COMMON_ATTR_DEFINE_H_ | |||
| #define INC_COMMON_UTILS_AI_CORE_COMMON_ATTR_DEFINE_H_ | |||
| #include <string> | |||
| namespace fe { | |||
| static const std::string SCOPE_ID_ATTR = "fusion_scope"; | |||
| static const std::string FE_IMPLY_TYPE = "_fe_imply_type"; | |||
| static const std::string PARENT_OP_TYPE = "parentOpType"; | |||
| static const std::string ATTR_NAME_TASK_L2_FUSION_INFO_EXTEND_PTR = "task_l2_fusion_info_extend_content"; | |||
| static const std::string ATTR_DATA_DUMP_REF = "_datadump_ref"; | |||
| static const std::string ATTR_NAME_L2_FUSION_EXTEND_PTR = "l2_fusion_extend_content"; | |||
| static const std::string L1_OPTIMIZED = "l1_optimized"; | |||
| static const std::string L2_OPTIMIZED = "l2_optimized"; | |||
| } // namespace fe | |||
| #endif | |||
| @@ -0,0 +1,118 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef INC_COMMON_UTILS_AI_CORE_COMMON_TYPES_H_ | |||
| #define INC_COMMON_UTILS_AI_CORE_COMMON_TYPES_H_ | |||
| #include "graph/anchor.h" | |||
| #include "graph/types.h" | |||
| #include "runtime/kernel.h" | |||
| #include <map> | |||
| #include <string> | |||
| #include <vector> | |||
| namespace fe { | |||
| struct FusionOpSrc { | |||
| uint32_t src_op_id; | |||
| ge::AnchorPtr src_anchor; | |||
| int32_t fusion_src_index; | |||
| int32_t fusion_dst_index; | |||
| }; | |||
| struct FusionOpDst { | |||
| uint32_t dst_op_id; | |||
| ge::AnchorPtr dst_anchor; | |||
| }; | |||
| struct FusionDataFlow { | |||
| std::pair<ge::AnchorPtr, ge::AnchorPtr> edge; | |||
| std::pair<std::string, ge::AnchorPtr> node_dataindex_pair; | |||
| }; | |||
| typedef struct tagL2FusionData { | |||
| uint32_t l2Index; | |||
| uint64_t l2Addr; | |||
| uint64_t l2PageNum; | |||
| } L2FusionData_t; | |||
| typedef std::map<uint64_t, L2FusionData_t> L2FusionDataMap_t; | |||
| typedef struct tagFeSmDesc { | |||
| rtL2Ctrl_t l2ctrl; | |||
| std::string nodeName[8]; | |||
| uint8_t outputIndex[8]; | |||
| } feSmDesc_t; | |||
| typedef struct TagTaskL2FusionInfo { | |||
| std::string nodeName; | |||
| feSmDesc_t l2Info; | |||
| L2FusionDataMap_t input; | |||
| L2FusionDataMap_t output; | |||
| uint32_t isUsed; | |||
| } TaskL2FusionInfo_t; | |||
| using L2FusionInfoPtr = std::shared_ptr<TaskL2FusionInfo_t>; | |||
| typedef struct ToOpStruct { | |||
| int64_t opL1Space = 0; | |||
| std::vector<int64_t> opL1FusionType; | |||
| int64_t opL1WorkspaceFlag = 0; // for workspace flag | |||
| int64_t opL1WorkspaceSize = 0; | |||
| std::vector<std::vector<int64_t>> validInputShape; | |||
| std::vector<std::vector<int64_t>> validOutputShape; | |||
| std::vector<std::vector<int64_t>> sliceInputOffset; // conv & pooling & ReadSelect | |||
| std::vector<std::vector<int64_t>> sliceOutputOffset; // WriteSelect | |||
| std::vector<uint32_t> totalShape; | |||
| uint32_t splitIndex = 0; | |||
| ToOpStruct() { | |||
| // set invalid value for essential variable | |||
| opL1Space = -1; | |||
| opL1WorkspaceSize = -1; | |||
| } | |||
| } ToOpStruct_t; | |||
| enum OpImplType { | |||
| EN_IMPL_CUSTOM_CONSTANT_CCE = 0, // custom constant op | |||
| EN_IMPL_CUSTOM_TIK, // custom tik op | |||
| EN_IMPL_CUSTOM_TBE, // custom tbe op | |||
| EN_IMPL_HW_CONSTANT_CCE, // Huawei built-in constant op | |||
| EN_IMPL_HW_GENERAL_CCE, // Huawei built-in cce op | |||
| EN_IMPL_HW_TIK, // Huawei built-in tik op | |||
| EN_IMPL_HW_TBE, // Huawei built-in tbe op | |||
| EN_IMPL_RL, // RL op | |||
| EN_IMPL_PLUGIN_TBE, // Huawei built-in tbe plugin op | |||
| EN_IMPL_VECTOR_CORE_HW_TBE, // Huawei built-in tbe op | |||
| EN_IMPL_VECTOR_CORE_CUSTOM_TBE, // custom tbe op | |||
| EN_IMPL_NON_PERSISTENT_CUSTOM_TBE, // custom tbe op | |||
| EN_RESERVED // reserved value | |||
| }; | |||
| static const std::map<ge::DataType, uint32_t> DATATYPE_SIZE_MAP{{ge::DT_FLOAT, sizeof(float)}, | |||
| {ge::DT_FLOAT16, sizeof(int16_t)}, | |||
| {ge::DT_INT8, sizeof(int8_t)}, | |||
| {ge::DT_INT32, sizeof(int32_t)}, | |||
| {ge::DT_UINT8, sizeof(uint8_t)}, | |||
| {ge::DT_UINT32, sizeof(uint32_t)}, | |||
| {ge::DT_INT16, sizeof(int16_t)}, | |||
| {ge::DT_UINT16, sizeof(uint16_t)}, | |||
| {ge::DT_INT64, sizeof(int64_t)}, | |||
| {ge::DT_UINT64, sizeof(uint64_t)}, | |||
| {ge::DT_DOUBLE, sizeof(double)}, | |||
| {ge::DT_BOOL, sizeof(bool)}, | |||
| {ge::DT_DUAL, sizeof(float) + sizeof(int8_t)}, | |||
| {ge::DT_DUAL_SUB_UINT8, sizeof(int8_t)}, | |||
| {ge::DT_DUAL_SUB_INT8, sizeof(int8_t)}}; | |||
| } // namespace fe | |||
| #endif | |||
| @@ -0,0 +1,107 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef INC_COMMON_UTILS_AI_CORE_COMMON_GRAPH_COMMON_H_ | |||
| #define INC_COMMON_UTILS_AI_CORE_COMMON_GRAPH_COMMON_H_ | |||
| #include "graph/compute_graph.h" | |||
| #include "common/aicore_util_types.h" | |||
| #include "register/graph_optimizer/graph_optimize_register_error_codes.h" | |||
| #include <map> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <vector> | |||
| namespace fe { | |||
| using kScopeNodeMap_t = std::map<int64_t, std::vector<ge::NodePtr>>; | |||
| using kScopeNodePair_t = std::pair<int64_t, std::vector<ge::NodePtr>>; | |||
| class GraphCommImpl; | |||
| using GraphCommImplPtr = std::unique_ptr<GraphCommImpl>; | |||
| class GraphComm { | |||
| public: | |||
| GraphComm(const string &engineName); | |||
| virtual ~GraphComm(); | |||
| GraphComm(const GraphComm &in) = delete; | |||
| GraphComm &operator=(const GraphComm &in) = delete; | |||
| Status GetscopeNodeMap(ge::ComputeGraph &graph, kScopeNodeMap_t &fusionMap); | |||
| Status CopyFusionOpNodes(vector<FusionDataFlow> &fusInputEdgeList, vector<FusionDataFlow> &fusOutputEdgeList, | |||
| vector<ge::NodePtr> &fusNodelist, ge::OpDescPtr fusionOpDesc, | |||
| ge::ComputeGraphPtr fusionGraph); | |||
| Status CopyFusionOpEdges(ge::OpDescPtr fusionOpDesc, ge::ComputeGraph &origGraph, ge::ComputeGraphPtr fusionGraph); | |||
| Status GetNodeDataFlowMap(const ge::NodePtr &fusNode, | |||
| std::map<ge::NodePtr, std::map<ge::AnchorPtr, ge::AnchorPtr>> &fusionOpAnchorsMap, | |||
| ge::kFusionDataFlowVec_t &fusDataflowList, const int &mapType); | |||
| Status GetFusionNodeEdgeList(std::vector<ge::NodePtr> &fusNodelist, std::vector<FusionDataFlow> &fusInputEdgeList, | |||
| std::vector<FusionDataFlow> &fusOutputEdgeList); | |||
| void ClearFusionSrc(); | |||
| void ClearFusionDst(); | |||
| void AddFusionOutputSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, const int32_t &fusion_src_index, | |||
| std::pair<string, ge::AnchorPtr> &node_dataindex_pair); | |||
| void AddFusionInputSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, const int32_t &fusion_dst_index, | |||
| std::pair<string, ge::AnchorPtr> &node_dataindex_pair); | |||
| void SaveFusionDst(const uint32_t &dst_op_id, ge::AnchorPtr dst_anchor); | |||
| bool IsFusionDstExist(const uint32_t &dst_op_id, const ge::AnchorPtr &dst_anchor); | |||
| bool GetFusionSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, int32_t &fusion_src_index, | |||
| int32_t &fusion_dst_index); | |||
| Status GetFusionNodeCtrlEdgeList(vector<ge::NodePtr> &fusNodelist, vector<FusionDataFlow> &fusInputCtrlEdgeList, | |||
| vector<FusionDataFlow> &fusOutputCtrlEdgeList); | |||
| Status MergeFusionNodeEdgeList(ge::NodePtr &fusNode, vector<ge::NodePtr> &fusNodelist, | |||
| vector<FusionDataFlow> &fusInputEdgeList, vector<FusionDataFlow> &fusOutputEdgeList); | |||
| Status MergeFusionNodeCtrlEdgeList(ge::NodePtr &fusNode, vector<ge::NodePtr> &fusNodelist, | |||
| vector<FusionDataFlow> &fusInputEdgeList, | |||
| vector<FusionDataFlow> &fusOutputEdgeList); | |||
| string GetEngineName(); | |||
| private: | |||
| Status MergeFusionNodeInputEdgeList(ge::NodePtr fusNode, std::vector<ge::NodePtr> &fusNodelist, | |||
| std::vector<FusionDataFlow> &fusInputEdgeList); | |||
| Status MergeFusionNodeOutputEdgeList(ge::NodePtr fusNode, std::vector<ge::NodePtr> &fusNodelist, | |||
| std::vector<FusionDataFlow> &fusOutputEdgeList); | |||
| string engineName_; | |||
| std::vector<FusionOpSrc> exist_fusion_src_list_; | |||
| std::vector<FusionOpDst> exist_fusion_dst_list_; | |||
| // std::vector<std::multimap<std::string, uint32_t>> | |||
| ge::kFusionDataFlowVec_t fusion_input_dataflow_list_; | |||
| // std::vector<std::multimap<std::string, ge::AnchorPtr>> | |||
| ge::kFusionDataFlowVec_t fusion_output_dataflow_list_; | |||
| GraphCommImplPtr graphCommImplPtr_; | |||
| }; | |||
| } // namespace fe | |||
| #endif | |||
| @@ -0,0 +1,42 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef INC_COMMON_UTILS_AI_CORE_COMMON_SCOPE_ALLOCATOR_H_ | |||
| #define INC_COMMON_UTILS_AI_CORE_COMMON_SCOPE_ALLOCATOR_H_ | |||
| #include "graph/op_desc.h" | |||
| namespace fe { | |||
| class ScopeAllocator { | |||
| public: | |||
| ScopeAllocator(); | |||
| virtual ~ScopeAllocator(); | |||
| ScopeAllocator(const ScopeAllocator& in) = delete; | |||
| ScopeAllocator& operator=(const ScopeAllocator& in) = delete; | |||
| public: | |||
| void Init(); | |||
| int64_t GetCurrentScopeId(); | |||
| int64_t AllocateScopeId(void); | |||
| bool HasScopeAttr(ge::ConstOpDescPtr opdef); | |||
| bool GetScopeAttr(ge::ConstOpDescPtr opdef, int64_t& scopeId); | |||
| bool SetScopeAttr(ge::OpDescPtr opdef, int64_t scopeId); | |||
| private: | |||
| int64_t scopeId; | |||
| }; | |||
| } // namespace fe | |||
| #endif | |||
| @@ -14,15 +14,20 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GRAPH_PASSES_SWITCH_SPLIT_PASS_H_ | |||
| #define GE_GRAPH_PASSES_SWITCH_SPLIT_PASS_H_ | |||
| #ifndef AICORE_PARAM_CALCULATOR | |||
| #define AICORE_PARAM_CALCULATOR | |||
| #include <set> | |||
| #include "graph/passes/base_pass.h" | |||
| namespace ge { | |||
| class SwitchSplitPass : public BaseNodePass { | |||
| #include "graph/node.h" | |||
| #include "graph_optimizer/graph_optimize_register_error_codes.h" | |||
| namespace fe { | |||
| class AICoreParamCalculator { | |||
| public: | |||
| Status Run(NodePtr &node) override; | |||
| AICoreParamCalculator(); | |||
| ~AICoreParamCalculator(); | |||
| Status CalcOpRunningParam(ge::Node &node); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_PASSES_SWITCH_SPLIT_PASS_H_ | |||
| } // namespace fe | |||
| #endif // AICORE_PARAM_CALCULATOR | |||
| @@ -0,0 +1,45 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef TENSORSIZE_CALCULATOR_H | |||
| #define TENSORSIZE_CALCULATOR_H | |||
| #include "graph_optimizer/graph_optimize_register_error_codes.h" | |||
| #include <map> | |||
| #include <string> | |||
| #include "graph/compute_graph.h" | |||
| #include "graph/op_desc.h" | |||
| namespace fe { | |||
| class TensorSizeCalculator { | |||
| public: | |||
| /** | |||
| * Calculate the tensor size of input and output of each opdesc | |||
| * @param opDesc opdesc object | |||
| * @param opImplType op impl type | |||
| * @return status SUCCESS or FAILED | |||
| */ | |||
| static Status CalculateOpTensorSize(ge::OpDesc &opDesc); | |||
| private: | |||
| static Status CalcInputOpTensorSize(ge::OpDesc &opDesc, int32_t &outputRealCalcFlag); | |||
| static Status CalcOutputOpTensorSize(ge::OpDesc &opDesc, int32_t &outputRealCalcFlag); | |||
| }; | |||
| } // namespace fe | |||
| #endif // TENSORSIZE_CALCULATOR_H | |||
| @@ -98,6 +98,15 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { | |||
| /// | |||
| Status RunGraphAsync(uint32_t graphId, const std::vector<ge::InputTensorInfo> &inputs, RunAsyncCallback callback); | |||
| /// | |||
| /// @ingroup ge_graph | |||
| /// @brief get variables in the session with specific session id | |||
| /// @param [in] var_names: variable names | |||
| /// @param [out] var_values: variable values | |||
| /// @return Status result of function | |||
| /// | |||
| Status GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values); | |||
| /// | |||
| /// @ingroup ge_graph | |||
| /// @brief register callback func with specific summary or checkpoint by users | |||
| @@ -23,6 +23,12 @@ | |||
| #include "graph/graph.h" | |||
| #include "graph/ge_error_codes.h" | |||
| namespace { | |||
| #define IR_MAJOR_VERSION (int(1)) | |||
| #define IR_MINOR_VERSION (int(0)) | |||
| #define IR_PATCH_VERSION (int(0)) | |||
| } // namespace | |||
| namespace ge { | |||
| struct ModelBufferData { | |||
| @@ -71,5 +77,17 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string | |||
| */ | |||
| graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief query IR interface version | |||
| * | |||
| * @param major_version[OUT] IR interface major version | |||
| * @param minor_version[OUT] IR interface minor version | |||
| * @param patch_version[OUT] IR interface patch version | |||
| * @retval GRAPH_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| */ | |||
| graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version); | |||
| }; // namespace ge | |||
| #endif | |||
| @@ -45,9 +45,11 @@ | |||
| namespace ge { | |||
| class Operator; | |||
| class OperatorImpl; | |||
| class NodeUtils; | |||
| class NamedAttrs; | |||
| class Graph; | |||
| class AttrValue; | |||
| class Node; | |||
| using SubgraphBuilder = std::function<Graph()>; | |||
| using OperatorImplPtr = std::shared_ptr<OperatorImpl>; | |||
| @@ -65,8 +67,8 @@ using std::string; | |||
| class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator { | |||
| public: | |||
| friend class OperatorImpl; | |||
| friend class GraphBuilderImpl; | |||
| friend class NodeUtils; | |||
| using OpInt = int64_t; | |||
| using OpFloat = float; | |||
| @@ -104,6 +106,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator { | |||
| Operator &SetInput(const string &dst_name, const Operator &src_oprt, const string &name); // lint !e148 | |||
| Operator &SetInput(const string &dst_name, const Operator &src_oprt, uint32_t index); | |||
| Operator &AddControlInput(const Operator &src_oprt); | |||
| graphStatus GetInputConstData(const string &dst_name, Tensor &data) const; | |||
| @@ -269,11 +273,15 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator { | |||
| OutHandler GetOutput(const string &name) const; | |||
| OutHandler GetOutput(uint32_t index) const; | |||
| OperatorImplPtr GetOperatorImplPtr() const; | |||
| OperatorImplPtr operator_impl_{nullptr}; | |||
| graphStatus GetInputConstDataOut(const string &dst_name, Tensor &data) const; | |||
| std::shared_ptr<const Node> GetNode() const; | |||
| }; | |||
| /*lint +e148*/ | |||
| } // namespace ge | |||
| @@ -130,6 +130,10 @@ class OpReg { | |||
| Operator::SetInput(#x, v, srcName); \ | |||
| return *this; \ | |||
| } \ | |||
| _THIS_TYPE &set_input_##x(Operator &v, uint32_t index) { \ | |||
| Operator::SetInput(#x, v, index); \ | |||
| return *this; \ | |||
| } \ | |||
| _THIS_TYPE &set_input_##x(Operator &v) { \ | |||
| Operator::SetInput(#x, v); \ | |||
| return *this; \ | |||
| @@ -159,6 +163,10 @@ class OpReg { | |||
| Operator::SetInput(#x, v, srcName); \ | |||
| return *this; \ | |||
| } \ | |||
| _THIS_TYPE &set_input_##x(Operator &v, uint32_t index) { \ | |||
| Operator::SetInput(#x, v, index); \ | |||
| return *this; \ | |||
| } \ | |||
| TensorDesc get_input_desc_##x() const { return Operator::GetInputDesc(#x); } \ | |||
| graphStatus update_input_desc_##x(const TensorDesc &tensorDesc) { \ | |||
| return Operator::UpdateInputDesc(#x, tensorDesc); \ | |||
| @@ -0,0 +1,331 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef EXTERNAL_REGISTER_SCOPE_SCOPE_FUSION_PASS_REGISTER_H_ | |||
| #define EXTERNAL_REGISTER_SCOPE_SCOPE_FUSION_PASS_REGISTER_H_ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <map> | |||
| #include "ge/ge_api_error_codes.h" | |||
| #include "register/register_error_codes.h" | |||
| #include "register/register_types.h" | |||
| #include "graph/operator.h" | |||
| #define CHECK_INNER_NODE_CONDITION(cond, fusion_rlt) \ | |||
| do { \ | |||
| if (!(cond)) { \ | |||
| if ((fusion_rlt) != nullptr) { \ | |||
| (fusion_rlt)->SetType(ge::kScopeInvalidType); \ | |||
| } \ | |||
| return; \ | |||
| } \ | |||
| } while (0) | |||
| namespace domi { | |||
| class TensorFlowModelParser; | |||
| } // namespace domi | |||
| namespace ge { | |||
| const int32_t kFusionDisableIndex = 99999; | |||
| const char *const kScopeToMultiNodes = "ScopeToMultiNodes"; | |||
| const char *const kScopeInvalidType = "ScopeInvalidType"; | |||
| const char *const kInputFromFusionScope = "InputFromFusionScope"; | |||
| const char *const kOutputToFusionScope = "OutputToFusionScope"; | |||
| class ScopePattern; | |||
| using ScopeFusionPatterns = std::vector<std::vector<ScopePattern *>>; | |||
| class ScopePassManager; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY Scope { | |||
| public: | |||
| explicit Scope(const std::string &name, const std::string &sub_type = "", Scope *father_scope = nullptr); | |||
| ~Scope(); | |||
| std::string Name() const; | |||
| std::string SubType() const; | |||
| std::map<std::string, ge::OperatorPtr> AllNodesMap() const; | |||
| Scope *GetSubScope(const std::string &scope_name) const; | |||
| std::string LastName() const; | |||
| std::vector<Scope *> GetAllSubScopes() const; | |||
| const Scope *GetFatherScope() const; | |||
| private: | |||
| class ScopeImpl; | |||
| std::unique_ptr<ScopeImpl> impl_; | |||
| friend class ScopeBasePass; | |||
| friend class ScopeTree; | |||
| friend class NodeOpTypeFeature; | |||
| friend class NodeAttrFeature; | |||
| friend class ScopeFeature; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY FusionScopesResult { | |||
| public: | |||
| FusionScopesResult(); | |||
| ~FusionScopesResult(); | |||
| void SetName(const std::string &name); | |||
| void SetType(const std::string &type); | |||
| void SetDescription(const std::string &description); | |||
| std::string Name() const; | |||
| std::vector<ge::OperatorPtr> Nodes() const; | |||
| void InsertInputs(const std::string &inner_op_name, const std::vector<int32_t> &index_map); | |||
| void InsertOutputs(const std::string &inner_op_name, const std::vector<int32_t> &index_map); | |||
| class InnerNodeInfo { | |||
| public: | |||
| explicit InnerNodeInfo(const std::string &fusion_node_name); | |||
| InnerNodeInfo(const std::string &fusion_node_name, const std::string &name, const std::string &type); | |||
| InnerNodeInfo(InnerNodeInfo &&other) noexcept; | |||
| InnerNodeInfo &operator=(InnerNodeInfo &&other) noexcept; | |||
| InnerNodeInfo(const InnerNodeInfo &) = delete; | |||
| InnerNodeInfo &operator=(const InnerNodeInfo &) = delete; | |||
| ~InnerNodeInfo(); | |||
| InnerNodeInfo &SetName(const std::string &name); | |||
| InnerNodeInfo &SetType(const std::string &type); | |||
| InnerNodeInfo &InsertInput(const std::string &input_node, int32_t peer_out_idx); | |||
| InnerNodeInfo &InsertOutput(const std::string &output_node, int32_t peer_in_idx); | |||
| ge::graphStatus BuildInnerNode(); | |||
| ge::graphStatus SetInputFormat(const std::string &input_name, const std::string &format); | |||
| ge::graphStatus SetOutputFormat(const std::string &output_name, const std::string &format); | |||
| ge::graphStatus SetDynamicInputFormat(const std::string &input_name, uint32_t index, const std::string &format); | |||
| ge::graphStatus SetDynamicOutputFormat(const std::string &output_name, uint32_t index, const std::string &format); | |||
| ge::Operator *MutableOperator(); | |||
| std::string GetName() const; | |||
| std::string GetType() const; | |||
| std::vector<std::pair<std::string, int32_t>> GetInputs() const; | |||
| std::vector<std::pair<std::string, int32_t>> GetOutputs() const; | |||
| private: | |||
| class InnerNodeInfoImpl; | |||
| std::unique_ptr<InnerNodeInfoImpl> impl_; | |||
| }; | |||
| InnerNodeInfo *AddInnerNode(const std::string &name, const std::string &type); | |||
| InnerNodeInfo *MutableRecentInnerNode(); | |||
| InnerNodeInfo *MutableInnerNode(uint32_t index); | |||
| ge::graphStatus CheckInnerNodesInfo(); | |||
| private: | |||
| class FusionScopesResultImpl; | |||
| std::unique_ptr<FusionScopesResultImpl> impl_; | |||
| friend class ScopeGraph; | |||
| friend class ScopeBasePass; | |||
| friend class TensorFlowModelParser; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeTree { | |||
| public: | |||
| ScopeTree(); | |||
| Status Init(); | |||
| ScopeTree(const ScopeTree &scopetree) = delete; | |||
| ScopeTree &operator=(const ScopeTree &scopetree) = delete; | |||
| ~ScopeTree(); | |||
| std::vector<Scope *> GetAllScopes() const; | |||
| private: | |||
| class ScopeTreeImpl; | |||
| std::unique_ptr<ScopeTreeImpl> impl_; | |||
| friend class ScopeGraph; | |||
| friend class ScopeBasePass; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeGraph { | |||
| public: | |||
| ScopeGraph(); | |||
| Status Init(); | |||
| ScopeGraph(const ScopeGraph &scope_graph) = delete; | |||
| ScopeGraph &operator=(const ScopeGraph &scope_graph) = delete; | |||
| ~ScopeGraph(); | |||
| const ScopeTree *GetScopeTree() const; | |||
| std::map<std::string, ge::OperatorPtr> GetNodesMap() const; | |||
| private: | |||
| class ScopeGraphImpl; | |||
| std::unique_ptr<ScopeGraphImpl> impl_; | |||
| friend class ScopePassManager; | |||
| friend class ScopeBasePass; | |||
| friend class TensorFlowModelParser; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeAttrValue { | |||
| public: | |||
| ScopeAttrValue(); | |||
| ScopeAttrValue(ScopeAttrValue const &attr_value); | |||
| ScopeAttrValue &operator=(ScopeAttrValue const &attr_value); | |||
| ~ScopeAttrValue(); | |||
| void SetIntValue(int64_t value); | |||
| void SetFloatValue(float value); | |||
| void SetStringValue(std::string value); | |||
| void SetBoolValue(bool value); | |||
| private: | |||
| class ScopeAttrValueImpl; | |||
| std::unique_ptr<ScopeAttrValueImpl> impl_; | |||
| friend class NodeAttrFeature; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeBaseFeature { | |||
| public: | |||
| virtual bool Match(const Scope *scope) = 0; | |||
| virtual ~ScopeBaseFeature(){}; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY NodeOpTypeFeature : ScopeBaseFeature { | |||
| public: | |||
| NodeOpTypeFeature(std::string nodeType, int num, int step = 0); | |||
| NodeOpTypeFeature(NodeOpTypeFeature const &feature); | |||
| NodeOpTypeFeature &operator=(NodeOpTypeFeature const &feature); | |||
| ~NodeOpTypeFeature(); | |||
| bool Match(const Scope *scope) override; | |||
| private: | |||
| class NodeOpTypeFeatureImpl; | |||
| std::unique_ptr<NodeOpTypeFeatureImpl> impl_; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY NodeAttrFeature : ScopeBaseFeature { | |||
| public: | |||
| NodeAttrFeature(std::string nodeType, std::string attr_name, ge::DataType datatype, ScopeAttrValue attr_value); | |||
| NodeAttrFeature(NodeAttrFeature const &feature); | |||
| NodeAttrFeature &operator=(NodeAttrFeature const &feature); | |||
| ~NodeAttrFeature(); | |||
| bool Match(const Scope *scope) override; | |||
| private: | |||
| class NodeAttrFeatureImpl; | |||
| std::unique_ptr<NodeAttrFeatureImpl> impl_; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeFeature : ScopeBaseFeature { | |||
| public: | |||
| ScopeFeature(std::string sub_type, int32_t num, std::string suffix = "", std::string sub_scope_mask = "", | |||
| int step = 0); | |||
| ScopeFeature(ScopeFeature const &feature); | |||
| ScopeFeature &operator=(ScopeFeature const &feature); | |||
| ~ScopeFeature(); | |||
| bool Match(const Scope *scope) override; | |||
| private: | |||
| class ScopeFeatureImpl; | |||
| std::unique_ptr<ScopeFeatureImpl> impl_; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopePattern { | |||
| public: | |||
| ScopePattern(); | |||
| ~ScopePattern(); | |||
| ScopePattern &SetSubType(const std::string &sub_type); | |||
| ScopePattern &AddNodeOpTypeFeature(NodeOpTypeFeature feature); | |||
| ScopePattern &AddNodeAttrFeature(NodeAttrFeature feature); | |||
| ScopePattern &AddScopeFeature(ScopeFeature feature); | |||
| private: | |||
| class ScopePatternImpl; | |||
| std::unique_ptr<ScopePatternImpl> impl_; | |||
| friend class ScopeBasePass; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopesResult { | |||
| public: | |||
| ScopesResult(); | |||
| ScopesResult(ScopesResult const &result); | |||
| ScopesResult &operator=(ScopesResult const &result); | |||
| ~ScopesResult(); | |||
| void SetScopes(std::vector<Scope *> &scopes); | |||
| void SetNodes(std::vector<ge::OperatorPtr> &nodes); | |||
| private: | |||
| class ScopesResultImpl; | |||
| std::unique_ptr<ScopesResultImpl> impl_; | |||
| friend class ScopeBasePass; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeBasePass { | |||
| public: | |||
| ScopeBasePass(); | |||
| virtual ~ScopeBasePass(); | |||
| protected: | |||
| // Subclasses implement respective fusion strategies and build the Patterns | |||
| virtual std::vector<ScopeFusionPatterns> DefinePatterns() = 0; | |||
| // Define the name of the scope pass | |||
| virtual std::string PassName() = 0; | |||
| // Subclasses implement respective multi-scope or operator fusion methods across scopes | |||
| virtual Status LastMatchScopesAndOPs(std::shared_ptr<ScopeGraph> &scope_graph, | |||
| std::vector<ScopesResult> &results) = 0; | |||
| // Subclasses implement their own results and set the input and output of the final fusion operator | |||
| virtual void GenerateFusionResult(const std::vector<Scope *> &scopes, FusionScopesResult *fusion_rlt) = 0; | |||
| private: | |||
| class ScopeBasePassImpl; | |||
| std::unique_ptr<ScopeBasePassImpl> impl_; | |||
| friend class ge::ScopePassManager; | |||
| friend class ScopeBasePassImpl; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeFusionPassRegistry { | |||
| public: | |||
| using CreateFn = ScopeBasePass *(*)(); | |||
| ~ScopeFusionPassRegistry(); | |||
| static ScopeFusionPassRegistry &GetInstance() { | |||
| static ScopeFusionPassRegistry instance; | |||
| return instance; | |||
| } | |||
| void RegisterScopeFusionPass(const std::string &pass_name, CreateFn create_fn, bool is_general); | |||
| private: | |||
| ScopeFusionPassRegistry(); | |||
| class ScopeFusionPassRegistryImpl; | |||
| /*lint -e148*/ | |||
| std::unique_ptr<ScopeFusionPassRegistryImpl> impl_; | |||
| friend class TensorFlowModelParser; | |||
| }; | |||
| class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeUtil { | |||
| public: | |||
| static std::string StringReplaceAll(std::string str, const std::string &old_value, const std::string &new_value); | |||
| static void FreeScopePatterns(ScopeFusionPatterns &patterns); | |||
| static void FreeOneBatchPattern(std::vector<ScopePattern *> &one_batch_pattern); | |||
| }; | |||
// Registration helper: constructing a (typically file-local static) instance
// registers create_fn under pass_name — see REGISTER_SCOPE_FUSION_PASS below.
class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY ScopeFusionPassRegistrar {
 public:
  ScopeFusionPassRegistrar(const char *pass_name, ScopeBasePass *(*create_fn)(), bool is_general);
  ~ScopeFusionPassRegistrar() {}
};
// Registers scope_pass (a ScopeBasePass subclass) under pass_name by defining
// a file-scope static ScopeFusionPassRegistrar. __COUNTER__ makes the variable
// name unique so multiple passes can be registered in one translation unit.
#define REGISTER_SCOPE_FUSION_PASS(pass_name, scope_pass, is_general) \
  REGISTER_SCOPE_FUSION_PASS_UNIQ_HELPER(__COUNTER__, pass_name, scope_pass, is_general)
// Extra level of indirection so ctr (__COUNTER__) is expanded before pasting.
#define REGISTER_SCOPE_FUSION_PASS_UNIQ_HELPER(ctr, pass_name, scope_pass, is_general) \
  REGISTER_SCOPE_FUSION_PASS_UNIQ(ctr, pass_name, scope_pass, is_general)
// The factory lambda uses new(std::nothrow), so creation failure yields
// nullptr instead of throwing during static initialization.
#define REGISTER_SCOPE_FUSION_PASS_UNIQ(ctr, pass_name, scope_pass, is_general)                   \
  static ::ge::ScopeFusionPassRegistrar register_scope_fusion_pass##ctr __attribute__((unused)) = \
      ::ge::ScopeFusionPassRegistrar(                                                             \
          pass_name, []() -> ::ge::ScopeBasePass * { return new (std::nothrow) scope_pass(); }, is_general)
| } // namespace ge | |||
| #endif // EXTERNAL_REGISTER_SCOPE_SCOPE_FUSION_PASS_REGISTER_H_ | |||
| @@ -22,7 +22,7 @@ | |||
| #include <string> | |||
| #include <vector> | |||
| #include "common/fmk_error_codes.h" | |||
| #include "framework/common/fmk_error_codes.h" | |||
| #include "ge/ge_api_error_codes.h" | |||
| #include "external/graph/types.h" | |||
| #include "external/ge/ge_api_types.h" | |||
| @@ -49,6 +49,7 @@ enum OpEngineType { | |||
| }; | |||
| const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | |||
| const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | |||
| // Data cache, including data address and length | |||
| struct DataBuffer { | |||
| @@ -128,6 +129,7 @@ struct OriginInputInfo { | |||
| // The structure of AIPP info | |||
| struct AippConfigInfo { | |||
| int8_t aipp_mode; | |||
| int8_t input_format; | |||
| int32_t src_image_size_w; | |||
| int32_t src_image_size_h; | |||
| @@ -175,6 +177,9 @@ struct AippConfigInfo { | |||
| float var_reci_chn_1; | |||
| float var_reci_chn_2; | |||
| float var_reci_chn_3; | |||
| int8_t support_rotation; | |||
| uint32_t related_input_rank; | |||
| uint32_t max_src_image_size; | |||
| }; | |||
| // The structure of offline Modeldata | |||
| @@ -250,5 +255,31 @@ struct ComputeGraphDescInfo { | |||
| std::vector<std::vector<int64_t>> output_shape; | |||
| std::vector<DataType> output_data_type; | |||
| }; | |||
// Runtime description of one op: identity, scheduling ids, and per-input /
// per-output format, shape, data type and address.
struct OpDescInfo {
  std::string op_name;
  uint32_t task_id;    // runtime task id the op was dispatched as
  uint32_t stream_id;  // stream the task runs on
  std::vector<Format> input_format;                 // one entry per input
  std::vector<std::vector<int64_t>> input_shape;
  std::vector<DataType> input_data_type;
  std::vector<void *> input_addrs;   // input buffer addresses — presumably device memory; confirm with producer
  std::vector<Format> output_format;                // one entry per output
  std::vector<std::vector<int64_t>> output_shape;
  std::vector<DataType> output_data_type;
  std::vector<void *> output_addrs;  // output buffer addresses — presumably device memory; confirm with producer
};
// Dump selection for a single model: the model name plus which layers to dump.
struct ModelDumpConfig {
  std::string model_name;
  std::vector<std::string> layers;
};
// Data-dump configuration, passed to GeExecutor::SetDump (see declaration in
// this change). String fields are free-form switches interpreted by the dump
// component — accepted values are not visible here; confirm against it.
struct DumpConfig {
  std::string dump_path;       // directory dump files are written to
  std::string dump_mode;       // which tensors to dump — TODO confirm accepted values
  std::string dump_status;     // global dump on/off switch — TODO confirm accepted values
  std::string dump_op_switch;  // per-op dump switch — TODO confirm accepted values
  std::vector<ModelDumpConfig> dump_list;  // per-model layer selection
};
| } // namespace ge | |||
| #endif // INC_FRAMEWORK_COMMON_GE_TYPES_H_ | |||
| @@ -606,6 +606,7 @@ static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 79; | |||
| /// @brief INPUT node type | |||
| /// | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string INPUT_TYPE; | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMMY_DATA; | |||
| /// | |||
| /// @ingroup domi_omg | |||
| @@ -347,6 +347,14 @@ std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) { | |||
| /// | |||
| uint64_t GetCurrentTimestap(); | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Obtains the absolute time (timestamp) of the current system. | |||
/// @return Timestamp, in seconds
| /// | |||
| /// | |||
| uint32_t GetCurrentSecondTimestap(); | |||
| /// | |||
| /// @ingroup domi_common | |||
| /// @brief Check whether the product of two int64 numbers exceeds the int64 range. | |||
| @@ -31,6 +31,7 @@ enum PriorityEnum { | |||
| COST_1, | |||
| COST_2, | |||
| COST_9 = 9, | |||
| COST_10 = 10, | |||
| }; | |||
| struct DNNEngineAttribute { | |||
| @@ -135,6 +135,15 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
| /// | |||
| ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
| /// | |||
| /// @ingroup ge | |||
/// @brief Get user designated shape order
| /// @param [in] model_id | |||
| /// @param [out] user_designate_shape_order | |||
| /// @return execute result | |||
| /// | |||
| ge::Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order); | |||
| ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
| /// | |||
| @@ -162,6 +171,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
| ge::Status CommandHandle(const ge::Command &command); | |||
| ge::Status SetDump(const DumpConfig &dump_config); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Query model memory consuming interface | |||
| @@ -261,6 +272,7 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
| ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||
| ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||
| std::vector<InputOutputDims> &output_dims); | |||
| ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||
| private: | |||
| static bool isInit_; | |||
| @@ -27,6 +27,7 @@ namespace ge { | |||
| enum MemStorageType { | |||
| HBM = 0, | |||
| RDMA_HBM, | |||
| HOST_DDR, | |||
| }; | |||
| struct HostVarInfo { | |||
| @@ -96,6 +96,10 @@ Status CheckCustomAiCpuOpLib(); | |||
| Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); | |||
| Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); | |||
| Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info); | |||
| void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||
| std::vector<std::string> &output_nodes_name); | |||
| @@ -883,6 +883,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string REF_VAR_ | |||
| // Assign | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ASSIGN_VALIDATE_SHAPE; | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ASSIGN_VAR_NAME; | |||
| // ShapeN | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string SHAPEN_ATTR_N; | |||
| @@ -939,6 +940,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_NUM; | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_LABEL; | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_BATCH; | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_USER_DESIGNEATE_SHAPE_ORDER; | |||
| // Control flow | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_SWITCH_COND; | |||
| @@ -957,7 +959,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
| // Function Op | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_NODE_INDEX; | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_CONST_TYPE; | |||
| // Used for mark the active node is for loop, type:bool | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IS_LOOP_ACTIVE; | |||
| @@ -968,6 +969,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MEMORY_TYPE_WORKSPACE; | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_MEMORY_TYPE_RANGE; | |||
| // Atomic addr clean attrs | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATOMIC_ATTR_INPUT_INDEX; | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATOMIC_ATTR_OUTPUT_INDEX; | |||
| @@ -24,6 +24,7 @@ namespace ge { | |||
| class GEContext { | |||
| public: | |||
| graphStatus GetOption(const std::string &key, std::string &option); | |||
| bool GetHostExecFlag(); | |||
| uint64_t SessionId(); | |||
| uint32_t DeviceId(); | |||
| uint64_t TraceId(); | |||
| @@ -153,9 +153,6 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder { | |||
| graphStatus AddDynamicOutputDesc(const string &name, const unsigned int num, bool isPushBack = true); | |||
| void RemoveInputDesc(uint32_t index); | |||
| void RemoveOutputDesc(uint32_t index); | |||
| bool IsOptionalInput(const string &name) const; | |||
| bool IsOptionalInput(uint32_t index) const; | |||
| @@ -20,6 +20,7 @@ | |||
| #include <set> | |||
| #include <map> | |||
| #include <vector> | |||
| #include "external/graph/operator.h" | |||
| #include "graph/node.h" | |||
| namespace ge { | |||
| @@ -63,8 +64,11 @@ class NodeUtils { | |||
| static void UnlinkAll(const Node &node); | |||
| static graphStatus UpdatePeerNodeInputDesc(const NodePtr &node_ptr); | |||
| static graphStatus AppendInputAnchor(const NodePtr &node, uint32_t index); | |||
| static graphStatus RemoveInputAnchor(const NodePtr &node, uint32_t index); | |||
| static graphStatus AppendInputAnchor(const NodePtr &node, uint32_t num); | |||
| static graphStatus RemoveInputAnchor(const NodePtr &node, uint32_t num); | |||
| static graphStatus AppendOutputAnchor(const NodePtr &node, uint32_t num); | |||
| static graphStatus RemoveOutputAnchor(const NodePtr &node, uint32_t num); | |||
| static bool IsInNodesEmpty(const Node &node); | |||
| static GeTensorDesc GetOutputDesc(const Node &node, uint32_t index); | |||
| @@ -77,6 +81,7 @@ class NodeUtils { | |||
| static graphStatus GetNodeUnknownShapeStatus(const Node &node, bool &is_unknow); | |||
| static std::string GetNodeType(const Node &node); | |||
| static std::string GetNodeType(const NodePtr &node); | |||
| static ComputeGraphPtr GetSubgraph(const Node &node, uint32_t index); | |||
| static graphStatus SetSubgraph(Node &node, uint32_t index, const ComputeGraphPtr &subgraph); | |||
| @@ -100,8 +105,17 @@ class NodeUtils { | |||
| /// @param [in] node | |||
| /// @return Node | |||
| /// | |||
| static NodePtr GetParentInput(const Node &node); | |||
| static NodePtr GetParentInput(const NodePtr &node); | |||
| /// | |||
| /// @brief Get is dynamic shape graph from node. | |||
| /// @param [in] node | |||
| /// @return bool | |||
| /// | |||
| static bool IsDynamicShape(const Node &node); | |||
| static bool IsDynamicShape(const NodePtr &node); | |||
| /// | |||
| /// @brief Check is varying_input for while node | |||
| /// @param [in] node: Data node for subgraph | |||
| @@ -115,7 +129,7 @@ class NodeUtils { | |||
| /// @param [out] string | |||
| /// @return bool | |||
| /// | |||
| static bool GetConstOpType(const NodePtr &in_node, std::string &op_type); | |||
| static bool GetConstOpType(const NodePtr &node, std::string &type); | |||
| /// | |||
| /// @brief Remove node-related subgraphs, including subgraphs of nodes in the subgraph. | |||
| @@ -138,9 +152,15 @@ class NodeUtils { | |||
| /// | |||
| static vector<NodePtr> GetSubgraphOutputNodes(const Node &node); | |||
| static NodePtr GetInDataNodeByIndex(const Node &node, int index); | |||
| static NodePtr GetInDataNodeByIndex(const Node &node, const int index); | |||
| static vector<pair<InDataAnchorPtr, NodePtr>> GetOutDataNodesWithAnchorByIndex(const Node &node, const int index); | |||
| static ge::ConstNodePtr GetNodeFromOperator(const Operator &oprt); | |||
| static graphStatus GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor); | |||
| static vector<NodePtr> GetOutDataNodesByIndex(const Node &node, int index); | |||
| static graphStatus GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor); | |||
| private: | |||
| static std::map<NodePtr, std::vector<uint32_t>> map_send_info_; | |||
| @@ -34,6 +34,7 @@ class TypeUtils { | |||
| static bool IsFormatValid(Format format); | |||
| static bool IsInternalFormat(Format format); | |||
| static std::string ImplyTypeToSerialString(domi::ImplyType imply_type); | |||
| static std::string DataTypeToSerialString(DataType data_type); | |||
| static DataType SerialStringToDataType(const std::string &str); | |||
| static std::string FormatToSerialString(Format format); | |||
| @@ -830,6 +830,7 @@ const std::string REF_VAR_PRE_PEER_OUT_INDEX = "ref_var_pre_peer_out_index"; | |||
| // Assign | |||
| const std::string ASSIGN_VALIDATE_SHAPE = "validate_shape"; | |||
| const std::string ASSIGN_VAR_NAME = "_assign_var_name"; | |||
| // space2bacth batch2space | |||
| const std::string BATCH_SPACE_ATTR_BLOCK = "block"; | |||
| @@ -931,7 +932,6 @@ const std::string ATTR_NAME_NEXT_ITERATION = "_next_iteration_node"; | |||
| // Function Op | |||
| const std::string ATTR_NAME_PARENT_NODE_INDEX = "_parent_node_index"; | |||
| const std::string ATTR_NAME_PARENT_CONST_TYPE = "_parent_const_type"; | |||
| // Used for mark the active node is for loop, type:bool | |||
| const std::string ATTR_NAME_IS_LOOP_ACTIVE = "is_loop_active"; | |||
| @@ -942,6 +942,8 @@ const std::string ATTR_NAME_MEMORY_TYPE_OUTPUT = "memory_type_output"; | |||
| const std::string ATTR_NAME_MEMORY_TYPE_WORKSPACE = "memory_type_workspace"; | |||
| const std::string ATTR_NAME_MEMORY_TYPE_RANGE = "_memory_type_range"; | |||
| const std::string MODEL_ATTR_SESSION_ID = "session_id"; | |||
| // lx fusion | |||
| @@ -991,6 +993,8 @@ const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS = "_mbatch_origin_input_dims"; | |||
| const std::string ATTR_DYNAMIC_TYPE = "mbatch_dynamic_type"; | |||
| const std::string ATTR_USER_DESIGNEATE_SHAPE_ORDER = "user_designate_shape_order"; | |||
| // For inserted op | |||
| const std::string ATTR_INSERTED_BY_GE = "_inserted_by_ge"; | |||
| @@ -684,18 +684,6 @@ graphStatus OpDesc::AddDynamicOutputDesc(const string &name, const unsigned int | |||
| return GRAPH_SUCCESS; | |||
| } | |||
// NOTE(review): despite the name, this does not erase the single descriptor at
// `index` — it truncates inputs_desc_ down to `index` elements by popping from
// the back, i.e. only the first `index` input descs are kept.
void OpDesc::RemoveInputDesc(uint32_t index) {
  while (inputs_desc_.size() > index) {
    inputs_desc_.pop_back();
  }
}
// NOTE(review): mirrors RemoveInputDesc — truncates outputs_desc_ to `index`
// elements (keeps the first `index` output descs) rather than erasing one entry.
void OpDesc::RemoveOutputDesc(uint32_t index) {
  while (outputs_desc_.size() > index) {
    outputs_desc_.pop_back();
  }
}
| bool OpDesc::IsOptionalInput(const string &name) const { | |||
| return optional_input_names_.find(name) != optional_input_names_.end(); | |||
| } | |||
| @@ -277,6 +277,22 @@ class OperatorImpl : public std::enable_shared_from_this<OperatorImpl> { | |||
| return output_ptr; | |||
| } | |||
  // Builds an OutHandler (shared OpIO) for the index'th output of this op.
  // Returns nullptr — after logging — when the op desc is missing, the index
  // resolves to no output name, or the OpIO allocation fails.
  OutHandler GetOutput(uint32_t index) {
    GE_CHK_BOOL_EXEC(op_desc_ != nullptr, return nullptr, "op_desc_ is nullptr.");
    string name = op_desc_->GetOutputNameByIndex(index);
    if (name.empty()) {
      GELOGE(GRAPH_FAILED, "Find src name by index failed. index[%u]", index);
      return nullptr;
    }
    // shared_from_this(): the handler keeps this OperatorImpl alive.
    shared_ptr<OpIO> output_ptr = ComGraphMakeShared<OpIO>(name, index, shared_from_this());
    if (output_ptr == nullptr) {
      GELOGE(GRAPH_FAILED, "OpIO make shared failed");
      return nullptr;
    }
    return output_ptr;
  }
| GeTensorDesc GetOutputDesc(const string &name) const { | |||
| GE_CHK_BOOL_EXEC(op_desc_ != nullptr, return GeTensorDesc(), "op_desc_ is nullptr."); | |||
| @@ -540,6 +556,13 @@ Operator &Operator::SetInput(const std::string &dst_name, const ge::Operator &sr | |||
| return *this; | |||
| } | |||
// Connects the index'th output of src_oprt to this operator's input dst_name.
// When the output handler cannot be resolved, the link is skipped (error is
// logged by GE_CHK_BOOL_EXEC) and *this is returned unchanged, keeping the
// fluent chaining style of the other SetInput overloads.
Operator &Operator::SetInput(const std::string &dst_name, const ge::Operator &src_oprt, uint32_t index) {
  auto out_handler = src_oprt.GetOutput(index);
  GE_CHK_BOOL_EXEC(out_handler != nullptr, return *this, "out_handler is nullptr.");
  (void)SetInput(dst_name, out_handler);
  return *this;
}
| Operator &Operator::AddControlInput(const Operator &src_oprt) { | |||
| if (operator_impl_ == nullptr) { | |||
| GELOGE(GRAPH_FAILED, "operator impl is nullptr."); | |||
| @@ -621,6 +644,11 @@ graphStatus Operator::GetInputConstDataOut(const string &dst_name, Tensor &data) | |||
| return GRAPH_FAILED; | |||
| } | |||
// Returns the graph Node backing this Operator, or nullptr (after logging)
// when no implementation is attached.
std::shared_ptr<const Node> Operator::GetNode() const {
  GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return nullptr, "operator impl is nullptr.");
  return operator_impl_->GetNode();
}
| TensorDesc Operator::GetInputDesc(const std::string &name) const { | |||
| GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return TensorDesc(), "operator impl is nullptr."); | |||
| return TensorAdapter::GeTensorDesc2TensorDesc(operator_impl_->GetInputDesc(name)); | |||
| @@ -657,6 +685,11 @@ OutHandler Operator::GetOutput(const string &name) const { | |||
| return operator_impl_->GetOutput(name); | |||
| } | |||
// Index-based overload of GetOutput: delegates to the impl. Returns nullptr
// (after logging) when no implementation is attached.
OutHandler Operator::GetOutput(uint32_t index) const {
  GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return nullptr, "operator impl is nullptr.");
  return operator_impl_->GetOutput(index);
}
| TensorDesc Operator::GetOutputDesc(const std::string &name) const { | |||
| GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return TensorDesc(), "operator impl is nullptr."); | |||
| return TensorAdapter::GeTensorDesc2TensorDesc(operator_impl_->GetOutputDesc(name)); | |||
| @@ -1540,6 +1573,7 @@ void GraphUtils::BreakConnect(const std::map<OperatorImplPtr, NodePtr> &all_node | |||
| } | |||
| op_impl->ClearOutputLinks(); | |||
| op_impl->ClearInputLinks(); | |||
| OperatorKeeper::GetInstance().CheckOutOperator(op_impl); | |||
| } | |||
| } | |||
| } // namespace ge | |||
| @@ -17,12 +17,14 @@ | |||
| #include "./ge_context.h" | |||
| #include "./ge_global_options.h" | |||
| #include "./ge_local_context.h" | |||
| #include "framework/common/ge_types.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| namespace ge { | |||
| namespace { | |||
| const int64_t kMinTrainingTraceJobId = 256; | |||
| const int kDecimal = 10; | |||
| const char *kHostExecPlacement = "HOST"; | |||
| } // namespace | |||
| GEContext &GetContext() { | |||
| static GEContext ge_context{}; | |||
| @@ -33,6 +35,16 @@ graphStatus GEContext::GetOption(const std::string &key, std::string &option) { | |||
| return GetThreadLocalContext().GetOption(key, option); | |||
| } | |||
// Returns true when the thread-local option ge.exec.placement equals "HOST"
// (kHostExecPlacement). A missing/unreadable option is treated as non-host
// placement: a warning is logged and false is returned.
bool GEContext::GetHostExecFlag() {
  std::string exec_placement;
  if (GetThreadLocalContext().GetOption(GE_OPTION_EXEC_PLACEMENT, exec_placement) != GRAPH_SUCCESS) {
    GELOGW("get option OPTION_EXEC_PLACEMENT failed.");
    return false;
  }
  GELOGD("Option ge.exec.placement is %s.", exec_placement.c_str());
  return exec_placement == kHostExecPlacement;
}
| std::map<std::string, std::string> &GetMutableGlobalOptions() { | |||
| static std::map<std::string, std::string> global_options{}; | |||
| return global_options; | |||
| @@ -243,8 +243,8 @@ graphStatus RefRelations::Impl::BuildRefRelationsForWhile( | |||
| } | |||
| auto in_data_anchor_idx = in_anchor->GetIdx(); | |||
| auto net_in_desc = netoutput->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_data_anchor_idx)); | |||
| int ref_d; | |||
| int ref_n; | |||
| int ref_d = 0; | |||
| int ref_n = 0; | |||
| (void)AttrUtils::GetInt(peer_out_data_node->GetOpDesc(), kRefIndex, ref_d); | |||
| (void)AttrUtils::GetInt(net_in_desc, kRefIndex, ref_n); | |||
| @@ -351,6 +351,66 @@ graphStatus UpdateParentNodeOutTensor(const ConstNodePtr &node) { | |||
| } | |||
| return UpdateParentNodeForBranch(node, ref_out_tensors); | |||
| } | |||
// Renders dims as "[d0 d1 ... ]" — each dimension followed by one space,
// wrapped in brackets; an empty vector yields "[]".
string Serial(const vector<int64_t> &dims) {
  string result = "[";
  for (const int64_t dim : dims) {
    result.append(std::to_string(dim)).append(" ");
  }
  result.append("]");
  return result;
}
| graphStatus UpdateOpInputDesc(const ConstNodePtr &node_ptr) { | |||
| GE_IF_BOOL_EXEC(node_ptr == nullptr, GELOGE(GRAPH_FAILED, "node is null."); return GRAPH_FAILED); | |||
| GE_IF_BOOL_EXEC(node_ptr->GetOpDesc() == nullptr, GELOGE(GRAPH_FAILED, "op_desc is null."); return GRAPH_FAILED); | |||
| for (const auto &in_anchor : node_ptr->GetAllInDataAnchors()) { | |||
| auto in_idx = in_anchor->GetIdx(); | |||
| auto peer_out_data_anchor = in_anchor->GetPeerOutAnchor(); | |||
| if (peer_out_data_anchor == nullptr) { | |||
| continue; | |||
| } | |||
| auto peer_out_data_node = peer_out_data_anchor->GetOwnerNode(); | |||
| if (peer_out_data_node == nullptr || peer_out_data_node->GetOpDesc() == nullptr) { | |||
| continue; | |||
| } | |||
| int peer_out_idx = peer_out_data_anchor->GetIdx(); | |||
| auto in_desc = node_ptr->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_idx)); | |||
| auto peer_out_desc = peer_out_data_node->GetOpDesc()->MutableOutputDesc(static_cast<uint32_t>(peer_out_idx)); | |||
| // check shape and dtype continuity. do not stop process | |||
| auto in_shape = in_desc->GetShape().GetDims(); | |||
| auto in_dtype = in_desc->GetDataType(); | |||
| auto peer_out_shape = peer_out_desc->GetShape().GetDims(); | |||
| auto peer_out_dtype = peer_out_desc->GetDataType(); | |||
| if (peer_out_dtype != in_dtype) { | |||
| GELOGW( | |||
| "current node [%s] [%d]\'th out_dtype is [%s].peer output node [%s] [%d]\'th " | |||
| "output_dtype is [%s].The two dtype should be same! Please check graph and fix it", | |||
| node_ptr->GetName().c_str(), in_idx, TypeUtils::DataTypeToSerialString(in_dtype).c_str(), | |||
| peer_out_data_node->GetName().c_str(), peer_out_idx, TypeUtils::DataTypeToSerialString(peer_out_dtype).c_str()); | |||
| } else if ((!in_shape.empty()) && (in_shape != peer_out_shape)) { | |||
| string in_shape_str = Serial(in_shape); | |||
| string peer_out_shape_str = Serial(peer_out_shape); | |||
| GELOGW( | |||
| "current node [%s] [%d]\'th out_shape is [%s].peer input node [%s] [%d]\'th " | |||
| "input_shape is [%s].The two shape should be same! Please check graph and fix it", | |||
| node_ptr->GetName().c_str(), in_idx, in_shape_str.c_str(), peer_out_data_node->GetName().c_str(), peer_out_idx, | |||
| peer_out_shape_str.c_str()); | |||
| } | |||
| // refresh current node input desc | |||
| in_desc->SetOriginShape(peer_out_desc->GetOriginShape()); | |||
| in_desc->SetShape(peer_out_desc->GetShape()); | |||
| in_desc->SetDataType(peer_out_desc->GetDataType()); | |||
| in_desc->SetOriginDataType(peer_out_desc->GetOriginDataType()); | |||
| std::vector<std::pair<int64_t, int64_t>> shape_range; | |||
| (void)peer_out_desc->GetShapeRange(shape_range); | |||
| in_desc->SetShapeRange(shape_range); | |||
| ge::TensorUtils::SetRealDimCnt(*in_desc, static_cast<uint32_t>(peer_out_desc->GetShape().GetDims().size())); | |||
| } | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| } // namespace | |||
| void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase) { | |||
| if (!IsLogEnable(GE, DLOG_DEBUG)) { | |||
| @@ -427,9 +487,7 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator & | |||
| return InferShapeAndType(node, op, true); | |||
| } | |||
| graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator &op, bool before_subgraph) { | |||
| GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "node is null."); return GRAPH_FAILED); | |||
| auto op_desc = node->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(GRAPH_FAILED, "op_desc is null."); return GRAPH_FAILED); | |||
| const auto &op_type = op_desc->GetType(); | |||
| graphStatus ret; | |||
| @@ -554,6 +612,19 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferShapeAndType(const NodePtr &node, | |||
| bool before_subgraph) { | |||
| GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "node is null."); return GRAPH_FAILED); | |||
| bool is_unknown_graph = node->GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||
| auto opdesc = node->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(opdesc == nullptr, GELOGE(GRAPH_FAILED, "op_desc is null."); return GRAPH_FAILED); | |||
| // some op can not infershape twice such as aipp | |||
| bool need_update_input = !is_unknown_graph && !opdesc->HasAttr("has_infered_verified"); | |||
| if (need_update_input) { | |||
| auto status = UpdateOpInputDesc(node); | |||
| if (status != GRAPH_SUCCESS) { | |||
| GELOGE(GRAPH_FAILED, "update op input_desc failed!"); | |||
| return status; | |||
| } | |||
| } | |||
| if (node->Verify() != GRAPH_SUCCESS) { | |||
| GELOGE(GRAPH_FAILED, "Verifying %s failed.", node->GetName().c_str()); | |||
| return GRAPH_FAILED; | |||
| @@ -561,7 +632,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh | |||
| PrintInOutTensorShape(node, "before_infershape"); | |||
| Operator op = OpDescUtils::CreateOperatorFromNode(node); | |||
| bool is_unknown_graph = node->GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||
| if (!is_unknown_graph) { | |||
| auto inference_context = CreateInferenceContext(context_map, node); | |||
| if (inference_context == nullptr) { | |||
| @@ -574,7 +644,21 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh | |||
| graphStatus status = InferShapeAndType(node, op, before_subgraph); | |||
| if (status == GRAPH_PARAM_INVALID || status == GRAPH_SUCCESS) { | |||
| (void)ge::NodeUtils::UpdatePeerNodeInputDesc(node); | |||
| if (is_unknown_graph) { | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| auto op_desc = node->GetOpDesc(); | |||
| for (const auto &out_anchor : node->GetAllOutDataAnchors()) { | |||
| auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); | |||
| ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast<uint32_t>(output_tensor->GetShape().GetDims().size())); | |||
| output_tensor->SetOriginShape(output_tensor->GetShape()); | |||
| output_tensor->SetOriginDataType(output_tensor->GetDataType()); | |||
| GELOGD("node name is %s, origin shape is %ld, origin format is %s, origin data type is %s", | |||
| node->GetName().c_str(), output_tensor->GetOriginShape().GetShapeSize(), | |||
| TypeUtils::FormatToSerialString(output_tensor->GetOriginFormat()).c_str(), | |||
| TypeUtils::DataTypeToSerialString(output_tensor->GetOriginDataType()).c_str()); | |||
| } | |||
| } else { | |||
| GELOGE(GRAPH_FAILED, "%s call infer function failed.", node->GetName().c_str()); | |||
| return GRAPH_FAILED; | |||
| @@ -15,6 +15,7 @@ | |||
| */ | |||
| #include "utils/node_utils.h" | |||
| #include "utils/op_desc_utils.h" | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "debug/ge_op_types.h" | |||
| #include "debug/ge_util.h" | |||
| @@ -301,6 +302,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||
| } | |||
| for (const auto &out_anchor : node_ptr->GetAllOutDataAnchors()) { | |||
| auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); | |||
| auto out_dims = output_tensor->GetShape().GetDims(); | |||
| auto out_dtype = output_tensor->GetDataType(); | |||
| ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast<uint32_t>(output_tensor->GetShape().GetDims().size())); | |||
| output_tensor->SetOriginShape(output_tensor->GetShape()); | |||
| output_tensor->SetOriginDataType(output_tensor->GetDataType()); | |||
| @@ -320,6 +323,35 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||
| GELOGE(GRAPH_FAILED, "peer_input_desc is nullptr"); | |||
| continue; | |||
| } | |||
| // check shape and dtype continuity. do not stop process | |||
| auto peer_input_dims = peer_input_desc->GetShape().GetDims(); | |||
| auto peer_input_dtype = peer_input_desc->GetDataType(); | |||
| if (out_dtype != peer_input_dtype) { | |||
| GELOGW( | |||
| "current node [%s] [%d]\'th out_dtype is [%s].peer input node [%s] [%d]\'th " | |||
| "input_dtype is [%s].The two dtype should be same! Please check graph and fix it", | |||
| node_ptr->GetName().c_str(), out_anchor->GetIdx(), TypeUtils::DataTypeToSerialString(out_dtype).c_str(), | |||
| peer_anchor->GetOwnerNode()->GetName().c_str(), peer_anchor->GetIdx(), | |||
| TypeUtils::DataTypeToSerialString(peer_input_dtype).c_str()); | |||
| } else if ((!peer_input_dims.empty()) && (out_dims != peer_input_dims)) { | |||
| string out_shape_str, peer_in_shape_str; | |||
| out_shape_str += "["; | |||
| for (int64_t dim : out_dims) { | |||
| out_shape_str += std::to_string(dim) + " "; | |||
| } | |||
| out_shape_str += "]"; | |||
| peer_in_shape_str += "["; | |||
| for (int64_t dim : peer_input_dims) { | |||
| peer_in_shape_str += std::to_string(dim) + " "; | |||
| } | |||
| peer_in_shape_str += "]"; | |||
| GELOGW( | |||
| "current node [%s] [%d]\'th out_shape is [%s].peer input node [%s] [%d]\'th " | |||
| "input_shape is [%s].The two shape should be same! Please check graph and fix it", | |||
| node_ptr->GetName().c_str(), out_anchor->GetIdx(), out_shape_str.c_str(), | |||
| peer_anchor->GetOwnerNode()->GetName().c_str(), peer_anchor->GetIdx(), peer_in_shape_str.c_str()); | |||
| } | |||
| GELOGI("Peer input opdesc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d", | |||
| peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), output_tensor->GetShape().GetDimNum(), | |||
| output_tensor->GetDataType(), output_tensor->GetOriginDataType()); | |||
| @@ -341,15 +373,15 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||
| } | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::AppendInputAnchor(const NodePtr &node, | |||
| uint32_t index) { | |||
| uint32_t num) { | |||
| if (node == nullptr) { | |||
| GELOGE(GRAPH_FAILED, "Nodeptr is nullptr"); | |||
| GELOGE(GRAPH_FAILED, "Input node is null"); | |||
| return GRAPH_FAILED; | |||
| } | |||
| GeTensorDesc data_desc(GeShape(), FORMAT_ND, DT_FLOAT); | |||
| OpDescPtr op_desc = node->op_; | |||
| for (size_t i = op_desc->GetInputsSize(); i < index; ++i) { | |||
| const auto &op_desc = node->GetOpDesc(); | |||
| for (size_t i = op_desc->GetInputsSize(); i < num; ++i) { | |||
| if (op_desc->AddInputDesc(data_desc) != GRAPH_SUCCESS) { | |||
| GELOGE(GRAPH_FAILED, "Add input desc failed"); | |||
| return GRAPH_FAILED; | |||
| @@ -357,7 +389,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::AppendInpu | |||
| auto anchor = ComGraphMakeShared<InDataAnchor>(node, i); | |||
| if (anchor == nullptr) { | |||
| GELOGE(GRAPH_FAILED, "Current in_data_anchor is null, malloc shared_ptr failed."); | |||
| GELOGE(OUT_OF_MEMORY, "Current in data anchor is null, make shared_ptr failed."); | |||
| return GRAPH_FAILED; | |||
| } | |||
| node->in_data_anchors_.push_back(anchor); | |||
| @@ -367,22 +399,81 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::AppendInpu | |||
| } | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::RemoveInputAnchor(const NodePtr &node, | |||
| uint32_t index) { | |||
| uint32_t num) { | |||
| if (node == nullptr) { | |||
| GELOGE(GRAPH_FAILED, "Nodeptr is nullptr"); | |||
| GELOGE(GRAPH_FAILED, "Input node is null"); | |||
| return GRAPH_FAILED; | |||
| } | |||
| OpDescPtr op_desc = node->op_; | |||
| op_desc->RemoveInputDesc(index); | |||
| const auto &op_desc = node->GetOpDesc(); | |||
| while (op_desc->GetInputsSize() > num) { | |||
| if (!OpDescUtils::ClearInputDesc(op_desc, num)) { | |||
| return GRAPH_FAILED; | |||
| } | |||
| } | |||
| while (node->in_data_anchors_.size() > index) { | |||
| auto input_names = op_desc->GetAllInputName(); | |||
| (void)op_desc->UpdateInputName(input_names); | |||
| auto is_input_const = op_desc->GetIsInputConst(); | |||
| is_input_const.resize(num); | |||
| op_desc->SetIsInputConst(is_input_const); | |||
| while (node->in_data_anchors_.size() > num) { | |||
| node->in_data_anchors_.pop_back(); | |||
| } | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::AppendOutputAnchor(const NodePtr &node, | |||
| uint32_t num) { | |||
| if (node == nullptr) { | |||
| GELOGE(GRAPH_FAILED, "Input node is null"); | |||
| return GRAPH_FAILED; | |||
| } | |||
| GeTensorDesc data_desc(GeShape(), FORMAT_ND, DT_FLOAT); | |||
| const OpDescPtr &op_desc = node->GetOpDesc(); | |||
| for (size_t i = op_desc->GetOutputsSize(); i < num; ++i) { | |||
| if (op_desc->AddOutputDesc(data_desc) != GRAPH_SUCCESS) { | |||
| GELOGE(GRAPH_FAILED, "Add output desc failed"); | |||
| return GRAPH_FAILED; | |||
| } | |||
| auto anchor = ComGraphMakeShared<OutDataAnchor>(node, i); | |||
| if (anchor == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Current out data anchor is null, make shared_ptr failed."); | |||
| return GRAPH_FAILED; | |||
| } | |||
| node->out_data_anchors_.push_back(anchor); | |||
| } | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::RemoveOutputAnchor(const NodePtr &node, | |||
| uint32_t num) { | |||
| if (node == nullptr) { | |||
| GELOGE(GRAPH_FAILED, "Input node is null"); | |||
| return GRAPH_FAILED; | |||
| } | |||
| const auto &op_desc = node->GetOpDesc(); | |||
| auto output_names = op_desc->GetAllOutputName(); | |||
| while (op_desc->GetOutputsSize() > num) { | |||
| if (!OpDescUtils::ClearOutputDesc(op_desc, num)) { | |||
| return GRAPH_FAILED; | |||
| } | |||
| } | |||
| (void)op_desc->UpdateOutputName(output_names); | |||
| while (node->out_data_anchors_.size() > num) { | |||
| node->out_data_anchors_.pop_back(); | |||
| } | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| bool NodeUtils::IsInNodesEmpty(const Node &node) { | |||
| for (const auto &in_anchor : node.in_data_anchors_) { | |||
| if (in_anchor != nullptr) { | |||
| @@ -488,11 +579,22 @@ std::string NodeUtils::GetNodeType(const Node &node) { | |||
| if (node.GetType() != FRAMEWORKOP) { | |||
| return node.GetType(); | |||
| } | |||
| std::string type; | |||
| (void)AttrUtils::GetStr(node.GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type); | |||
| return type; | |||
| } | |||
| std::string NodeUtils::GetNodeType(const NodePtr &node) { return node == nullptr ? "" : GetNodeType(*node); } | |||
| graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) { | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) { | |||
| return GRAPH_SUCCESS; | |||
| } | |||
| ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { | |||
| auto op_desc = node.GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| @@ -544,16 +646,17 @@ bool NodeUtils::IsSubgraphInput(const NodePtr &node) { | |||
| if (parent_op_desc == nullptr) { | |||
| return false; | |||
| } | |||
| if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | |||
| bool is_unknown_shape = false; | |||
| (void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); | |||
| if (is_unknown_shape) return false; | |||
| } | |||
| if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) && | |||
| kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 && | |||
| kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) { | |||
| return false; | |||
| // dynamic shape unknown graph false | |||
| // dynamic shape known graph with functional subgraph maybe true | |||
| if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | |||
| if (node->GetOwnerComputeGraph()->GetParentGraph()->GetGraphUnknownFlag()) { | |||
| return false; | |||
| } else { | |||
| if (node->GetOwnerComputeGraph()->GetParentNode()->GetOwnerComputeGraph()->GetParentNode() == nullptr) { | |||
| return false; | |||
| } | |||
| } | |||
| } | |||
| return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); | |||
| @@ -576,15 +679,13 @@ bool NodeUtils::IsSubgraphOutput(const NodePtr &node) { | |||
| } | |||
| if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | |||
| bool is_unknown_shape = false; | |||
| (void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); | |||
| if (is_unknown_shape) return false; | |||
| } | |||
| if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) && | |||
| kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 && | |||
| kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) { | |||
| return false; | |||
| if (node->GetOwnerComputeGraph()->GetParentGraph()->GetGraphUnknownFlag()) { | |||
| return false; | |||
| } else { | |||
| if (node->GetOwnerComputeGraph()->GetParentNode()->GetOwnerComputeGraph()->GetParentNode() == nullptr) { | |||
| return false; | |||
| } | |||
| } | |||
| } | |||
| for (GeTensorDesc &tensor : node->GetOpDesc()->GetAllInputsDesc()) { | |||
| @@ -601,16 +702,14 @@ bool NodeUtils::IsSubgraphOutput(const NodePtr &node) { | |||
| /// @param [in] node | |||
| /// @return Node | |||
| /// | |||
| NodePtr NodeUtils::GetParentInput(const NodePtr &node) { | |||
| GE_CHECK_NOTNULL_EXEC(node, return nullptr); | |||
| NodePtr NodeUtils::GetParentInput(const Node &node) { | |||
| uint32_t parent_index = 0; | |||
| if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
| if (!AttrUtils::GetInt(node.GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
| return nullptr; | |||
| } | |||
| // Subgraph Data Node, check for constant input. | |||
| const ComputeGraphPtr &graph = node->GetOwnerComputeGraph(); | |||
| const ComputeGraphPtr &graph = node.GetOwnerComputeGraph(); | |||
| GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | |||
| const NodePtr &parent_node = graph->GetParentNode(); | |||
| @@ -625,6 +724,26 @@ NodePtr NodeUtils::GetParentInput(const NodePtr &node) { | |||
| return peer_out_anchor->GetOwnerNode(); | |||
| } | |||
| NodePtr NodeUtils::GetParentInput(const NodePtr &node) { return node == nullptr ? node : GetParentInput(*node); } | |||
| /// | |||
| /// @brief Get is dynamic shape graph from node. | |||
| /// @param [in] node | |||
| /// @return bool | |||
| /// | |||
| bool NodeUtils::IsDynamicShape(const Node &node) { | |||
| const auto graph = GraphUtils::FindRootGraph(node.GetOwnerComputeGraph()); | |||
| if (graph == nullptr) { | |||
| return false; | |||
| } | |||
| bool is_dynamic_shape = false; | |||
| (void)AttrUtils::GetBool(graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); | |||
| return is_dynamic_shape; | |||
| } | |||
| bool NodeUtils::IsDynamicShape(const NodePtr &node) { return node == nullptr ? false : IsDynamicShape(*node); } | |||
| /// | |||
| /// @brief Check is varying_input for while node | |||
| /// @param [in] node: Data node for subgraph | |||
| @@ -678,27 +797,22 @@ bool NodeUtils::IsWhileVaryingInput(const ge::NodePtr &node) { | |||
| /// @param [out] string | |||
| /// @return bool | |||
| /// | |||
| bool NodeUtils::GetConstOpType(const NodePtr &in_node, std::string &op_type) { | |||
| GE_CHECK_NOTNULL_EXEC(in_node, return false); | |||
| bool NodeUtils::GetConstOpType(const NodePtr &node, std::string &type) { | |||
| if (node == nullptr) { | |||
| return false; | |||
| } | |||
| if ((in_node->GetType() == CONSTANT) || (in_node->GetType() == CONSTANTOP)) { | |||
| op_type = in_node->GetType(); | |||
| if ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) { | |||
| type = node->GetType(); | |||
| return true; | |||
| } | |||
| if (in_node->GetType() == DATA) { | |||
| std::string const_type; | |||
| if (!AttrUtils::GetStr(in_node->GetOpDesc(), ATTR_NAME_PARENT_CONST_TYPE, const_type)) { | |||
| return false; | |||
| } | |||
| if ((const_type == CONSTANT) || (const_type == CONSTANTOP)) { | |||
| op_type = const_type; | |||
| return true; | |||
| } | |||
| if (node->GetType() != DATA) { | |||
| return false; // not subgraph input node | |||
| } | |||
| return false; | |||
| const auto &parent = GetParentInput(node); | |||
| return GetConstOpType(parent, type); | |||
| } | |||
| /// | |||
| @@ -809,7 +923,7 @@ vector<NodePtr> NodeUtils::GetSubgraphOutputNodes(const Node &node) { | |||
| return out_data_node_vec; | |||
| } | |||
| NodePtr NodeUtils::GetInDataNodeByIndex(const Node &node, int index) { | |||
| NodePtr NodeUtils::GetInDataNodeByIndex(const Node &node, const int index) { | |||
| if (node.GetInDataAnchor(index) == nullptr) { | |||
| return nullptr; | |||
| } | |||
| @@ -819,12 +933,13 @@ NodePtr NodeUtils::GetInDataNodeByIndex(const Node &node, int index) { | |||
| return node.GetInDataAnchor(index)->GetPeerOutAnchor()->GetOwnerNode(); | |||
| } | |||
| vector<NodePtr> NodeUtils::GetOutDataNodesByIndex(const Node &node, int index) { | |||
| vector<NodePtr> out_data_nodes; | |||
| vector<pair<InDataAnchorPtr, NodePtr>> NodeUtils::GetOutDataNodesWithAnchorByIndex(const Node &node, const int index) { | |||
| vector<pair<InDataAnchorPtr, NodePtr>> out_data_nodes; | |||
| auto out_data_anchor = node.GetOutDataAnchor(index); | |||
| if (out_data_anchor == nullptr) { | |||
| return out_data_nodes; | |||
| } | |||
| for (const auto peer_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||
| if (peer_in_anchor == nullptr) { | |||
| continue; | |||
| @@ -832,8 +947,10 @@ vector<NodePtr> NodeUtils::GetOutDataNodesByIndex(const Node &node, int index) { | |||
| if (peer_in_anchor->GetOwnerNode() == nullptr) { | |||
| continue; | |||
| } | |||
| out_data_nodes.emplace_back(peer_in_anchor->GetOwnerNode()); | |||
| out_data_nodes.emplace_back(std::make_pair(peer_in_anchor, peer_in_anchor->GetOwnerNode())); | |||
| } | |||
| return out_data_nodes; | |||
| } | |||
| ConstNodePtr NodeUtils::GetNodeFromOperator(const Operator &oprt) { return oprt.GetNode(); } | |||
| } // namespace ge | |||
| @@ -438,6 +438,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector<ge::NodePtr> OpDescUtils:: | |||
| if (switch_input.size() > 0) { | |||
| ret.insert(ret.end(), switch_input.begin(), switch_input.end()); | |||
| } | |||
| } else if (in_node->GetType() == DATA) { | |||
| auto parent = NodeUtils::GetParentInput(in_node); | |||
| if ((parent != nullptr) && (parent->GetType() == CONSTANT)) { | |||
| ret.push_back(parent); | |||
| } | |||
| } | |||
| } | |||
| return ret; | |||
| @@ -244,6 +244,21 @@ static const std::map<domi::FrameworkType, std::string> kFmkTypeToString = { | |||
| {domi::ANDROID_NN, "android_nn"}, {domi::ONNX, "onnx"}, {domi::FRAMEWORK_RESERVED, "framework_reserved"}, | |||
| }; | |||
| static const std::map<domi::ImplyType, std::string> kImplyTypeToString = { | |||
| {domi::ImplyType::BUILDIN, "buildin"}, {domi::ImplyType::TVM, "tvm"}, {domi::ImplyType::CUSTOM, "custom"}, | |||
| {domi::ImplyType::AI_CPU, "ai_cpu"}, {domi::ImplyType::CCE, "cce"}, {domi::ImplyType::GELOCAL, "gelocal"}, | |||
| {domi::ImplyType::HCCL, "hccl"}, {domi::ImplyType::INVALID, "invalid"}}; | |||
| std::string TypeUtils::ImplyTypeToSerialString(domi::ImplyType imply_type) { | |||
| auto it = kImplyTypeToString.find(imply_type); | |||
| if (it != kImplyTypeToString.end()) { | |||
| return it->second; | |||
| } else { | |||
| GELOGE(GRAPH_FAILED, "ImplyTypeToSerialString: imply_type not supported %u", imply_type); | |||
| return "UNDEFINED"; | |||
| } | |||
| } | |||
| bool TypeUtils::IsDataTypeValid(DataType dt) { | |||
| uint32_t num = static_cast<uint32_t>(dt); | |||
| GE_CHK_BOOL_EXEC((num <= DT_UNDEFINED), return false, "The DataType is invalid"); | |||
| @@ -56,6 +56,9 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge) | |||
| # need to remove dependencies on pb files later | |||
| file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
| "client/ge_api.cc" | |||
| "common/dump/dump_manager.cc" | |||
| "common/dump/dump_properties.cc" | |||
| "common/dump/dump_op.cc" | |||
| "common/formats/format_transfers/*.cc" | |||
| "common/formats/formats.cc" | |||
| "common/formats/utils/formats_trans_utils.cc" | |||
| @@ -124,6 +127,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
| "graph/preprocess/insert_op/ge_aipp_op.cc" | |||
| "graph/preprocess/insert_op/util_insert_aipp_op.cc" | |||
| "graph/preprocess/multi_batch_copy_graph.cc" | |||
| "graph/preprocess/multi_batch_options.cc" | |||
| "host_kernels/add_kernel.cc" | |||
| "host_kernels/broadcast_args_kernel.cc" | |||
| "host_kernels/broadcast_gradient_args_kernel.cc" | |||
| @@ -138,6 +142,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
| "host_kernels/floormod_kernel.cc" | |||
| "host_kernels/gather_v2_kernel.cc" | |||
| "host_kernels/greater_kernel.cc" | |||
| "host_kernels/identity_kernel.cc" | |||
| "host_kernels/kernel_utils.cc" | |||
| "host_kernels/maximum_kernel.cc" | |||
| "host_kernels/mul_kernel.cc" | |||
| @@ -172,10 +177,18 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
| "hybrid/node_executor/aicpu/aicpu_node_executor.cc" | |||
| "hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | |||
| "hybrid/node_executor/controlop/control_op_executor.cc" | |||
| "hybrid/node_executor/ge_local/ge_local_node_executor.cc" | |||
| "hybrid/node_executor/hccl/hccl_node_executor.cc" | |||
| "hybrid/node_executor/hostcpu/ge_local_node_executor.cc" | |||
| "hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" | |||
| "hybrid/node_executor/host_cpu/kernel_factory.cc" | |||
| "hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" | |||
| "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||
| "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||
| "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||
| "hybrid/node_executor/node_executor.cc" | |||
| "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | |||
| "hybrid/node_executor/rts/rts_node_executor.cc" | |||
| "hybrid/node_executor/task_context.cc" | |||
| "init/gelib.cc" | |||
| "model/ge_model.cc" | |||
| @@ -215,6 +228,9 @@ target_link_libraries(ge_runner | |||
| ######### libge_compiler.so ############# | |||
| # need to remove dependencies on pb files later | |||
| file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
| "common/dump/dump_properties.cc" | |||
| "common/dump/dump_manager.cc" | |||
| "common/dump/dump_op.cc" | |||
| "common/formats/format_transfers/*.cc" | |||
| "common/formats/formats.cc" | |||
| "common/formats/utils/formats_trans_utils.cc" | |||
| @@ -274,6 +290,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
| "graph/preprocess/insert_op/ge_aipp_op.cc" | |||
| "graph/preprocess/insert_op/util_insert_aipp_op.cc" | |||
| "graph/preprocess/multi_batch_copy_graph.cc" | |||
| "graph/preprocess/multi_batch_options.cc" | |||
| "host_kernels/add_kernel.cc" | |||
| "host_kernels/broadcast_args_kernel.cc" | |||
| "host_kernels/broadcast_gradient_args_kernel.cc" | |||
| @@ -288,6 +305,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
| "host_kernels/floormod_kernel.cc" | |||
| "host_kernels/gather_v2_kernel.cc" | |||
| "host_kernels/greater_kernel.cc" | |||
| "host_kernels/identity_kernel.cc" | |||
| "host_kernels/kernel_utils.cc" | |||
| "host_kernels/maximum_kernel.cc" | |||
| "host_kernels/mul_kernel.cc" | |||
| @@ -390,6 +390,22 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorIn | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status Session::GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values) { | |||
| auto instance_ptr = ge::GELib::GetInstance(); | |||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GetVariables failed: GELib is not initialized"); | |||
| return FAILED; | |||
| } | |||
| GELOGT(TRACE_RUNNING, "Get Variables"); | |||
| Status ret = ge::GELib::GetInstance()->SessionManagerObj().GetVariables(sessionId_, var_names, var_values); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "SessionManager GetVariables failed"); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| bool Session::IsGraphNeedRebuild(uint32_t graph_id) { | |||
| return ge::GELib::GetInstance()->SessionManagerObj().IsGraphNeedRebuild(sessionId_, graph_id); | |||
| } | |||
| @@ -0,0 +1,120 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/dump/dump_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/debug/log.h" | |||
| namespace { | |||
| const char *const kDumpOFF = "OFF"; | |||
| const char *const kDumpoff = "off"; | |||
| const char *const kDumpOn = "on"; | |||
| } // namespace | |||
| namespace ge { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetInstance() { | |||
| static DumpManager instance; | |||
| return instance; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf(const DumpConfig &dump_config) { | |||
| std::lock_guard<std::mutex> lock(mutex_); | |||
| dump_properties_.ClearDumpPropertyValue(); | |||
| dump_properties_.ClearDumpInfo(); | |||
| std::string dump_status; | |||
| std::string dump_path; | |||
| std::string dump_mode; | |||
| std::string dump_op_switch; | |||
| if (dump_config.dump_status.empty()) { | |||
| GELOGI("Dump is not enabled, dump_status is empty"); | |||
| return SUCCESS; | |||
| } | |||
| dump_status = dump_config.dump_status; | |||
| GELOGI("Dump status is %s", dump_status.c_str()); | |||
| if (dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) { | |||
| dump_properties_.ClearDumpPropertyValue(); | |||
| return SUCCESS; | |||
| } | |||
| dump_op_switch = dump_config.dump_op_switch; | |||
| if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { | |||
| GELOGE(PARAM_INVALID, "Dump list is invalid,dump_op_switch is %s", dump_op_switch.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| if (!dump_config.dump_list.empty()) { | |||
| for (auto model_dump : dump_config.dump_list) { | |||
| std::string model_name = model_dump.model_name; | |||
| GELOGI("Dump model is %s", model_name.c_str()); | |||
| std::set<std::string> dump_layers; | |||
| for (auto layer : model_dump.layers) { | |||
| GELOGI("Dump layer is %s in model", layer.c_str()); | |||
| dump_layers.insert(layer); | |||
| } | |||
| dump_properties_.AddPropertyValue(model_name, dump_layers); | |||
| } | |||
| if (dump_op_switch == kDumpOn) { | |||
| GELOGI("Start to dump model and single op, dump op switch is %s", dump_op_switch.c_str()); | |||
| } else { | |||
| GELOGI("Only dump model,dump op switch is %s", dump_op_switch.c_str()); | |||
| } | |||
| } else { | |||
| GELOGI("Only dump single op, dump op switch is %s", dump_op_switch.c_str()); | |||
| } | |||
| dump_path = dump_config.dump_path; | |||
| if (dump_path.empty()) { | |||
| GELOGE(PARAM_INVALID, "Dump path is empty"); | |||
| return PARAM_INVALID; | |||
| } | |||
| if (dump_path[dump_path.size() - 1] != '/') { | |||
| dump_path = dump_path + "/"; | |||
| } | |||
| dump_path = dump_path + CurrentTimeInStr() + "/"; | |||
| GELOGI("Dump path is %s", dump_path.c_str()); | |||
| dump_properties_.SetDumpPath(dump_path); | |||
| dump_mode = dump_config.dump_mode; | |||
| GELOGI("Dump mode is %s", dump_mode.c_str()); | |||
| dump_properties_.SetDumpMode(dump_mode); | |||
| return SUCCESS; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpManager::IsDumpOpen() { | |||
| std::lock_guard<std::mutex> lock(mutex_); | |||
| if (!dump_properties_.GetDumpPath().empty()) { | |||
| return true; | |||
| } | |||
| return false; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties() { | |||
| std::lock_guard<std::mutex> lock(mutex_); | |||
| return dump_properties_; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpManager::SetModelName(const std::string &model_name) { | |||
| std::lock_guard<std::mutex> lock(mutex_); | |||
| model_name_ = model_name; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpManager::GetModelName() { | |||
| std::lock_guard<std::mutex> lock(mutex_); | |||
| return model_name_; | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,42 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_COMMON_DUMP_DUMP_MANAGER_H_ | |||
| #define GE_COMMON_DUMP_DUMP_MANAGER_H_ | |||
| #include <mutex> | |||
| #include "common/dump/dump_properties.h" | |||
| #include "common/ge_types.h" | |||
| namespace ge { | |||
| class DumpManager { | |||
| public: | |||
| static DumpManager &GetInstance(); | |||
| Status SetDumpConf(const DumpConfig &dump_config); | |||
| bool IsDumpOpen(); | |||
| const DumpProperties &GetDumpProperties(); | |||
| void SetModelName(const std::string &model_name); | |||
| const std::string &GetModelName(); | |||
| private: | |||
| DumpProperties dump_properties_; | |||
| std::mutex mutex_; | |||
| std::string model_name_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_COMMON_DUMP_DUMP_MANAGER_H_ | |||
| @@ -0,0 +1,255 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/dump/dump_op.h" | |||
| #include "aicpu/common/aicpu_task_struct.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "common/ge/datatype_util.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/util.h" | |||
| #include "graph/anchor.h" | |||
| #include "graph/ge_tensor.h" | |||
| #include "graph/op_desc.h" | |||
| #include "graph/utils/tensor_utils.h" | |||
| #include "proto/ge_ir.pb.h" | |||
| #include "proto/op_mapping_info.pb.h" | |||
| #include "runtime/mem.h" | |||
| namespace { | |||
| const uint32_t kAicpuLoadFlag = 1; | |||
| const char *const kDumpOutput = "output"; | |||
| const char *const kDumpInput = "input"; | |||
| const char *const kDumpAll = "all"; | |||
| const char *const kDumpKernelsDumpOp = "DumpDataInfo"; | |||
| } // namespace | |||
| namespace ge { | |||
| DumpOp::~DumpOp() { | |||
| if (proto_dev_mem_ != nullptr) { | |||
| (void)rtFree(proto_dev_mem_); | |||
| } | |||
| if (proto_size_dev_mem_ != nullptr) { | |||
| (void)rtFree(proto_size_dev_mem_); | |||
| } | |||
| proto_dev_mem_ = nullptr; | |||
| proto_size_dev_mem_ = nullptr; | |||
| } | |||
| void DumpOp::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond) { | |||
| global_step_ = reinterpret_cast<uintptr_t>(global_step); | |||
| loop_per_iter_ = reinterpret_cast<uintptr_t>(loop_per_iter); | |||
| loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond); | |||
| } | |||
| void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id) { | |||
| dynamic_model_name_ = dynamic_model_name; | |||
| dynamic_model_id_ = dynamic_model_id; | |||
| } | |||
| static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond, | |||
| aicpu::dump::OpMappingInfo &op_mapping_info) { | |||
| if (step_id != 0) { | |||
| GELOGI("step_id exists."); | |||
| op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id)); | |||
| } else { | |||
| GELOGI("step_id is null."); | |||
| } | |||
| if (loop_per_iter != 0) { | |||
| GELOGI("loop_per_iter exists."); | |||
| op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter)); | |||
| } else { | |||
| GELOGI("loop_per_iter is null."); | |||
| } | |||
| if (loop_cond != 0) { | |||
| GELOGI("loop_cond exists."); | |||
| op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond)); | |||
| } else { | |||
| GELOGI("loop_cond is null."); | |||
| } | |||
| } | |||
| Status DumpOp::DumpOutput(aicpu::dump::Task &task) { | |||
| GELOGI("Start dump output in Launch dump op"); | |||
| const auto &output_descs = op_desc_->GetAllOutputsDesc(); | |||
| for (size_t i = 0; i < output_descs.size(); ++i) { | |||
| aicpu::dump::Output output; | |||
| output.set_data_type(static_cast<int32_t>(DataTypeUtil::GetIrDataType(output_descs.at(i).GetDataType()))); | |||
| output.set_format(static_cast<int32_t>(output_descs.at(i).GetFormat())); | |||
| for (auto dim : output_descs.at(i).GetShape().GetDims()) { | |||
| output.mutable_shape()->add_dim(dim); | |||
| } | |||
| int64_t output_size = 0; | |||
| if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Get output size failed"); | |||
| return PARAM_INVALID; | |||
| } | |||
| GELOGD("Get output size in launch dump op is %ld", output_size); | |||
| output.set_size(output_size); | |||
| output.set_address(static_cast<uint64_t>(output_addrs_[i])); | |||
| task.mutable_output()->Add(std::move(output)); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status DumpOp::DumpInput(aicpu::dump::Task &task) { | |||
| GELOGI("Start dump input in Launch dump op"); | |||
| const auto &input_descs = op_desc_->GetAllInputsDesc(); | |||
| for (size_t i = 0; i < input_descs.size(); ++i) { | |||
| aicpu::dump::Input input; | |||
| input.set_data_type(static_cast<int32_t>(DataTypeUtil::GetIrDataType(input_descs.at(i).GetDataType()))); | |||
| input.set_format(static_cast<int32_t>(input_descs.at(i).GetFormat())); | |||
| for (auto dim : input_descs.at(i).GetShape().GetDims()) { | |||
| input.mutable_shape()->add_dim(dim); | |||
| } | |||
| int64_t input_size = 0; | |||
| if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Get input size failed"); | |||
| return PARAM_INVALID; | |||
| } | |||
| GELOGD("Get input size in launch dump op is %ld", input_size); | |||
| input.set_size(input_size); | |||
| input.set_address(static_cast<uint64_t>(input_addrs_[i])); | |||
| task.mutable_input()->Add(std::move(input)); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| void DumpOp::SetDumpInfo(const DumpProperties &dump_properties, const OpDescPtr &op_desc, vector<uintptr_t> input_addrs, | |||
| vector<uintptr_t> output_addrs, rtStream_t stream) { | |||
| dump_properties_ = dump_properties; | |||
| op_desc_ = op_desc; | |||
| input_addrs_ = input_addrs; | |||
| output_addrs_ = output_addrs; | |||
| stream_ = stream; | |||
| } | |||
| Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||
| std::string proto_msg; | |||
| size_t proto_size = op_mapping_info.ByteSizeLong(); | |||
| bool ret = op_mapping_info.SerializeToString(&proto_msg); | |||
| if (!ret || proto_size == 0) { | |||
| GELOGE(FAILED, "Protobuf serialize failed,proto_size is %zu", proto_size); | |||
| return FAILED; | |||
| } | |||
| rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return RT_FAILED; | |||
| } | |||
| rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||
| return RT_FAILED; | |||
| } | |||
| rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return RT_FAILED; | |||
| } | |||
| rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||
| return RT_FAILED; | |||
| } | |||
| constexpr int32_t ioAddrNum = 2; | |||
| constexpr uint32_t argsSize = sizeof(aicpu::AicpuParamHead) + ioAddrNum * sizeof(uint64_t); | |||
| char args[argsSize] = {0}; | |||
| auto paramHead = reinterpret_cast<aicpu::AicpuParamHead *>(args); | |||
| paramHead->length = argsSize; | |||
| paramHead->ioAddrNum = ioAddrNum; | |||
| auto ioAddr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead)); | |||
| ioAddr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_); | |||
| ioAddr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_); | |||
| rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp, | |||
| 1, // blockDim default 1 | |||
| args, argsSize, | |||
| nullptr, // no need smDesc | |||
| stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret); | |||
| return rt_ret; | |||
| } | |||
| GELOGI("Kernel launch dump op success"); | |||
| return SUCCESS; | |||
| } | |||
| Status DumpOp::LaunchDumpOp() { | |||
| GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str()); | |||
| int32_t device_id = 0; | |||
| rtError_t rt_ret = rtGetDevice(&device_id); | |||
| if (rt_ret != RT_ERROR_NONE || device_id < 0) { | |||
| GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||
| return RT_FAILED; | |||
| } | |||
| aicpu::dump::OpMappingInfo op_mapping_info; | |||
| auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id) + "/"; | |||
| op_mapping_info.set_dump_path(dump_path); | |||
| op_mapping_info.set_flag(kAicpuLoadFlag); | |||
| op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | |||
| if (!dynamic_model_name_.empty()) { | |||
| op_mapping_info.set_model_name(dynamic_model_name_); | |||
| op_mapping_info.set_model_id(dynamic_model_id_); | |||
| } | |||
| SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | |||
| GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | |||
| dump_path.c_str()); | |||
| aicpu::dump::Task task; | |||
| task.mutable_op()->set_op_name(op_desc_->GetName()); | |||
| task.mutable_op()->set_op_type(op_desc_->GetType()); | |||
| if (dump_properties_.GetDumpMode() == kDumpOutput) { | |||
| if (DumpOutput(task) != SUCCESS) { | |||
| GELOGE(FAILED, "Dump output failed"); | |||
| return FAILED; | |||
| } | |||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||
| } | |||
| if (dump_properties_.GetDumpMode() == kDumpInput) { | |||
| if (DumpInput(task) != SUCCESS) { | |||
| GELOGE(FAILED, "Dump input failed"); | |||
| return FAILED; | |||
| } | |||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||
| } | |||
| if (dump_properties_.GetDumpMode() == kDumpAll) { | |||
| auto ret = DumpOutput(task); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(FAILED, "Dump output failed when in dumping all"); | |||
| return FAILED; | |||
| } | |||
| ret = DumpInput(task); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(FAILED, "Dump input failed when in dumping all"); | |||
| return FAILED; | |||
| } | |||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||
| } | |||
| auto ret = ExecutorDumpOp(op_mapping_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Executor dump op failed"); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,61 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_COMMON_DUMP_DUMP_OP_H_ | |||
| #define GE_COMMON_DUMP_DUMP_OP_H_ | |||
| #include <string> | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "common/properties_manager.h" | |||
| #include "proto/op_mapping_info.pb.h" | |||
| #include "runtime/stream.h" | |||
| namespace ge { | |||
| class DumpOp { | |||
| public: | |||
| DumpOp() = default; | |||
| ~DumpOp(); | |||
| void SetDumpInfo(const DumpProperties &dump_properties, const OpDescPtr &op_desc, vector<uintptr_t> input_addrs, | |||
| vector<uintptr_t> output_addrs, rtStream_t stream); | |||
| Status LaunchDumpOp(); | |||
| void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); | |||
| void SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id); | |||
| private: | |||
| Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info); | |||
| Status DumpOutput(aicpu::dump::Task &task); | |||
| Status DumpInput(aicpu::dump::Task &task); | |||
| DumpProperties dump_properties_; | |||
| OpDescPtr op_desc_; | |||
| std::vector<uintptr_t> input_addrs_; | |||
| std::vector<uintptr_t> output_addrs_; | |||
| void *proto_dev_mem_ = nullptr; | |||
| void *proto_size_dev_mem_ = nullptr; | |||
| rtStream_t stream_; | |||
| uintptr_t global_step_; | |||
| uintptr_t loop_per_iter_; | |||
| uintptr_t loop_cond_; | |||
| std::string dynamic_model_name_; | |||
| std::uint32_t dynamic_model_id_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_COMMON_DUMP_DUMP_OP_H_ | |||
| @@ -0,0 +1,238 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/dump/dump_properties.h" | |||
| #include <cstdio> | |||
| #include <string> | |||
| #include "common/ge/ge_util.h" | |||
| #include "common/util.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/debug/log.h" | |||
| #include "framework/common/ge_types.h" | |||
| #include "framework/common/types.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/ge_context.h" | |||
| #include "graph/utils/attr_utils.h" | |||
namespace {
const std::string kEnableFlag = "1";  // option value meaning "enabled"
// Bit masks selecting which overflow checks op-debug turns on.
const uint32_t kAicoreOverflow = (0x1 << 0);
const uint32_t kAtomicOverflow = (0x1 << 1);
const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow);
}  // namespace
| namespace ge { | |||
// Copy constructor: delegates to CopyFrom so that copy-construction and
// copy-assignment share one implementation.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) {
  CopyFrom(other);
}

// Copy assignment.  CopyFrom is self-assignment safe.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=(
    const DumpProperties &other) {
  CopyFrom(other);
  return *this;
}
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOptions() { | |||
| enable_dump_.clear(); | |||
| enable_dump_debug_.clear(); | |||
| dump_path_.clear(); | |||
| dump_step_.clear(); | |||
| dump_mode_.clear(); | |||
| is_op_debug_ = false; | |||
| op_debug_mode_ = 0; | |||
| std::string enable_dump; | |||
| (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP, enable_dump); | |||
| enable_dump_ = enable_dump; | |||
| std::string enable_dump_debug; | |||
| (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP_DEBUG, enable_dump_debug); | |||
| enable_dump_debug_ = enable_dump_debug; | |||
| if ((enable_dump_ == kEnableFlag) || (enable_dump_debug_ == kEnableFlag)) { | |||
| std::string dump_path; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_PATH, dump_path) == GRAPH_SUCCESS) { | |||
| if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { | |||
| dump_path = dump_path + "/"; | |||
| } | |||
| dump_path = dump_path + CurrentTimeInStr() + "/"; | |||
| GELOGI("Get dump path %s successfully", dump_path.c_str()); | |||
| SetDumpPath(dump_path); | |||
| } else { | |||
| GELOGW("Dump path is not set"); | |||
| } | |||
| } | |||
| if (enable_dump_ == kEnableFlag) { | |||
| std::string dump_step; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) { | |||
| GELOGD("Get dump step %s successfully", dump_step.c_str()); | |||
| SetDumpStep(dump_step); | |||
| } | |||
| string dump_mode; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) { | |||
| GELOGD("Get dump mode %s successfully", dump_mode.c_str()); | |||
| SetDumpMode(dump_mode); | |||
| } | |||
| AddPropertyValue(DUMP_ALL_MODEL, {}); | |||
| } | |||
| SetDumpDebugOptions(); | |||
| } | |||
| // The following is the new dump scenario of the fusion operator | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue( | |||
| const std::string &model, const std::set<std::string> &layers) { | |||
| for (const std::string &layer : layers) { | |||
| GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str()); | |||
| } | |||
| model_dump_properties_map_[model] = layers; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::DeletePropertyValue(const std::string &model) { | |||
| auto iter = model_dump_properties_map_.find(model); | |||
| if (iter != model_dump_properties_map_.end()) { | |||
| model_dump_properties_map_.erase(iter); | |||
| } | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::ClearDumpPropertyValue() { | |||
| model_dump_properties_map_.clear(); | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::ClearDumpInfo() { | |||
| enable_dump_.clear(); | |||
| enable_dump_debug_.clear(); | |||
| dump_path_.clear(); | |||
| dump_step_.clear(); | |||
| dump_mode_.clear(); | |||
| is_op_debug_ = false; | |||
| op_debug_mode_ = 0; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetAllDumpModel() const { | |||
| std::set<std::string> model_list; | |||
| for (auto &iter : model_dump_properties_map_) { | |||
| model_list.insert(iter.first); | |||
| } | |||
| return model_list; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue( | |||
| const std::string &model) const { | |||
| auto iter = model_dump_properties_map_.find(model); | |||
| if (iter != model_dump_properties_map_.end()) { | |||
| return iter->second; | |||
| } | |||
| return {}; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump( | |||
| const std::string &model, const std::string &om_name, const std::string &op_name) const { | |||
| // if dump all | |||
| if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) { | |||
| return true; | |||
| } | |||
| // if this model need dump | |||
| auto om_name_iter = model_dump_properties_map_.find(om_name); | |||
| auto model_name_iter = model_dump_properties_map_.find(model); | |||
| if (om_name_iter != model_dump_properties_map_.end() || model_name_iter != model_dump_properties_map_.end()) { | |||
| // if no dump layer info, dump all layer in this model | |||
| auto model_iter = om_name_iter != model_dump_properties_map_.end() ? om_name_iter : model_name_iter; | |||
| if (model_iter->second.empty()) { | |||
| return true; | |||
| } | |||
| return model_iter->second.find(op_name) != model_iter->second.end(); | |||
| } | |||
| GELOGD("Model %s is not seated to be dump.", model.c_str()); | |||
| return false; | |||
| } | |||
// Trivial accessors for the dump options.  Setters store the value as-is;
// validation/derivation of option strings happens in InitByOptions and
// SetDumpDebugOptions.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpPath(const std::string &path) {
  dump_path_ = path;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperties::GetDumpPath() const {
  return dump_path_;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpStep(const std::string &step) {
  dump_step_ = step;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperties::GetDumpStep() const {
  return dump_step_;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpMode(const std::string &mode) {
  dump_mode_ = mode;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperties::GetDumpMode() const {
  return dump_mode_;
}
| void DumpProperties::CopyFrom(const DumpProperties &other) { | |||
| if (&other != this) { | |||
| enable_dump_ = other.enable_dump_; | |||
| enable_dump_debug_ = other.enable_dump_debug_; | |||
| dump_path_ = other.dump_path_; | |||
| dump_step_ = other.dump_step_; | |||
| dump_mode_ = other.dump_mode_; | |||
| model_dump_properties_map_ = other.model_dump_properties_map_; | |||
| is_op_debug_ = other.is_op_debug_; | |||
| op_debug_mode_ = other.op_debug_mode_; | |||
| } | |||
| } | |||
| void DumpProperties::SetDumpDebugOptions() { | |||
| if (enable_dump_debug_ == kEnableFlag) { | |||
| std::string dump_debug_mode; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_DEBUG_MODE, dump_debug_mode) == GRAPH_SUCCESS) { | |||
| GELOGD("Get dump debug mode %s successfully", dump_debug_mode.c_str()); | |||
| } else { | |||
| GELOGW("Dump debug mode is not set."); | |||
| return; | |||
| } | |||
| if (dump_debug_mode == OP_DEBUG_AICORE) { | |||
| GELOGD("ge.exec.dumpDebugMode=aicore_overflow, op debug is open."); | |||
| is_op_debug_ = true; | |||
| op_debug_mode_ = kAicoreOverflow; | |||
| } else if (dump_debug_mode == OP_DEBUG_ATOMIC) { | |||
| GELOGD("ge.exec.dumpDebugMode=atomic_overflow, op debug is open."); | |||
| is_op_debug_ = true; | |||
| op_debug_mode_ = kAtomicOverflow; | |||
| } else if (dump_debug_mode == OP_DEBUG_ALL) { | |||
| GELOGD("ge.exec.dumpDebugMode=all, op debug is open."); | |||
| is_op_debug_ = true; | |||
| op_debug_mode_ = kAllOverflow; | |||
| } else { | |||
| GELOGW("ge.exec.dumpDebugMode is invalid."); | |||
| } | |||
| } else { | |||
| GELOGI("ge.exec.enableDumpDebug is false or is not set."); | |||
| } | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,86 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_COMMON_DUMP_DUMP_PROPERTIES_H_ | |||
| #define GE_COMMON_DUMP_DUMP_PROPERTIES_H_ | |||
| #include <map> | |||
| #include <set> | |||
| #include <string> | |||
| #include <vector> | |||
| namespace ge { | |||
// Holds the data-dump configuration for graph execution: global enable
// switches, dump path/step/mode, the per-model layer sets to dump, and the
// op-debug (overflow detection) settings.
class DumpProperties {
 public:
  DumpProperties() = default;
  ~DumpProperties() = default;
  // Copy operations deep-copy every property (see CopyFrom).
  DumpProperties(const DumpProperties &dump);
  DumpProperties &operator=(const DumpProperties &dump);
  // (Re)loads all settings from the global GE context options.
  void InitByOptions();
  // Registers the layers to dump for `model`; an empty set = all layers.
  void AddPropertyValue(const std::string &model, const std::set<std::string> &layers);
  void DeletePropertyValue(const std::string &model);
  void ClearDumpPropertyValue();
  // Resets switches/options only; does not clear the per-model map.
  void ClearDumpInfo();
  std::set<std::string> GetAllDumpModel() const;
  std::set<std::string> GetPropertyValue(const std::string &model) const;
  // True when `op_name` of `model`/`om_name` is configured to be dumped.
  bool IsLayerNeedDump(const std::string &model, const std::string &om_name, const std::string &op_name) const;
  void SetDumpPath(const std::string &path);
  const std::string &GetDumpPath() const;
  void SetDumpStep(const std::string &step);
  const std::string &GetDumpStep() const;
  void SetDumpMode(const std::string &mode);
  const std::string &GetDumpMode() const;
  // Op-debug (overflow detection) state parsed from ge.exec.dumpDebugMode.
  bool IsOpDebugOpen() const { return is_op_debug_; }
  uint32_t GetOpDebugMode() const { return op_debug_mode_; }

 private:
  void CopyFrom(const DumpProperties &other);
  void SetDumpDebugOptions();
  std::string enable_dump_;        // "1" when dump is enabled
  std::string enable_dump_debug_;  // "1" when dump-debug is enabled
  std::string dump_path_;
  std::string dump_step_;
  std::string dump_mode_;
  // model name -> layers to dump (empty set = all layers of that model)
  std::map<std::string, std::set<std::string>> model_dump_properties_map_;
  bool is_op_debug_ = false;
  uint32_t op_debug_mode_ = 0;
};
| } // namespace ge | |||
| #endif // GE_COMMON_DUMP_DUMP_PROPERTIES_H_ | |||
| @@ -15,23 +15,54 @@ | |||
| */ | |||
| #include "common/ge/datatype_util.h" | |||
| #include "proto/ge_ir.pb.h" | |||
| #include <map> | |||
| namespace { | |||
| const std::vector<ge::DataType> kEmptyDatatypeVector; | |||
| std::map<ge::DataType, std::vector<ge::DataType>> g_translatable_data_type = { | |||
| // key:src datatype, value:dst datatype | |||
| {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}, | |||
| {ge::DT_BOOL, {ge::DT_INT32}}, | |||
| {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, | |||
| {ge::DT_INT64, {ge::DT_INT32}}}; | |||
| // key:src datatype, value:dst datatype | |||
| {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}, | |||
| {ge::DT_BOOL, {ge::DT_INT32}}, | |||
| {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, | |||
| {ge::DT_INT64, {ge::DT_INT32}}}; | |||
| std::map<ge::DataType, std::vector<ge::DataType>> g_reverse_translatable_data_type = { | |||
| // key:dst datatype,value:src datatype | |||
| {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, | |||
| {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, | |||
| {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; | |||
| // key:dst datatype,value:src datatype | |||
| {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, | |||
| {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, | |||
| {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; | |||
| static const std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = { | |||
| // key:ge datatype,value:proto datatype | |||
| {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, | |||
| {ge::DT_FLOAT, ge::proto::DT_FLOAT}, | |||
| {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, | |||
| {ge::DT_INT8, ge::proto::DT_INT8}, | |||
| {ge::DT_UINT8, ge::proto::DT_UINT8}, | |||
| {ge::DT_INT16, ge::proto::DT_INT16}, | |||
| {ge::DT_UINT16, ge::proto::DT_UINT16}, | |||
| {ge::DT_INT32, ge::proto::DT_INT32}, | |||
| {ge::DT_INT64, ge::proto::DT_INT64}, | |||
| {ge::DT_UINT32, ge::proto::DT_UINT32}, | |||
| {ge::DT_UINT64, ge::proto::DT_UINT64}, | |||
| {ge::DT_BOOL, ge::proto::DT_BOOL}, | |||
| {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, | |||
| {ge::DT_DUAL, ge::proto::DT_DUAL}, | |||
| {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, | |||
| {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, | |||
| {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, | |||
| {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, | |||
| {ge::DT_QINT8, ge::proto::DT_QINT8}, | |||
| {ge::DT_QINT16, ge::proto::DT_QINT16}, | |||
| {ge::DT_QINT32, ge::proto::DT_QINT32}, | |||
| {ge::DT_QUINT8, ge::proto::DT_QUINT8}, | |||
| {ge::DT_QUINT16, ge::proto::DT_QUINT16}, | |||
| {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, | |||
| {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, | |||
| {ge::DT_STRING, ge::proto::DT_STRING}, | |||
| }; | |||
| } // namespace | |||
| namespace ge { | |||
| @@ -67,4 +98,13 @@ const std::vector<ge::DataType> &DataTypeUtil::GetTranslatableDataTypesByDst(con | |||
| return search->second; | |||
| } | |||
| int32_t DataTypeUtil::GetIrDataType(ge::DataType data_type) { | |||
| auto iter = g_dump_data_type_map.find(data_type); | |||
| if (iter == g_dump_data_type_map.end()) { | |||
| return static_cast<int32_t>(ge::proto::DT_UNDEFINED); | |||
| } | |||
| return static_cast<int32_t>(iter->second); | |||
| } | |||
| } // namespace ge | |||
| @@ -37,16 +37,17 @@ static const int32_t kGeSizeUint16 = sizeof(uint16_t); | |||
| static const int32_t kGeSizeUint32 = sizeof(uint32_t); | |||
| static std::map<ge::DataType, int32_t> CONST_OPDATA_TYPE_SIZE_MAP = { | |||
| {ge::DT_FLOAT, kGeSizeFloat}, {ge::DT_FLOAT16, kGeSizeHalfFloat}, {ge::DT_INT8, kGeSizeInt8}, | |||
| {ge::DT_INT16, kGeSizeInt16}, {ge::DT_INT32, kGeSizeInt32}, {ge::DT_INT64, kGeSizeInt64}, | |||
| {ge::DT_UINT8, kGeSizeUint8}, {ge::DT_UINT16, kGeSizeUint16}, {ge::DT_UINT32, kGeSizeUint32}, | |||
| {ge::DT_UINT64, kGeSizeUint64}, {ge::DT_DOUBLE, kGeSizeDouble}, {ge::DT_BOOL, kGeSizeBool}}; | |||
| {ge::DT_FLOAT, kGeSizeFloat}, {ge::DT_FLOAT16, kGeSizeHalfFloat}, {ge::DT_INT8, kGeSizeInt8}, | |||
| {ge::DT_INT16, kGeSizeInt16}, {ge::DT_INT32, kGeSizeInt32}, {ge::DT_INT64, kGeSizeInt64}, | |||
| {ge::DT_UINT8, kGeSizeUint8}, {ge::DT_UINT16, kGeSizeUint16}, {ge::DT_UINT32, kGeSizeUint32}, | |||
| {ge::DT_UINT64, kGeSizeUint64}, {ge::DT_DOUBLE, kGeSizeDouble}, {ge::DT_BOOL, kGeSizeBool}}; | |||
// Static helpers for datatype translation checks and GE<->proto mapping.
class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY DataTypeUtil {
 public:
  // True when src_out_data_type may be translated to dst_in_data_type.
  static bool DataTypeTranslatable(const ge::DataType &src_out_data_type, const ge::DataType &dst_in_data_type);
  // Datatypes that src_out_data_type can be translated to.
  static const std::vector<ge::DataType> &GetTranslatableDataTypesBySrc(const ge::DataType &src_out_data_type);
  // Datatypes that can be translated into dst_in_data_type.
  static const std::vector<ge::DataType> &GetTranslatableDataTypesByDst(const ge::DataType &dst_in_data_type);
  // Proto (IR) datatype for a GE datatype; DT_UNDEFINED when unknown.
  static int32_t GetIrDataType(ge::DataType data_type);
};
| } // namespace ge | |||
| #endif // GE_COMMON_GE_DATATYPE_UTIL_H_ | |||
| @@ -187,8 +187,8 @@ void TBEPluginManager::LoadCustomOpLib() { | |||
| std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | |||
| GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | |||
| for (OpRegistrationData reg_data : registration_datas) { | |||
| GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(), | |||
| static_cast<uint32_t>(reg_data.GetImplyType())); | |||
| GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(), | |||
| TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str()); | |||
| domi::OpRegistry::Instance()->Register(reg_data); | |||
| } | |||
| } | |||
| @@ -36,7 +36,6 @@ GE_COMMON_LOCAL_SRC_FILES := \ | |||
| properties_manager.cc \ | |||
| types.cc\ | |||
| model_parser/base.cc \ | |||
| model_parser/graph_parser_util.cc \ | |||
| tbe_kernel_store.cc \ | |||
| op/attr_value_util.cc \ | |||
| op/ge_op_utils.cc \ | |||
| @@ -562,7 +562,6 @@ inline Status CheckUint64MulOverflow(uint64_t a, uint64_t b) { | |||
| /// @return Status | |||
| inline Status CheckFp16MulOverflow(fp16_t a, fp16_t b) { | |||
| fp16_t result = static_cast<fp16_t>(a) * static_cast<fp16_t>(b); | |||
| printf("result: %u, 0x%x\n", result.val, result.val); | |||
| if (FP16_IS_INVALID(result.val)) { | |||
| return FAILED; | |||
| } | |||
| @@ -885,6 +884,23 @@ inline Status CheckInt32DivOverflow(int32_t a, int32_t b) { | |||
| static_cast<uint32_t>(b)); \ | |||
| return INTERNAL_ERROR; \ | |||
| } | |||
| } // namespace ge | |||
// Division guards: fail with INTERNAL_ERROR when the divisor is (almost) zero.
// NOTE(review): the fp16 variant compares against DBL_EPSILON rather than a
// half-precision epsilon — presumably `a` has already been promoted to
// double at the call sites; confirm the intended tolerance.
#define FMK_FP16_ZEROCHECK(a)                                 \
  if (fabs(a) < DBL_EPSILON) {                                \
    GELOGE(INTERNAL_ERROR, "fp16 %f can not be zero !", a);   \
    return INTERNAL_ERROR;                                    \
  }

#define FMK_FLOAT_ZEROCHECK(a)                                \
  if (fabs(a) < FLT_EPSILON) {                                \
    GELOGE(INTERNAL_ERROR, "float %f can not be zero !", a);  \
    return INTERNAL_ERROR;                                    \
  }

#define FMK_DOUBLE_ZEROCHECK(a)                               \
  if (fabs(a) < DBL_EPSILON) {                                \
    GELOGE(INTERNAL_ERROR, "double %lf can not be zero !", a); \
    return INTERNAL_ERROR;                                    \
  }
| } // namespace ge | |||
| #endif // GE_COMMON_MATH_MATH_UTIL_H_ | |||
| @@ -1,501 +0,0 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "graph_parser_util.h" | |||
| #include <memory> | |||
| #include "common/auth/file_saver.h" | |||
| #include "common/convert/pb2json.h" | |||
| #include "common/debug/log.h" | |||
| #include "common/debug/memory_dumper.h" | |||
| #include "common/model_parser/base.h" | |||
| #include "common/model_saver.h" | |||
| #include "common/properties_manager.h" | |||
| #include "common/string_util.h" | |||
| #include "common/types.h" | |||
| #include "common/util.h" | |||
| #include "common/util/error_manager/error_manager.h" | |||
| #include "external/register/register_types.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/omg/parser/parser_inner_ctx.h" | |||
| #include "graph/compute_graph.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/optimize/common/params.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "omg/omg_inner_types.h" | |||
| #include "omg/parser/model_parser.h" | |||
| #include "omg/parser/parser_factory.h" | |||
| #include "omg/parser/weights_parser.h" | |||
| #include "parser/common/pre_checker.h" | |||
| #include "proto/ge_ir.pb.h" | |||
| #include "register/op_registry.h" | |||
| namespace ge { | |||
| namespace { | |||
| // The function is incomplete. Currently, only l2_optimize, off_optimize is supported. | |||
| const char *const kInputShapeSample1 = "\"input_name1:n1,c1,h1,w1\""; | |||
| const char *const kInputShapeSample2 = "\"input_name1:1,3,224,224\""; | |||
| const char *const kSplitError1 = "size not equal to 2 split by \":\""; | |||
| const char *const kEmptyError = "can not be empty"; | |||
| const char *const kFloatNumError = "exist float number"; | |||
| const char *const kDigitError = "is not digit"; | |||
| const char *const kOutputTypeSample = "correct sample is \"opname:index:dtype\""; | |||
| const char *const kOutputTypeSupport = "only support FP32, FP16, UINT8"; | |||
| const char *const kOutputTypeError = "The multiple out nodes set in output_type must be found in out_nodes."; | |||
// Splits "name:shape" at the LAST ':' into {name, shape}; returns an empty
// vector when the separator is missing.
vector<string> SplitInputShape(const std::string &input_shape) {
  vector<string> shape_pair_vec;
  const size_t colon_pos = input_shape.rfind(':');
  if (colon_pos != std::string::npos) {
    // substr without a count takes everything after the separator — same as
    // the original over-long count, which substr clamps.
    shape_pair_vec.emplace_back(input_shape.substr(0, colon_pos));
    shape_pair_vec.emplace_back(input_shape.substr(colon_pos + 1));
  }
  return shape_pair_vec;
}
// Output datatypes accepted by --output_type and their GE equivalents.
static std::map<std::string, ge::DataType> output_type_str_to_datatype = {
    {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}};
| static bool CheckInputTrueOrFalse(const std::string &s, const std::string &atc_param) { | |||
| if ((s == "true") || (s == "false")) { | |||
| return true; | |||
| } else { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"parameter", "value"}, {atc_param, s}); | |||
| GELOGE(PARAM_INVALID, "Input parameter[--%s]'s value[%s] must be true or false.", atc_param.c_str(), s.c_str()); | |||
| return false; | |||
| } | |||
| } | |||
| bool CheckDigitStr(std::string &str) { | |||
| for (char c : str) { | |||
| if (!isdigit(c)) { | |||
| GELOGE(domi::FAILED, "value[%s] is not positive integer", str.c_str()); | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| Status StringToInt(std::string &str, int32_t &value) { | |||
| try { | |||
| if (!CheckDigitStr(str)) { | |||
| GELOGE(PARAM_INVALID, "Invalid of digit string: %s ", str.c_str()); | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||
| {"--output_type", str, "is not positive integer"}); | |||
| return PARAM_INVALID; | |||
| } | |||
| value = stoi(str); | |||
| } catch (std::invalid_argument &) { | |||
| GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch invalid_argument.", str.c_str()); | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str}); | |||
| return PARAM_INVALID; | |||
| } catch (std::out_of_range &) { | |||
| GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch out_of_range.", str.c_str()); | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"output_type", str}); | |||
| return PARAM_INVALID; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status VerifyOutputTypeAndOutNodes(std::vector<std::string> &out_type_vec) { | |||
| std::vector<std::pair<std::string, int32_t>> user_out_nodes = domi::GetContext().user_out_nodes; | |||
| std::set<std::string> out_nodes_info; | |||
| for (uint32_t i = 0; i < user_out_nodes.size(); ++i) { | |||
| // out_nodes set should include output_type and output_format | |||
| std::string tmp = user_out_nodes[i].first + ":" + to_string(user_out_nodes[i].second); | |||
| out_nodes_info.emplace(tmp); | |||
| } | |||
| for (uint32_t i = 0; i < out_type_vec.size(); ++i) { | |||
| if (out_nodes_info.find(out_type_vec[i]) == out_nodes_info.end()) { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||
| {"--output_type", out_type_vec[i], kOutputTypeError}); | |||
| GELOGE(domi::FAILED, "Invalid value for --output_type[%s], %s.", out_type_vec[i].c_str(), kOutputTypeError); | |||
| return domi::FAILED; | |||
| } | |||
| } | |||
| return domi::SUCCESS; | |||
| } | |||
// Parses the --output_type option.  Two forms are accepted:
//   1. A bare datatype ("FP32"/"FP16"/"UINT8") applied to all out nodes.
//   2. A ';'-separated list of "opname:index:dtype" triples.
// For form 2 the parsed indices and datatypes are collected per node into
// out_type_index_map / out_type_dt_map, and each "opname:index" pair is then
// verified against the --out_nodes list.
Status ParseOutputType(const std::string &output_type, std::map<std::string, vector<uint32_t>> &out_type_index_map,
                       std::map<std::string, vector<ge::DataType>> &out_type_dt_map) {
  // Form 1: no ':' means a single datatype for every out node.
  if (output_type.find(':') == std::string::npos) {
    GELOGI("output_type is not multiple nodes, means all out nodes");
    auto it = output_type_str_to_datatype.find(output_type);
    if (it == output_type_str_to_datatype.end()) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
                                                      {"--output_type", output_type, kOutputTypeSupport});
      GELOGE(PARAM_INVALID, "Invalid value for --output_type[%s], %s.", output_type.c_str(), kOutputTypeSupport);
      return domi::FAILED;
    }
    return domi::SUCCESS;
  }
  // Form 2: "opname:index:dtype;opname:index:dtype;..."
  std::vector<std::string> out_type_vec;
  vector<string> nodes_v = StringUtils::Split(output_type, ';');
  for (const string &node : nodes_v) {
    vector<string> node_index_type_v = StringUtils::Split(node, ':');
    if (node_index_type_v.size() != 3) {  // The size must be 3.
      ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
                                                      {"--output_type", node, kOutputTypeSample});
      GELOGE(PARAM_INVALID, "Invalid value for --output_type[%s], %s.", node.c_str(), kOutputTypeSample);
      return domi::FAILED;
    }
    ge::DataType tmp_dt;
    std::string node_name = StringUtils::Trim(node_index_type_v[0]);
    std::string index_str = StringUtils::Trim(node_index_type_v[1]);
    int32_t index;
    if (StringToInt(index_str, index) != SUCCESS) {
      GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s.", index_str.c_str());
      return domi::FAILED;
    }
    // The datatype token must be one of the supported output types.
    std::string dt_value = StringUtils::Trim(node_index_type_v[2]);
    auto it = output_type_str_to_datatype.find(dt_value);
    if (it == output_type_str_to_datatype.end()) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
                                                      {"--output_type", dt_value, kOutputTypeSupport});
      GELOGE(ge::PARAM_INVALID, "Invalid value for --output_type[%s], %s.", dt_value.c_str(), kOutputTypeSupport);
      return domi::FAILED;
    } else {
      tmp_dt = it->second;
    }
    // Remember "name:index" for the later --out_nodes cross-check.
    out_type_vec.push_back(node_name + ":" + index_str);
    // Append the index under this node name (create the entry on first use).
    auto it_index = out_type_index_map.find(node_name);
    if (it_index == out_type_index_map.end()) {
      vector<uint32_t> tmp_vec;
      tmp_vec.push_back(index);
      out_type_index_map.emplace(node_name, tmp_vec);
    } else {
      it_index->second.push_back(index);
    }
    // Append the datatype under this node name (parallel to the index list).
    auto it_dt = out_type_dt_map.find(node_name);
    if (it_dt == out_type_dt_map.end()) {
      vector<ge::DataType> tmp_vec;
      tmp_vec.push_back(tmp_dt);
      out_type_dt_map.emplace(node_name, tmp_vec);
    } else {
      it_dt->second.push_back(tmp_dt);
    }
  }
  return VerifyOutputTypeAndOutNodes(out_type_vec);
}
| Status CheckOutNode(ge::OpDescPtr op_desc, int32_t index) { | |||
| int32_t out_size = op_desc->GetOutputsSize(); | |||
| if (index < 0 || index >= out_size) { | |||
| GELOGE(domi::FAILED, | |||
| "out_node [%s] output index:%d must be smaller " | |||
| "than node output size:%d and can not be negative!", | |||
| op_desc->GetName().c_str(), index, out_size); | |||
| std::string fail_reason = "output index:" + to_string(index) + | |||
| " must be smaller than output size:" + to_string(out_size) + " and can not be negative!"; | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, | |||
| {"out_nodes", op_desc->GetName(), fail_reason}); | |||
| return domi::FAILED; | |||
| } | |||
| return domi::SUCCESS; | |||
| } | |||
| Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info) { | |||
| ge::OpDescPtr tmpDescPtr = node->GetOpDesc(); | |||
| if (tmpDescPtr == nullptr) { | |||
| GELOGE(domi::FAILED, "Get outnode op desc fail."); | |||
| return domi::FAILED; | |||
| } | |||
| size_t size = tmpDescPtr->GetOutputsSize(); | |||
| if (node->GetType() != NETOUTPUT) { | |||
| for (size_t index = 0; index < size; ++index) { | |||
| output_nodes_info.push_back(std::make_pair(node, index)); | |||
| } | |||
| } else { | |||
| const auto in_anchors = node->GetAllInDataAnchors(); | |||
| for (auto in_anchor : in_anchors) { | |||
| auto out_anchor = in_anchor->GetPeerOutAnchor(); | |||
| if (out_anchor == nullptr) { | |||
| GELOGE(domi::FAILED, "Get leaf node op desc fail."); | |||
| return domi::FAILED; | |||
| } | |||
| auto out_node = out_anchor->GetOwnerNode(); | |||
| output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx())); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||
| std::vector<std::string> &output_nodes_name) { | |||
| output_nodes_name.clear(); | |||
| if (domi::GetContext().out_top_names.empty()) { | |||
| // tf process, no top name. | |||
| for (const auto output_node_info : output_nodes_info) { | |||
| std::string node_name = output_node_info.first->GetName(); | |||
| int32_t index = output_node_info.second; | |||
| output_nodes_name.push_back(node_name + ":" + std::to_string(index)); | |||
| } | |||
| return; | |||
| } | |||
| // caffe process, need add top name after node_name:index | |||
| for (size_t i = 0; i < output_nodes_info.size(); ++i) { | |||
| std::string node_name = output_nodes_info[i].first->GetName(); | |||
| int32_t index = output_nodes_info[i].second; | |||
| if (i < domi::GetContext().out_top_names.size()) { | |||
| output_nodes_name.push_back(node_name + ":" + std::to_string(index) + ":" + domi::GetContext().out_top_names[i]); | |||
| } else { | |||
| GELOGW("Get top name of node [%s] fail.", node_name.c_str()); | |||
| output_nodes_name.push_back(node_name + ":" + std::to_string(index)); | |||
| } | |||
| } | |||
| } | |||
| } // namespace | |||
// Parse the --is_output_adjust_hw_layout option: a comma-separated list of
// "true"/"false" flags, one per output. "true" records DOMI_TENSOR_NC1HWC0,
// "false" records DOMI_TENSOR_ND into the global output_formats list.
// An empty option string is a no-op and returns SUCCESS.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputFp16NodesFormat(const string &is_output_fp16) {
  if (is_output_fp16.empty()) {
    return SUCCESS;
  }
  vector<domiTensorFormat_t> &output_formats = domi::GetContext().output_formats;
  output_formats.clear();
  vector<string> node_format_vec = StringUtils::Split(is_output_fp16, ',');
  for (auto &is_fp16 : node_format_vec) {
    // NOTE(review): the return value is discarded, which is only correct if
    // StringUtils::Trim trims its argument in place — confirm against the
    // StringUtils declaration (other call sites use the returned string).
    StringUtils::Trim(is_fp16);
    if (!CheckInputTrueOrFalse(is_fp16, "is_output_adjust_hw_layout")) {
      GELOGE(PARAM_INVALID, "Invalid Param, is_output_adjust_hw_layout only support true/false: but is [%s]",
             is_output_fp16.c_str());
      return PARAM_INVALID;
    }
    if (is_fp16 == "false") {
      output_formats.push_back(DOMI_TENSOR_ND);
    } else if (is_fp16 == "true") {
      output_formats.push_back(domi::DOMI_TENSOR_NC1HWC0);
    }
  }
  return SUCCESS;
}
// Resolve and record the output nodes of |graph|.
// - When the user listed out-nodes (omg context user_out_nodes), each is
//   located in the graph, its index validated, optional NC1HWC0/output-type
//   attributes applied, and the pair recorded.
// - Otherwise every leaf node (has data inputs, no data outputs) contributes
//   its outputs via GetOutputLeaf.
// The collected pairs are written into the compute graph's out-nodes info and
// their display names into the omg context net_out_nodes.
// |output_type| is the --output_type option (may be empty); |output| is
// currently unused by the visible body.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph,
                                                                          const std::string &output_type,
                                                                          const std::string &output) {
  ge::ComputeGraphPtr compute_graph = ge::GraphUtils::GetComputeGraph(graph);
  GE_CHECK_NOTNULL(compute_graph);
  std::vector<std::pair<std::string, int32_t>> user_out_nodes = domi::GetContext().user_out_nodes;
  std::vector<domiTensorFormat_t> output_formats = domi::GetContext().output_formats;
  std::vector<std::pair<ge::NodePtr, int32_t>> output_nodes_info;
  std::vector<std::string> output_nodes_name;
  // node name -> output indices / data types requested via --output_type
  std::map<std::string, vector<uint32_t>> out_type_index_map;
  std::map<std::string, vector<ge::DataType>> out_type_dt_map;
  if (!output_type.empty()) {
    if (ParseOutputType(output_type, out_type_index_map, out_type_dt_map) != SUCCESS) {
      GELOGE(domi::FAILED, "Parse output_type failed.");
      return domi::FAILED;
    }
  }
  // User declared outputs
  for (uint32_t i = 0; i < user_out_nodes.size(); ++i) {
    ge::NodePtr out_node = compute_graph->FindNode(user_out_nodes[i].first);
    if (out_node == nullptr) {
      GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str());
      return domi::FAILED;
    }
    auto op_desc = out_node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    if (CheckOutNode(op_desc, user_out_nodes[i].second) != SUCCESS) {
      GELOGE(domi::FAILED, "Check out node (%s) fail.", user_out_nodes[i].first.c_str());
      return domi::FAILED;
    }
    // output_formats is parallel to user_out_nodes (filled by
    // ParseOutputFp16NodesFormat); entries past its end have no format request.
    if (i < output_formats.size()) {
      if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) {
        GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str());
        if (!ge::AttrUtils::SetBool(op_desc, "output_set_fp16_nc1hwc0", true)) {
          GELOGW("The output node [%s] set NC1HWC0 failed", user_out_nodes[i].first.c_str());
        }
      }
    }
    // Attach requested output dtypes/indices; failures are deliberately ignored.
    auto it_index = out_type_index_map.find(user_out_nodes[i].first);
    auto it_dt = out_type_dt_map.find(user_out_nodes[i].first);
    if ((it_index != out_type_index_map.end()) && (it_dt != out_type_dt_map.end())) {
      GELOGI("The output node [%s] need to be set output_type", user_out_nodes[i].first.c_str());
      (void)ge::AttrUtils::SetListDataType(op_desc, "_output_dt_list", it_dt->second);
      (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second);
    }
    output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second));
  }
  // default output node (leaf)
  if (user_out_nodes.empty()) {
    for (ge::NodePtr node : compute_graph->GetDirectNode()) {
      // a leaf: consumes data but produces none for other nodes
      if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) {
        Status ret = GetOutputLeaf(node, output_nodes_info);
        GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail.");
      }
    }
  }
  GetOutputNodesNameAndIndex(output_nodes_info, output_nodes_name);
  compute_graph->SetGraphOutNodesInfo(output_nodes_info);
  domi::GetContext().net_out_nodes = output_nodes_name;
  return domi::SUCCESS;
}
// Parse the --input_shape option ("name1:1,2,3;name2:4,5") into both a
// lookup map and an ordered list of (name, dims). Returns false (after
// reporting the matching E100xx error) on any malformed entry. When
// |is_dynamic_input| is true, non-positive dimensions (e.g. -1) are allowed;
// otherwise every dimension must be a positive integer.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ParseInputShape(
    const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map,
    vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) {
  vector<string> shape_vec = StringUtils::Split(input_shape, ';');
  const int DEFAULT_SHAPE_PAIR_SIZE = 2;  // each entry must be "name:dims"
  for (const auto &shape : shape_vec) {
    vector<string> shape_pair_vec = SplitInputShape(shape);
    if (shape_pair_vec.size() != DEFAULT_SHAPE_PAIR_SIZE) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                      {shape, kSplitError1, kInputShapeSample1});
      GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
             shape.c_str(), kSplitError1, kInputShapeSample1);
      return false;
    }
    if (shape_pair_vec[1].empty()) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                      {shape, kEmptyError, kInputShapeSample1});
      GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
             shape.c_str(), kEmptyError, kInputShapeSample1);
      return false;
    }
    vector<string> shape_value_strs = StringUtils::Split(shape_pair_vec[1], ',');
    vector<int64_t> shape_values;
    for (auto &shape_value_str : shape_value_strs) {
      // stoul: The method may throw an exception: invalid_argument/out_of_range
      // Reject floating-point looking dims before attempting the conversion.
      if (std::string::npos != shape_value_str.find('.')) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                        {shape, kFloatNumError, kInputShapeSample2});
        GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
               shape.c_str(), kFloatNumError, kInputShapeSample2);
        return false;
      }
      long left_result = 0;
      try {
        left_result = stol(StringUtils::Trim(shape_value_str));
        if (!shape_value_str.empty() && (shape_value_str.front() == '-')) {
          // The value maybe dynamic shape [-1], need substr it and verify isdigit.
          shape_value_str = shape_value_str.substr(1);
        }
        // stol accepts trailing junk ("3x"); verify every remaining char is a digit.
        for (char c : shape_value_str) {
          if (!isdigit(c)) {
            ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                            {shape, kDigitError, kInputShapeSample2});
            GELOGE(PARAM_INVALID, "--input_shape's shape value[%s] is not digit", shape_value_str.c_str());
            return false;
          }
        }
      } catch (const std::out_of_range &) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        GELOGW("Input parameter[--input_shape]’s value[%s] cause out of range execption!", shape_value_str.c_str());
        return false;
      } catch (const std::invalid_argument &) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        GELOGW("Input parameter[--input_shape]’s value[%s] cause invalid argument!", shape_value_str.c_str());
        return false;
      } catch (...) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10015", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        GELOGW("Input parameter[--input_shape]’s value[%s] cause unkown execption!", shape_value_str.c_str());
        return false;
      }
      int64_t result = left_result;
      // - 1 is not currently supported
      if (!is_dynamic_input && result <= 0) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10011", {"shape", "result"}, {shape, std::to_string(result)});
        GELOGW(
            "Input parameter[--input_shape]’s shape value[%s] is invalid, "
            "expect positive integer, but value is %ld.",
            shape.c_str(), result);
        return false;
      }
      shape_values.push_back(result);
    }
    shape_map.emplace(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values));
    user_shape_map.push_back(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values));
  }
  return true;
}
// Parse the --out_nodes option ("name1:0;name1:1;name2:0") and fill the
// global omg context: out_nodes_map (name -> list of indices) and
// user_out_nodes (ordered (name, index) pairs). Both are cleared first.
// Returns PARAM_INVALID (after reporting E10001/E10013/E10014) on malformed
// entries; stoi exceptions are caught as a safety net even though
// CheckDigitStr validates the index string beforehand.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputNodes(const string &out_nodes) {
  try {
    // parse output node
    if (!out_nodes.empty()) {
      domi::GetContext().out_nodes_map.clear();
      domi::GetContext().user_out_nodes.clear();
      vector<string> nodes_v = StringUtils::Split(out_nodes, ';');
      for (const string &node : nodes_v) {
        vector<string> key_value_v = StringUtils::Split(node, ':');
        if (key_value_v.size() != 2) {  // The size must be 2.
          ErrorManager::GetInstance().ATCReportErrMessage(
              "E10001", {"parameter", "value", "reason"},
              {"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""});
          GELOGE(PARAM_INVALID,
                 "The input format of --out_nodes is invalid, the correct format is "
                 "\"node_name1:0;node_name1:1;node_name2:0\", while the actual input is %s.",
                 node.c_str());
          return PARAM_INVALID;
        }
        auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]);
        // stoi: The method may throw an exception: invalid_argument/out_of_range
        if (!CheckDigitStr(key_value_v[1])) {
          ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
                                                          {"--out_nodes", out_nodes, "is not positive integer"});
          GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str());
          return PARAM_INVALID;
        }
        int32_t index = stoi(StringUtils::Trim(key_value_v[1]));
        // Append to the existing index list for this node, or start a new one.
        if (iter != domi::GetContext().out_nodes_map.end()) {
          iter->second.emplace_back(index);
        } else {
          std::vector<int32_t> index_v;
          index_v.emplace_back(index);
          domi::GetContext().out_nodes_map.emplace(key_value_v[0], index_v);
        }
        domi::GetContext().user_out_nodes.push_back(std::make_pair(key_value_v[0], index));
      }
    }
  } catch (std::invalid_argument &) {
    GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes});
    return PARAM_INVALID;
  } catch (std::out_of_range &) {
    GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"out_nodes", out_nodes});
    return PARAM_INVALID;
  }
  return SUCCESS;
}
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOpConf(const char *op_conf) { | |||
| if (op_conf != nullptr && *op_conf != '\0') { | |||
| // divided by ":" | |||
| PropertiesManager::Instance().SetPropertyDelimiter(OP_CONF_DELIMITER); | |||
| // Parsing the op_conf configuration item file | |||
| if (!PropertiesManager::Instance().Init(op_conf)) { | |||
| GELOGE(FAILED, "op_name_map init failed!"); | |||
| return FAILED; | |||
| } | |||
| // Return map and put it into ATC global variable | |||
| domi::GetContext().op_conf_map = PropertiesManager::Instance().GetPropertyMap(); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -1,62 +0,0 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_COMMON_GRAPH_PARSER_UTIL_H_ | |||
| #define GE_COMMON_GRAPH_PARSER_UTIL_H_ | |||
| #include <google/protobuf/message.h> | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| #include "framework/common/types.h" | |||
| #include "framework/omg/omg_inner_types.h" | |||
| #include "proto/ge_ir.pb.h" | |||
| #include "proto/om.pb.h" | |||
| #include "graph/compute_graph.h" | |||
| #include "graph/graph.h" | |||
| #include "graph/model.h" | |||
| #include "runtime/kernel.h" | |||
| using domi::Status; | |||
| using std::pair; | |||
| using std::string; | |||
| using std::unordered_map; | |||
| using std::vector; | |||
| namespace ge { | |||
| Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); | |||
| Status ParseOutputFp16NodesFormat(const string &is_output_fp16); | |||
| Status ParseOutputNodes(const string &out_nodes); | |||
| bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map, | |||
| vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input); | |||
| Status ParseOpConf(const char *op_conf); | |||
| } // namespace ge | |||
| namespace domi { | |||
| /** | |||
| * @ingroup domi_omg | |||
| * @brief get omg context | |||
| * @return reference of OmgContext | |||
| */ | |||
| ge::OmgContext &GetContext(); | |||
| } // namespace domi | |||
| #endif // GE_COMMON_GRAPH_PARSER_UTIL_H_ | |||
| @@ -76,8 +76,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
| for (size_t i = 0; i < device_id_.size(); ++i) { | |||
| ret = StartProfiling(0, device_id_[i]); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Profiling start failed on device %d.", device_id_[i]); | |||
| return FAILED; | |||
| GELOGW("Profiling start failed on device %d.", device_id_[i]); | |||
| continue; | |||
| } | |||
| GELOGI("Profiling init succ on device %d.", device_id_[i]); | |||
| } | |||
| @@ -316,7 +316,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||
| ProfMgrCfg prof_cfg = {send_profiling_config_}; | |||
| void *prof_handle = ProfMgrStartUp(&prof_cfg); | |||
| if (prof_handle == nullptr) { | |||
| GELOGW("ProfMgrStartUp failed."); | |||
| GELOGW("ProfMgrStartUp failed on device %d ", device_id); | |||
| return FAILED; | |||
| } | |||
| GELOGD("StartProfiling, prof_handle: %p", prof_handle); | |||
| @@ -31,193 +31,6 @@ | |||
| #include "graph/utils/attr_utils.h" | |||
| namespace ge { | |||
| namespace { | |||
| const string kEnableFlag = "1"; | |||
| const uint32_t kAicoreOverflow = (0x1 << 0); | |||
| const uint32_t kAtomicOverflow = (0x1 << 1); | |||
| const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); | |||
| } // namespace | |||
// Copy constructor: delegate the member-wise copy to CopyFrom.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) {
  CopyFrom(other);
}
// Copy assignment: CopyFrom guards against self-assignment internally.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=(
    const DumpProperties &other) {
  CopyFrom(other);
  return *this;
}
// (Re)initialize all dump settings from the GE context options.
// Reads OPTION_EXEC_ENABLE_DUMP / ENABLE_DUMP_DEBUG, and — when either is
// "1" — derives a timestamped dump path from OPTION_EXEC_DUMP_PATH. When
// plain dump is enabled, also picks up dump step/mode and registers the
// wildcard DUMP_ALL_MODEL entry. Finishes by applying op-debug options.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOptions() {
  // reset all state so repeated initialization starts clean
  enable_dump_.clear();
  enable_dump_debug_.clear();
  dump_path_.clear();
  dump_step_.clear();
  dump_mode_.clear();
  is_op_debug_ = false;
  op_debug_mode_ = 0;
  string enable_dump;
  (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP, enable_dump);
  enable_dump_ = enable_dump;
  string enable_dump_debug;
  (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP_DEBUG, enable_dump_debug);
  enable_dump_debug_ = enable_dump_debug;
  if ((enable_dump_ == kEnableFlag) || (enable_dump_debug_ == kEnableFlag)) {
    string dump_path;
    if (GetContext().GetOption(OPTION_EXEC_DUMP_PATH, dump_path) == GRAPH_SUCCESS) {
      // normalize to a trailing slash, then append a per-run timestamp dir
      if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') {
        dump_path = dump_path + "/";
      }
      dump_path = dump_path + CurrentTimeInStr() + "/";
      GELOGI("Get dump path %s successfully", dump_path.c_str());
      SetDumpPath(dump_path);
    } else {
      GELOGW("DUMP_PATH is not set");
    }
  }
  if (enable_dump_ == kEnableFlag) {
    string dump_step;
    if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) {
      GELOGD("Get dump step %s successfully", dump_step.c_str());
      SetDumpStep(dump_step);
    }
    string dump_mode;
    if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) {
      GELOGD("Get dump mode %s successfully", dump_mode.c_str());
      SetDumpMode(dump_mode);
    }
    // empty layer set == dump every layer of every model
    AddPropertyValue(DUMP_ALL_MODEL, {});
  }
  SetDumpDebugOptions();
}
| // The following is the new dump scenario of the fusion operator | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue( | |||
| const std::string &model, const std::set<std::string> &layers) { | |||
| for (const std::string &layer : layers) { | |||
| GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str()); | |||
| } | |||
| model_dump_properties_map_[model] = layers; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::DeletePropertyValue(const std::string &model) { | |||
| auto iter = model_dump_properties_map_.find(model); | |||
| if (iter != model_dump_properties_map_.end()) { | |||
| model_dump_properties_map_.erase(iter); | |||
| } | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetAllDumpModel() const { | |||
| std::set<std::string> model_list; | |||
| for (auto &iter : model_dump_properties_map_) { | |||
| model_list.insert(iter.first); | |||
| } | |||
| return model_list; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue( | |||
| const std::string &model) const { | |||
| auto iter = model_dump_properties_map_.find(model); | |||
| if (iter != model_dump_properties_map_.end()) { | |||
| return iter->second; | |||
| } | |||
| return {}; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump( | |||
| const std::string &model, const std::string &om_name, const std::string &op_name) const { | |||
| // if dump all | |||
| if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) { | |||
| return true; | |||
| } | |||
| // if this model need dump | |||
| auto om_name_iter = model_dump_properties_map_.find(om_name); | |||
| auto model_name_iter = model_dump_properties_map_.find(model); | |||
| if (om_name_iter != model_dump_properties_map_.end() || model_name_iter != model_dump_properties_map_.end()) { | |||
| // if no dump layer info, dump all layer in this model | |||
| auto model_iter = om_name_iter != model_dump_properties_map_.end() ? om_name_iter : model_name_iter; | |||
| if (model_iter->second.empty()) { | |||
| return true; | |||
| } | |||
| return model_iter->second.find(op_name) != model_iter->second.end(); | |||
| } | |||
| GELOGD("Model %s is not seated to be dump.", model.c_str()); | |||
| return false; | |||
| } | |||
// Trivial setters/getters for the dump configuration fields.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpPath(const std::string &path) {
  dump_path_ = path;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string DumpProperties::GetDumpPath() const { return dump_path_; }
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpStep(const std::string &step) {
  dump_step_ = step;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string DumpProperties::GetDumpStep() const { return dump_step_; }
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpMode(const std::string &mode) {
  dump_mode_ = mode;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string DumpProperties::GetDumpMode() const { return dump_mode_; }
| void DumpProperties::CopyFrom(const DumpProperties &other) { | |||
| if (&other != this) { | |||
| enable_dump_ = other.enable_dump_; | |||
| enable_dump_debug_ = other.enable_dump_debug_; | |||
| dump_path_ = other.dump_path_; | |||
| dump_step_ = other.dump_step_; | |||
| dump_mode_ = other.dump_mode_; | |||
| model_dump_properties_map_ = other.model_dump_properties_map_; | |||
| is_op_debug_ = other.is_op_debug_; | |||
| op_debug_mode_ = other.op_debug_mode_; | |||
| } | |||
| } | |||
| void DumpProperties::SetDumpDebugOptions() { | |||
| if (enable_dump_debug_ == kEnableFlag) { | |||
| string dump_debug_mode; | |||
| if (GetContext().GetOption(OPTION_EXEC_DUMP_DEBUG_MODE, dump_debug_mode) == GRAPH_SUCCESS) { | |||
| GELOGD("Get dump debug mode %s successfully", dump_debug_mode.c_str()); | |||
| } else { | |||
| GELOGW("Dump debug mode is not set."); | |||
| return; | |||
| } | |||
| if (dump_debug_mode == OP_DEBUG_AICORE) { | |||
| GELOGD("ge.exec.dumpDebugMode=aicore_overflow, op debug is open."); | |||
| is_op_debug_ = true; | |||
| op_debug_mode_ = kAicoreOverflow; | |||
| } else if (dump_debug_mode == OP_DEBUG_ATOMIC) { | |||
| GELOGD("ge.exec.dumpDebugMode=atomic_overflow, op debug is open."); | |||
| is_op_debug_ = true; | |||
| op_debug_mode_ = kAtomicOverflow; | |||
| } else if (dump_debug_mode == OP_DEBUG_ALL) { | |||
| GELOGD("ge.exec.dumpDebugMode=all, op debug is open."); | |||
| is_op_debug_ = true; | |||
| op_debug_mode_ = kAllOverflow; | |||
| } else { | |||
| GELOGW("ge.exec.dumpDebugMode is invalid."); | |||
| } | |||
| } else { | |||
| GELOGI("ge.exec.enableDumpDebug is false or is not set."); | |||
| } | |||
| } | |||
// Default state: not initialized, properties split on '='.
PropertiesManager::PropertiesManager() : is_inited_(false), delimiter("=") {}
PropertiesManager::~PropertiesManager() {}
| @@ -24,6 +24,7 @@ | |||
| #include <vector> | |||
| #include "graph/op_desc.h" | |||
| #include "common/dump/dump_properties.h" | |||
| namespace ge { | |||
| // Configuration property management | |||
| @@ -32,50 +33,6 @@ static const char *USE_FUSION __attribute__((unused)) = "FMK_USE_FUSION"; | |||
| static const char *TIMESTAT_ENABLE __attribute__((unused)) = "DAVINCI_TIMESTAT_ENABLE"; | |||
| static const char *ANNDROID_DEBUG __attribute__((unused)) = "ANNDROID_DEBUG"; | |||
// Holds the data-dump configuration (path, step, mode, per-model layer sets)
// and the op-debug overflow-detection settings read from GE context options.
class DumpProperties {
 public:
  DumpProperties() = default;
  ~DumpProperties() = default;
  // Copy operations delegate to CopyFrom (member-wise copy).
  DumpProperties(const DumpProperties &dump);
  DumpProperties &operator=(const DumpProperties &dump);
  // Populate all fields from the GE context options.
  void InitByOptions();
  // Per-model dump layer configuration; an empty layer set means all layers.
  void AddPropertyValue(const std::string &model, const std::set<std::string> &layers);
  void DeletePropertyValue(const std::string &model);
  std::set<std::string> GetAllDumpModel() const;
  std::set<std::string> GetPropertyValue(const std::string &model) const;
  // True when the given op of the model (or om name) is configured for dump.
  bool IsLayerNeedDump(const std::string &model, const std::string &om_name, const std::string &op_name) const;
  void SetDumpPath(const std::string &path);
  std::string GetDumpPath() const;
  void SetDumpStep(const std::string &step);
  std::string GetDumpStep() const;
  void SetDumpMode(const std::string &mode);
  std::string GetDumpMode() const;
  // Op-debug (overflow detection) state derived from ge.exec.dumpDebugMode.
  bool IsOpDebugOpen() const { return is_op_debug_; }
  uint32_t GetOpDebugMode() const { return op_debug_mode_; }
 private:
  void CopyFrom(const DumpProperties &other);
  void SetDumpDebugOptions();
  string enable_dump_;        // "1" when dump is enabled
  string enable_dump_debug_;  // "1" when dump-debug is enabled
  std::string dump_path_;
  std::string dump_step_;
  std::string dump_mode_;
  // model name -> layers to dump (empty set == every layer)
  std::map<std::string, std::set<std::string>> model_dump_properties_map_;
  bool is_op_debug_ = false;
  uint32_t op_debug_mode_ = 0;
};
| class PropertiesManager { | |||
| public: | |||
| // Singleton | |||
| @@ -502,6 +502,7 @@ const uint32_t MODEL_FILE_HEAD_LEN = 256; | |||
| /// @brief Input node type | |||
| /// | |||
| const std::string INPUT_TYPE = "Input"; | |||
| const std::string DUMMY_DATA = "DummyData"; | |||
| /// | |||
| /// @ingroup domi_omg | |||
| @@ -57,7 +57,7 @@ const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M | |||
| /// Based on the security coding specification and the current actual (protobuf) model size, it is determined as 2G-1 | |||
| const int kMaxFileSizeLimit = INT_MAX; | |||
| const int kMaxBuffSize = 256; | |||
| const char *const kPathValidReason = "The path can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; | |||
| const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; | |||
| } // namespace | |||
| namespace ge { | |||
| @@ -311,6 +311,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestap() { | |||
| return static_cast<uint64_t>(total_use_time); | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint32_t GetCurrentSecondTimestap() { | |||
| struct timeval tv {}; | |||
| int ret = gettimeofday(&tv, nullptr); | |||
| GE_LOGE_IF(ret != 0, "Func gettimeofday may failed: ret=%d", ret); | |||
| auto total_use_time = tv.tv_sec; // seconds | |||
| return static_cast<uint32_t>(total_use_time); | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b) { | |||
| if (a > 0) { | |||
| if (b > 0) { | |||
| @@ -372,10 +380,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const | |||
| } | |||
| // A regular matching expression to verify the validity of the input file path | |||
| // ^(/|./|(../)+|)([.]?[\u4e00-\u9fa5A-Za-z0-9_.-]+/)*[\u4e00-\u9fa5A-Za-z0-9_+.-]+$ | |||
| // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores | |||
| // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) | |||
| std::string mode = "^(/+|./+|(../+)+|)(../|([.]?[\u4e00-\u9fa5A-Za-z0-9_.-]+)/+)*[\u4e00-\u9fa5A-Za-z0-9_+.-]+$"; | |||
| std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| !ValidateStr(real_path, mode), | |||
| @@ -408,10 +415,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const | |||
| return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), PATH_MAX); | |||
| // A regular matching expression to verify the validity of the input file path | |||
| // ^(/|./|(../)+|)([.]?[\u4e00-\u9fa5A-Za-z0-9_-]+/)*[\u4e00-\u9fa5A-Za-z0-9_+.-]+$ | |||
| // Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores | |||
| // File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.) | |||
| std::string mode = "^(/+|./+|(../+)+|)(../|([.]?[\u4e00-\u9fa5A-Za-z0-9_.-]+)/+)*[\u4e00-\u9fa5A-Za-z0-9_+.-]+$"; | |||
| std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
| !ValidateStr(file_path, mode), | |||
| @@ -460,9 +466,9 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str | |||
| int ret = regcomp(®, mode.c_str(), cflags); | |||
| if (ret) { | |||
| regerror(ret, ®, ebuff, kMaxBuffSize); | |||
| GELOGE(ge::PARAM_INVALID, "regcomp failed, reason: %s", ebuff); | |||
| GELOGW("regcomp failed, reason: %s", ebuff); | |||
| regfree(®); | |||
| return false; | |||
| return true; | |||
| } | |||
| ret = regexec(®, str.c_str(), 0, nullptr, 0); | |||
| @@ -42,6 +42,8 @@ const char *const kVectorCore = "VectorCore"; | |||
| const char *const kVectorEngine = "VectorEngine"; | |||
| const char *const kAIcoreEngine = "AIcoreEngine"; | |||
| const char *const kCustomOpFlag = "_custom_op_flag"; | |||
| const char *const kHostCpuEngineName = "DNN_VM_HOST_CPU"; | |||
| const char *const kHostCpuOpKernelLibName = "DNN_VM_HOST_CPU_OP_STORE"; | |||
| } // namespace | |||
| namespace ge { | |||
| @@ -181,6 +183,7 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||
| GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str()); | |||
| return ""; | |||
| } | |||
| GE_IF_BOOL_EXEC(ge::GetContext().GetHostExecFlag(), return GetHostCpuEngineName(op_infos, op_desc)); | |||
| std::string ge_core_type; | |||
| Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type); | |||
| GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE")); | |||
| @@ -245,6 +248,22 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||
| return ""; | |||
| } | |||
| std::string DNNEngineManager::GetHostCpuEngineName(const std::vector<OpInfo> &op_infos, | |||
| const OpDescPtr &op_desc) const { | |||
| for (const auto &it : op_infos) { | |||
| if ((it.engine == kHostCpuEngineName) && (it.opKernelLib == kHostCpuOpKernelLibName)) { | |||
| op_desc->SetOpEngineName(kHostCpuEngineName); | |||
| op_desc->SetOpKernelLibName(kHostCpuOpKernelLibName); | |||
| GELOGI("DNNEngineManager: Set OpKernelLibName %s and OpEngineName %s to %s", kHostCpuOpKernelLibName, | |||
| kHostCpuEngineName, op_desc->GetName().c_str()); | |||
| return kHostCpuEngineName; | |||
| } | |||
| } | |||
| GELOGE(FAILED, "DNNEngineManager: HostCpuEngine not support [%s, %s].", op_desc->GetName().c_str(), | |||
| op_desc->GetType().c_str()); | |||
| return ""; | |||
| } | |||
| const std::map<std::string, SchedulerConf> &DNNEngineManager::GetSchedulers() const { return schedulers_; } | |||
| Status DNNEngineManager::ParserJsonFile() { | |||
| @@ -76,6 +76,7 @@ class DNNEngineManager { | |||
| Status ParserEngineMessage(const json engines_json, const string &scheduler_mark, | |||
| map<string, EngineConfPtr> &engines); | |||
| Status CheckJsonFile(); | |||
| std::string GetHostCpuEngineName(const std::vector<OpInfo> &op_infos, const OpDescPtr &op_desc) const; | |||
| PluginManager plugin_mgr_; | |||
| std::map<std::string, DNNEnginePtr> engines_map_; | |||
| std::map<std::string, ge::DNNEngineAttribute> engines_attrs_map_; | |||
| @@ -5,6 +5,13 @@ | |||
| "name": "1980_hwts", | |||
| "ex_attrs": "", | |||
| "cal_engines": [ | |||
| { | |||
| "id": "DNN_VM_HOST_CPU", | |||
| "name": "HOST_CPU", | |||
| "independent": false, | |||
| "skip_assign_stream": true, | |||
| "attach": true | |||
| }, | |||
| { | |||
| "id": "DNN_VM_GE_LOCAL", | |||
| "name": "GE_LOCAL", | |||
| @@ -26,6 +26,9 @@ file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
| file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
| "ge_executor.cc" | |||
| "../common/dump/dump_properties.cc" | |||
| "../common/dump/dump_manager.cc" | |||
| "../common/dump/dump_op.cc" | |||
| "../common/ge/op_tiling_manager.cc" | |||
| "../common/ge/plugin_manager.cc" | |||
| "../common/profiling/profiling_manager.cc" | |||
| @@ -23,6 +23,7 @@ | |||
| #include "common/ge/ge_util.h" | |||
| #include "common/helper/model_helper.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "common/util.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/util.h" | |||
| @@ -35,6 +36,8 @@ | |||
| #include "graph/utils/graph_utils.h" | |||
| #include "mmpa/mmpa_api.h" | |||
| #include "single_op/single_op_manager.h" | |||
| #include "graph/manager/graph_var_manager.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| using std::string; | |||
| using std::vector; | |||
| @@ -348,18 +351,46 @@ Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, u | |||
| } | |||
| vector<uint64_t> cur_dynamic_dims; | |||
| if (GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims) != SUCCESS) { | |||
| GELOGE(FAILED, "GetCurDynamicDims failed."); | |||
| std::vector<ge::TensorDesc> input_desc; | |||
| std::vector<ge::TensorDesc> output_desc; | |||
| ret = GetModelDescInfo(model_id, input_desc, output_desc); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(FAILED, "GetModelDescInfo failed."); | |||
| return FAILED; | |||
| } | |||
| vector<string> user_designate_shape_order; | |||
| vector<int64_t> all_data_dims; | |||
| ret = GetUserDesignateShapeOrder(model_id, user_designate_shape_order); | |||
| if (ret != ge::SUCCESS) { | |||
| GELOGE(FAILED, "GetUserDesignateShapeOrder failed."); | |||
| return FAILED; | |||
| } | |||
| for (auto &data_name : user_designate_shape_order) { | |||
| for (size_t j = 0; j < input_desc.size(); ++j) { | |||
| if (input_desc.at(j).GetName() == data_name) { | |||
| for (auto dim : input_desc.at(j).GetShape().GetDims()) { | |||
| all_data_dims.push_back(dim); | |||
| } | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| if (dynamic_dims.size() != all_data_dims.size()) { | |||
| GELOGE(FAILED, "Dynamic input size [%lu] is not equal with all data dims size [%lu]!", dynamic_dims.size(), | |||
| all_data_dims.size()); | |||
| return FAILED; | |||
| } | |||
| for (std::size_t i = 0; i < all_data_dims.size(); ++i) { | |||
| if (all_data_dims[i] < 0) { | |||
| cur_dynamic_dims.push_back(dynamic_dims[i]); | |||
| } | |||
| } | |||
| size_t dynamic_dim_num = cur_dynamic_dims.size(); | |||
| uint64_t dynamic_input_size = static_cast<uint64_t>(dynamic_dim_num * sizeof(uint64_t)); | |||
| if (length < dynamic_input_size) { | |||
| GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | |||
| return FAILED; | |||
| } | |||
| for (uint32_t i = 0; i < dynamic_dim_num; ++i) { | |||
| // Memcpy dynamic dim[i] from host to device | |||
| if (rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(dynamic_input_addr) + sizeof(uint64_t) * i), | |||
| @@ -549,6 +580,12 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||
| GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); | |||
| return FAILED; | |||
| } | |||
| std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id); | |||
| if (davinci_model != nullptr) { | |||
| uint64_t session_id = davinci_model->GetSessionId(); | |||
| VarManagerPool::Instance().RemoveVarManager(session_id); | |||
| } | |||
| return GraphLoader::UnloadModel(model_id); | |||
| } | |||
| @@ -658,6 +695,30 @@ Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64 | |||
| return SUCCESS; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get user designeate shape order | |||
| /// @param [in] model_id | |||
| /// @param [out] user_designate_shape_order | |||
| /// @return execute result | |||
| /// | |||
| Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> &user_designate_shape_order) { | |||
| GELOGI("Begin to get user designate shape info."); | |||
| if (!isInit_) { | |||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
| return GE_EXEC_NOT_INIT; | |||
| } | |||
| Status ret = GraphExecutor::GetUserDesignateShapeOrder(model_id, user_designate_shape_order); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetUserDesignateShapeOrder failed."); | |||
| return ret; | |||
| } | |||
| GELOGI("Get user designate shape order succ."); | |||
| return SUCCESS; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get AIPP input format | |||
| @@ -674,7 +735,7 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo | |||
| } | |||
| Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetAIPPInfo failed."); | |||
| GELOGW("GetAIPPInfo is not success."); | |||
| return ret; | |||
| } | |||
| GELOGI("GetAIPPInfo succ."); | |||
| @@ -1020,4 +1081,26 @@ Status GeExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, | |||
| GELOGI("GetAllAippInputOutputDims succ."); | |||
| return SUCCESS; | |||
| } | |||
| Status GeExecutor::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) { | |||
| GELOGI("Begin to GetOpDescInfo."); | |||
| Status ret = GraphExecutor::GetOpDescInfo(device_id, stream_id, task_id, op_desc_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetOpDescInfo failed."); | |||
| return ret; | |||
| } | |||
| GELOGI("GetOpDescInfo succ."); | |||
| return SUCCESS; | |||
| } | |||
| Status GeExecutor::SetDump(const DumpConfig &dump_config) { | |||
| GELOGI("Start to set dump config"); | |||
| auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Set dump conf failed"); | |||
| return ret; | |||
| } | |||
| GELOGI("Set dump config succ."); | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -3,6 +3,9 @@ LOCAL_PATH := $(call my-dir) | |||
| local_ge_executor_src_files := \ | |||
| ge_executor.cc \ | |||
| ../common/profiling/profiling_manager.cc \ | |||
| ../common/dump/dump_properties.cc \ | |||
| ../common/dump/dump_manager.cc \ | |||
| ../common/dump/dump_op.cc \ | |||
| ../common/ge/plugin_manager.cc \ | |||
| ../common/ge/op_tiling_manager.cc \ | |||
| ../graph/load/graph_loader.cc \ | |||
| @@ -26,6 +26,9 @@ COMMON_LOCAL_SRC_FILES := \ | |||
| common/formats/format_transfers/format_transfer_nchw_fz_c04.cc \ | |||
| common/formats/formats.cc \ | |||
| common/profiling/profiling_manager.cc \ | |||
| common/dump/dump_properties.cc \ | |||
| common/dump/dump_manager.cc \ | |||
| common/dump/dump_op.cc \ | |||
| common/helper/model_cache_helper.cc \ | |||
| ge_local_engine/engine/host_cpu_engine.cc \ | |||
| @@ -42,6 +45,7 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \ | |||
| graph/manager/graph_manager_utils.cc \ | |||
| graph/manager/graph_context.cc \ | |||
| graph/preprocess/graph_preprocess.cc \ | |||
| graph/preprocess/multi_batch_options.cc \ | |||
| graph/preprocess/multi_batch_copy_graph.cc \ | |||
| graph/execute/graph_execute.cc \ | |||
| graph/load/graph_loader.cc \ | |||
| @@ -149,6 +153,7 @@ OMG_HOST_SRC_FILES := \ | |||
| host_kernels/slice_kernel.cc \ | |||
| host_kernels/slice_d_kernel.cc \ | |||
| host_kernels/dynamic_stitch_kernel.cc \ | |||
| host_kernels/identity_kernel.cc \ | |||
| graph/passes/stop_gradient_pass.cc \ | |||
| graph/passes/prevent_gradient_pass.cc \ | |||
| graph/passes/identity_pass.cc \ | |||
| @@ -165,12 +170,16 @@ OMG_HOST_SRC_FILES := \ | |||
| graph/passes/switch_to_stream_switch_pass.cc \ | |||
| graph/passes/attach_stream_label_pass.cc \ | |||
| graph/passes/multi_batch_pass.cc \ | |||
| graph/passes/multi_batch_clone_pass.cc \ | |||
| graph/passes/subexpression_migration_pass.cc \ | |||
| graph/passes/unused_args_clean_pass.cc \ | |||
| graph/passes/next_iteration_pass.cc \ | |||
| graph/passes/control_trigger_pass.cc \ | |||
| graph/passes/cond_pass.cc \ | |||
| graph/passes/cond_remove_pass.cc \ | |||
| graph/passes/for_pass.cc \ | |||
| graph/passes/enter_pass.cc \ | |||
| graph/passes/assign_pass.cc \ | |||
| graph/passes/addn_pass.cc \ | |||
| graph/passes/common_subexpression_elimination_pass.cc \ | |||
| graph/passes/transop_symmetry_elimination_pass.cc \ | |||
| @@ -185,11 +194,10 @@ OMG_HOST_SRC_FILES := \ | |||
| graph/passes/transpose_transdata_pass.cc \ | |||
| graph/passes/hccl_memcpy_pass.cc \ | |||
| graph/passes/flow_ctrl_pass.cc \ | |||
| graph/passes/global_step_insert_pass.cc \ | |||
| graph/passes/link_gen_mask_nodes_pass.cc \ | |||
| graph/passes/replace_with_empty_const_pass.cc \ | |||
| graph/passes/hccl_group_pass.cc \ | |||
| graph/passes/switch_fusion_pass.cc \ | |||
| graph/passes/switch_split_pass.cc \ | |||
| graph/passes/memcpy_addr_async_pass.cc \ | |||
| graph/passes/set_input_output_offset_pass.cc \ | |||
| @@ -26,6 +26,9 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| common/ge/op_tiling_manager.cc\ | |||
| common/helper/model_cache_helper.cc \ | |||
| common/profiling/profiling_manager.cc \ | |||
| common/dump/dump_manager.cc \ | |||
| common/dump/dump_properties.cc \ | |||
| common/dump/dump_op.cc \ | |||
| engine_manager/dnnengine_manager.cc \ | |||
| ge_local_engine/engine/host_cpu_engine.cc \ | |||
| generator/ge_generator.cc \ | |||
| @@ -93,7 +96,6 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| graph/manager/util/variable_accelerate_ctrl.cc \ | |||
| graph/optimize/graph_optimize.cc \ | |||
| graph/optimize/mem_rw_conflict_optimize.cc \ | |||
| graph/optimize/optimizer/allreduce_fusion_pass.cc \ | |||
| graph/optimize/summary_optimize.cc \ | |||
| graph/partition/engine_place.cc \ | |||
| graph/partition/graph_partition.cc \ | |||
| @@ -119,10 +121,10 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| graph/passes/dimension_compute_pass.cc \ | |||
| graph/passes/dropout_pass.cc \ | |||
| graph/passes/hccl_group_pass.cc \ | |||
| graph/passes/switch_fusion_pass.cc \ | |||
| graph/passes/switch_split_pass.cc \ | |||
| graph/passes/enter_pass.cc \ | |||
| graph/passes/assign_pass.cc \ | |||
| graph/passes/flow_ctrl_pass.cc \ | |||
| graph/passes/global_step_insert_pass.cc \ | |||
| host_kernels/transpose_kernel.cc \ | |||
| host_kernels/add_kernel.cc \ | |||
| host_kernels/broadcast_args_kernel.cc \ | |||
| @@ -131,6 +133,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| host_kernels/concat_offset_kernel.cc \ | |||
| host_kernels/concat_v2_kernel.cc \ | |||
| host_kernels/dynamic_stitch_kernel.cc \ | |||
| host_kernels/identity_kernel.cc \ | |||
| host_kernels/empty_kernel.cc \ | |||
| host_kernels/expanddims_kernel.cc \ | |||
| host_kernels/fill_kernel.cc \ | |||
| @@ -172,6 +175,9 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| graph/passes/link_gen_mask_nodes_pass.cc \ | |||
| graph/passes/merge_pass.cc \ | |||
| graph/passes/multi_batch_pass.cc \ | |||
| graph/passes/multi_batch_clone_pass.cc \ | |||
| graph/passes/subexpression_migration_pass.cc \ | |||
| graph/passes/unused_args_clean_pass.cc \ | |||
| graph/passes/net_output_pass.cc \ | |||
| graph/passes/next_iteration_pass.cc \ | |||
| graph/passes/no_use_reshape_remove_pass.cc \ | |||
| @@ -225,6 +231,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| graph/preprocess/graph_preprocess.cc \ | |||
| graph/preprocess/insert_op/ge_aipp_op.cc \ | |||
| graph/preprocess/insert_op/util_insert_aipp_op.cc \ | |||
| graph/preprocess/multi_batch_options.cc \ | |||
| graph/preprocess/multi_batch_copy_graph.cc \ | |||
| init/gelib.cc \ | |||
| model/ge_model.cc \ | |||
| @@ -267,10 +274,17 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
| hybrid/node_executor/aicpu/aicpu_ext_info.cc \ | |||
| hybrid/node_executor/aicpu/aicpu_node_executor.cc \ | |||
| hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ | |||
| hybrid/node_executor/hostcpu/ge_local_node_executor.cc \ | |||
| hybrid/node_executor/ge_local/ge_local_node_executor.cc \ | |||
| hybrid/node_executor/host_cpu/host_cpu_node_executor.cc \ | |||
| hybrid/node_executor/host_cpu/kernel_factory.cc \ | |||
| hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc \ | |||
| hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ | |||
| hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ | |||
| hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ | |||
| hybrid/node_executor/controlop/control_op_executor.cc \ | |||
| hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | |||
| hybrid/node_executor/hccl/hccl_node_executor.cc \ | |||
| hybrid/node_executor/rts/rts_node_executor.cc \ | |||
| hybrid/node_executor/node_executor.cc \ | |||
| hybrid/node_executor/task_context.cc \ | |||
| hybrid/hybrid_davinci_model.cc \ | |||
| @@ -343,7 +357,6 @@ LOCAL_SHARED_LIBRARIES := \ | |||
| libgraph \ | |||
| libregister \ | |||
| libge_common \ | |||
| libhccl \ | |||
| libmsprof \ | |||
| liberror_manager \ | |||
| @@ -425,7 +438,6 @@ LOCAL_SHARED_LIBRARIES := \ | |||
| libc_sec \ | |||
| libslog \ | |||
| libmmpa \ | |||
| libhccl \ | |||
| libmsprof \ | |||
| LOCAL_LDFLAGS := -lrt -ldl | |||
| @@ -457,7 +469,6 @@ LOCAL_SHARED_LIBRARIES := \ | |||
| libc_sec \ | |||
| libslog \ | |||
| libmmpa \ | |||
| libhccl \ | |||
| libmsprof \ | |||
| LOCAL_LDFLAGS := -lrt -ldl | |||
| @@ -658,10 +658,13 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||
| if (ret != SUCCESS) { | |||
| GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id); | |||
| VarManagerPool::Instance().RemoveVarManager(session_id); | |||
| return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | |||
| } | |||
| id += 1; | |||
| VarManagerPool::Instance().RemoveVarManager(session_id); | |||
| return SUCCESS; | |||
| } | |||
| @@ -28,6 +28,7 @@ | |||
| #include "graph/common/ge_call_wrapper.h" | |||
| #include "init/gelib.h" | |||
| #include "model/ge_model.h" | |||
| #include "graph/ge_context.h" | |||
| using domi::BuildMode; | |||
| @@ -166,11 +167,15 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfo | |||
| return SUCCESS; | |||
| } | |||
| Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, | |||
| std::vector<SubGraphInfoPtr> &subgraph_ptr_list, GeModelPtr &ge_model_ptr, | |||
| uint64_t session_id) { | |||
| Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, | |||
| GeModelPtr &ge_model_ptr, uint64_t session_id) { | |||
| if (ge::GetContext().GetHostExecFlag()) { | |||
| GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); | |||
| return SUCCESS; | |||
| } | |||
| GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); | |||
| Status ret = SecondPartition(comp_graph, subgraph_ptr_list); | |||
| Status ret = SecondPartition(comp_graph, subgraph_list); | |||
| GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); | |||
| auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | |||
| @@ -257,6 +262,10 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo | |||
| return SUCCESS; | |||
| } | |||
| Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) { | |||
| return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); | |||
| } | |||
| Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
| std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | |||
| @@ -63,10 +63,12 @@ class GraphBuilder { | |||
| Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | |||
| uint64_t session_id = INVALID_SESSION_ID); | |||
| Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
| Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, | |||
| GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | |||
| Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | |||
| uint64_t session_id = INVALID_SESSION_ID); | |||
| Status BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | |||
| uint64_t session_id = INVALID_SESSION_ID); | |||
| int build_mode_; | |||
| std::map<std::string, int> stream_max_parallel_num_; | |||
| @@ -745,6 +745,23 @@ bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { | |||
| return false; | |||
| } | |||
| bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, bool continuous) { | |||
| if (NodeUtils::IsDynamicShape(node)) { | |||
| return ((node->GetType() == DATA_TYPE) && !continuous) || (node->GetType() == NETOUTPUT); | |||
| } | |||
| if ((node->GetType() == DATA_TYPE) && !continuous) { | |||
| return !node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); | |||
| } | |||
| if (node->GetType() == NETOUTPUT) { | |||
| const auto &owner = node->GetOwnerComputeGraph(); | |||
| return owner->GetParentGraph() == nullptr; | |||
| } | |||
| return false; | |||
| } | |||
| MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, | |||
| MemoryType mem_type, const NodePtr &n, uint32_t out_index, | |||
| const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | |||
| @@ -793,9 +810,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); | |||
| // Data and netoutput need zero copy block | |||
| if ((node_op_desc->GetType() == DATA_TYPE && !continuous) || (node_op_desc->GetType() == NETOUTPUT)) { | |||
| block->is_zero_copy_ = true; | |||
| } | |||
| block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | |||
| block->Init(real_size, mem_type, n, out_index, no_align_size); | |||
| block->stream_id_ = node_op_desc->GetStreamId(); | |||
| @@ -970,6 +985,14 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i | |||
| return false; | |||
| } | |||
| bool IsKnownSubgraphData(const NodePtr &node) { | |||
| if (NodeUtils::IsDynamicShape(node)) { | |||
| return false; | |||
| } | |||
| return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX); | |||
| } | |||
| void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory) { | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null."); | |||
| GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory"); | |||
| @@ -1092,7 +1115,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
| (void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | |||
| // Allocate memory for the current node and release node memory of the same size in the workspace | |||
| GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | |||
| ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id]);) | |||
| ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id])); | |||
| if (IsContinuousOutput(node)) { | |||
| (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | |||
| return SUCCESS; | |||
| @@ -1118,6 +1141,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
| out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); | |||
| no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); | |||
| } | |||
| no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node)); | |||
| if (no_need_assign_memory) { | |||
| zero_memory_list_.emplace_back(node, kOutput, i, false); | |||
| continue; | |||
| @@ -1474,8 +1498,8 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz | |||
| return; | |||
| } | |||
| if ((op_desc->GetType() == DATA) || (op_desc->GetType() == AIPP_DATA_TYPE) || (op_desc->GetType() == MULTISHAPE) || | |||
| (op_desc->GetType() == NETOUTPUT)) { | |||
| static const set<string> kSetOffsetTypes = {DATA_TYPE, AIPP_DATA_TYPE, MULTISHAPE, NETOUTPUT}; | |||
| if ((kSetOffsetTypes.count(op_desc->GetType()) > 0) && !IsKnownSubgraphData(node_type.node)) { | |||
| if ((output_list[node_type.index] == kInvalidOffset) || (output_list[node_type.index] < offset)) { | |||
| output_list.at(node_type.index) = offset; | |||
| } | |||
| @@ -352,6 +352,8 @@ class BlockMemAssigner : public MemAssigner { | |||
| void AssignContinuousBlocks(); | |||
| bool IsZeroCopyBlock(const NodePtr &node, bool continuous); | |||
| bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | |||
| uint32_t &peer_input_index); | |||
| @@ -1227,6 +1227,18 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { | |||
| return ge::SUCCESS; | |||
| } | |||
| NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const { | |||
| if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) { | |||
| return node; | |||
| } | |||
| if (NodeUtils::IsDynamicShape(node)) { | |||
| return node; | |||
| } | |||
| return NodeUtils::GetParentInput(node); | |||
| } | |||
| ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const { | |||
| uint32_t parent_index = 0; | |||
| if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
| @@ -1235,13 +1247,29 @@ ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vecto | |||
| // Subgraph Data Node, check for constant input. | |||
| std::string op_type; | |||
| NodePtr in_node = NodeUtils::GetParentInput(node); | |||
| if (!NodeUtils::GetConstOpType(in_node, op_type)) { | |||
| return SUCCESS; // not constant input. | |||
| const auto &in_node = NodeUtils::GetParentInput(node); | |||
| if (NodeUtils::GetConstOpType(in_node, op_type)) { | |||
| input_list = in_node->GetOpDesc()->GetOutputOffset(); | |||
| node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as const output. | |||
| return SUCCESS; // Constant input. | |||
| } | |||
| // Memory allocated for dynamic shape subgraph Data. | |||
| if (NodeUtils::IsDynamicShape(node)) { | |||
| return SUCCESS; | |||
| } | |||
| const auto &owner = node->GetOwnerComputeGraph(); | |||
| const auto &parent_desc = owner->GetParentNode()->GetOpDesc(); | |||
| const auto parent_inputs = parent_desc->GetInputOffset(); | |||
| if (parent_inputs.size() <= parent_index) { | |||
| GELOGE(FAILED, "Get Parent input offset failed, node: %s, input size: %zu, parent index: %u", | |||
| node->GetName().c_str(), parent_inputs.size(), parent_index); | |||
| return FAILED; | |||
| } | |||
| vector<int64_t> const_input_list = in_node->GetOpDesc()->GetOutputOffset(); | |||
| node->GetOpDesc()->SetOutputOffset(const_input_list); // Set Data output same as const output. | |||
| input_list = {parent_inputs[parent_index]}; | |||
| node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input. | |||
| return SUCCESS; | |||
| } | |||
| @@ -1287,7 +1315,8 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||
| input_list.back()); | |||
| } else { | |||
| int64_t output_offset = output_list.at(peer_out_anchor->GetIdx()); | |||
| if (peer_out_anchor->GetOwnerNode()->GetType() == CONSTANT) { | |||
| const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); | |||
| if (in_node->GetType() == CONSTANT) { | |||
| GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index); | |||
| GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset)); | |||
| } | |||
| @@ -181,6 +181,8 @@ class GraphMemoryAssigner { | |||
| ge::Status UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const; | |||
| NodePtr GetKnownInputNode(const NodePtr &node) const; | |||
| MemoryOffsetList memory_offset_; | |||
| ge::ComputeGraphPtr compute_graph_; | |||
| HybridMemAssignerPtr mem_assigner_; | |||
| @@ -182,38 +182,26 @@ void ModelBuilder::SetInputIsConst(const ge::NodePtr &n) { | |||
| for (size_t i = 0; i < is_input_const.size(); i++) { | |||
| is_input_const[i] = false; | |||
| } | |||
| std::string const_type; | |||
| auto in_data_anchors = n->GetAllInDataAnchors(); | |||
| for (size_t index = 0; index < in_data_anchors.size(); index++) { | |||
| auto in_data_anchor = in_data_anchors.at(index); | |||
| const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||
| const auto &src_node = peer_out_anchor->GetOwnerNode(); | |||
| if (src_node->GetType() == CONSTANT) { | |||
| if (!NodeUtils::GetConstOpType(src_node, const_type)) { | |||
| continue; | |||
| } | |||
| if (const_type == CONSTANT) { | |||
| if (!SetInputConst(node_op_desc, src_node, index, is_input_const)) { | |||
| return; | |||
| } | |||
| } else if (src_node->GetType() == CONSTANTOP) { | |||
| } else { | |||
| if ((index < is_input_const.size()) && is_input_const[index]) { | |||
| is_input_const[index] = false; | |||
| } | |||
| } else if (src_node->GetType() == DATA) { | |||
| uint32_t parent_index = 0; | |||
| if (!AttrUtils::GetInt(src_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
| continue; | |||
| } | |||
| // Subgraph Data Node, check for constant input. | |||
| std::string op_type; | |||
| const NodePtr in_node = NodeUtils::GetParentInput(src_node); | |||
| if (!NodeUtils::GetConstOpType(in_node, op_type)) { | |||
| continue; // not constant input. | |||
| } | |||
| if (op_type == CONSTANT) { | |||
| if (!SetInputConst(node_op_desc, in_node, index, is_input_const)) { | |||
| return; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -16,6 +16,7 @@ | |||
| #include "graph/build/stream_allocator.h" | |||
| #include <memory> | |||
| #include <algorithm> | |||
| #include "common/ge/ge_util.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/fmk_error_codes.h" | |||
| @@ -374,8 +375,8 @@ Status StreamAllocator::InsertOneEventInTwoNodes(const NodePtr &cur_node, const | |||
| return SUCCESS; | |||
| } | |||
| if ((cur_node->GetType() == ENTER) || (cur_node->GetType() == REFENTER)) { | |||
| GELOGD("No need to insert event after enter_node %s.", cur_node->GetName().c_str()); | |||
| if (((cur_node->GetType() == ENTER) || (cur_node->GetType() == REFENTER)) && (next_node->GetType() != STREAMACTIVE)) { | |||
| GELOGD("No need to insert event between %s and %s.", cur_node->GetName().c_str(), next_node->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| @@ -721,6 +722,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||
| GELOGE(FAILED, "SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id); | |||
| return FAILED; | |||
| } | |||
| bool is_stream_first_node = (stream_node_num_vec[stream_id] == 0); | |||
| AddNodeNum(cur_node, stream_node_num_vec[stream_id]); | |||
| stream_2_nodes_map[stream_id].push_back(cur_node); | |||
| // The maximum number of tasks per stream. | |||
| @@ -737,7 +739,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||
| stream_continuous_2_nodes_map[continuous_stream_label].push_back(cur_node); | |||
| } | |||
| // Split the stream if it exceeds the maximum number of nodes in the stream. | |||
| if (NeedSpiltNewStream(stream_node_num_vec[stream_id], max_node_num_one_stream, op_desc)) { | |||
| if (NeedSpiltNewStream(stream_node_num_vec[stream_id], max_node_num_one_stream, op_desc, is_stream_first_node)) { | |||
| last_stream_id++; | |||
| GELOGI( | |||
| "stream_node_num_vec[%ld]= %ld > max_node_num_one_stream : %ld, " | |||
| @@ -801,7 +803,11 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||
| } | |||
| bool StreamAllocator::NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, | |||
| const OpDescPtr &op_desc) const { | |||
| const OpDescPtr &op_desc, bool is_stream_first_node) const { | |||
| if (is_stream_first_node) { | |||
| GELOGD("First node of stream does not need to split new stream"); | |||
| return false; | |||
| } | |||
| const set<string> label_op_types({LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX}); | |||
| bool is_first_active_node = false; | |||
| (void)AttrUtils::GetBool(op_desc, ATTR_NAME_SUBGRAPH_FIRST_ACTIVE, is_first_active_node); | |||
| @@ -1019,6 +1025,18 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||
| loop_active_streams.emplace_back(static_cast<uint32_t>(stream_id)); | |||
| } | |||
| } | |||
| map<int64_t, NodePtr> stream_id_to_last_node; | |||
| set<int64_t> streams_skip_iterator_event; | |||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||
| int64_t stream_id = node->GetOpDesc()->GetStreamId(); | |||
| if (find(loop_active_streams.begin(), loop_active_streams.end(), stream_id) != loop_active_streams.end()) { | |||
| stream_id_to_last_node[stream_id] = node; | |||
| // last node in stream which has streamswitch or IF may be not execute, it will cause block if add event on them | |||
| if (node->GetOpDesc()->GetType() == STREAMSWITCH) { | |||
| streams_skip_iterator_event.insert(stream_id); | |||
| } | |||
| } | |||
| } | |||
| // Set the stream that needs to be activated | |||
| for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | |||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
| @@ -1031,7 +1049,31 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||
| GELOGE(FAILED, "SetListInt failed."); | |||
| return FAILED); | |||
| for (const auto &stream_id : loop_active_streams) { | |||
| GELOGI("Active stream %u for node: %s", stream_id, node->GetName().c_str()); | |||
| GELOGI("Active stream %u for node: %s.", stream_id, node->GetName().c_str()); | |||
| } | |||
| // In switch group optimze case, some data input branch may exec slowly. | |||
| // when condition input branch judge false and some switch has no false branch, | |||
| // In this condition, data branch has no synchronize point, | |||
| // it may cause some stream actived by iterator next step when this stream still alive. | |||
| // If above situation happen, active message will lose, cause process block in next iteration. | |||
| // In order to avoid this abnormal happen, | |||
| // add event between each last node and iterator active node in target active stream | |||
| GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); | |||
| for (auto iter : stream_id_to_last_node) { | |||
| if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { | |||
| GELOGI("skip stream %ld which has streamswitch node when add event to next iterator active node", | |||
| iter.first); | |||
| continue; | |||
| } | |||
| if (iter.second->GetOwnerComputeGraph()->GetParentGraph() != nullptr) { | |||
| GELOGI("skip stream %ld which last node in subgraph when add event to next iterator active node", | |||
| iter.first); | |||
| continue; | |||
| } | |||
| AddSendEventId(iter.second, event_num_); | |||
| AddRecvEventId(node, event_num_); | |||
| event_num_++; | |||
| } | |||
| break; | |||
| @@ -1132,7 +1174,7 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||
| return status; | |||
| } | |||
| GELOGI("Insert recv event %u before node: %s", event_id, node->GetName().c_str()); | |||
| GELOGI("Insert recv event %u before node: %s.", event_id, node->GetName().c_str()); | |||
| } | |||
| // Add the node corresponding to the send event | |||
| @@ -1160,7 +1202,7 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||
| return status; | |||
| } | |||
| GELOGI("Insert send event %u after node: %s", event_id, node->GetName().c_str()); | |||
| GELOGI("Insert send event %u after node: %s.", event_id, node->GetName().c_str()); | |||
| } | |||
| } | |||
| @@ -58,7 +58,8 @@ class StreamAllocator { | |||
| bool IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const; | |||
| Status SplitStreams(std::vector<std::set<int64_t>> &split_streams); | |||
| bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const; | |||
| bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc, | |||
| bool is_stream_first_node) const; | |||
| Status UpdateActiveStreams(const std::vector<std::set<int64_t>> &split_streams); | |||
| void UpdateLabelStreams(const std::vector<std::set<int64_t>> &split_streams); | |||
| @@ -95,8 +95,8 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||
| GELOGE(FAILED, "SetListStr failed."); | |||
| return FAILED); | |||
| GELOGI("Call GenerateTask Success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(), | |||
| op_name_map.size()); | |||
| GELOGI("GenerateTask Success, task list:%zu, op map:%zu, logic mem base:%p, logic weight base:%p, logic var base:%p", | |||
| task_def_list.size(), op_name_map.size(), run_context.dataMemBase, run_context.weightMemBase, var_mem_base_); | |||
| // Init and serialize model_task_def | |||
| ModelTaskDef model_task_def; | |||
| @@ -260,7 +260,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
| int64_t group_key; | |||
| uint32_t node_index = 0; | |||
| rtStream_t stream = nullptr; | |||
| bool is_unknown_shape = graph->GetGraphUnknownFlag(); | |||
| bool is_unknown_shape = graph->GetGraphUnknownFlag() || GetContext().GetHostExecFlag(); | |||
| if (is_unknown_shape) { | |||
| GE_CHK_STATUS_RET(SetUnknownShapeStream(run_context, stream), "Set unknown shape stream failed."); | |||
| } | |||
| @@ -479,7 +479,12 @@ Status TaskGenerator::UpdateAnchorStatus(const NodePtr &node) { | |||
| GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed."); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| } else if (peer_anchor->GetOwnerNode()->GetType() == CONSTANT) { | |||
| continue; | |||
| } | |||
| std::string const_type; | |||
| bool is_const = NodeUtils::GetConstOpType(peer_anchor->GetOwnerNode(), const_type); | |||
| if (is_const && (const_type == CONSTANT)) { | |||
| if (AnchorUtils::SetStatus(anchor, ANCHOR_CONST) != GRAPH_SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "AnchorUtils::SetStatus failed."); | |||
| return INTERNAL_ERROR; | |||
| @@ -17,9 +17,13 @@ | |||
| #include "graph/common/transop_util.h" | |||
| #include "common/types.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| namespace { | |||
| const int kInvalidTransopDataIndex = -1; | |||
| const int kTransOpOutIndex = 0; | |||
| std::map<ge::DataType, ge::DataType> precision_loss_transfer_map = {{ge::DT_FLOAT, ge::DT_BOOL}}; | |||
| } // namespace | |||
| namespace ge { | |||
| @@ -60,4 +64,20 @@ int TransOpUtil::GetTransOpDataIndex(const std::string &type) { | |||
| } | |||
| return kInvalidTransopDataIndex; | |||
| } | |||
| bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) { | |||
| auto idx = TransOpUtil::GetTransOpDataIndex(src_node); | |||
| auto input_desc = src_node->GetOpDesc()->GetInputDesc(idx); | |||
| auto output_desc = src_node->GetOpDesc()->GetOutputDesc(kTransOpOutIndex); | |||
| auto src_dtype = input_desc.GetDataType(); | |||
| auto dst_dtype = output_desc.GetDataType(); | |||
| auto iter = precision_loss_transfer_map.find(src_dtype); | |||
| if (iter != precision_loss_transfer_map.end() && iter->second == dst_dtype) { | |||
| GELOGW("Node %s transfer data type from %s to %s ,it will cause precision loss. ignore pass.", | |||
| src_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(src_dtype).c_str(), | |||
| TypeUtils::DataTypeToSerialString(dst_dtype).c_str()); | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace ge | |||
| @@ -33,6 +33,8 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { | |||
| static int GetTransOpDataIndex(const std::string &type); | |||
| static bool CheckPrecisionLoss(const NodePtr &src_node); | |||
| private: | |||
| TransOpUtil(); | |||
| @@ -519,6 +519,25 @@ Status GraphExecutor::GetCombinedDynamicDims(uint32_t model_id, std::vector<std: | |||
| return SUCCESS; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get user designate shape order | |||
| /// @param [in] model_id | |||
| /// @param [out] user_input_shape_order | |||
| /// @return execute result | |||
| /// | |||
| ge::Status GraphExecutor::GetUserDesignateShapeOrder(uint32_t model_id, | |||
| std::vector<std::string> &user_input_shape_order) { | |||
| auto model_manager = ge::ModelManager::GetInstance(); | |||
| GE_CHECK_NOTNULL(model_manager); | |||
| Status ret = model_manager->GetUserDesignateShapeOrder(model_id, user_input_shape_order); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetUserDesignateShapeOrder failed."); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||
| auto model_manager = ge::ModelManager::GetInstance(); | |||
| GE_CHECK_NOTNULL(model_manager); | |||
| @@ -570,7 +589,7 @@ Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigI | |||
| GE_CHECK_NOTNULL(model_manager); | |||
| Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetAIPPInfo failed."); | |||
| GELOGW("GetAIPPInfo is not success."); | |||
| return ret; | |||
| } | |||
| @@ -602,4 +621,16 @@ Status GraphExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t inde | |||
| return SUCCESS; | |||
| } | |||
| Status GraphExecutor::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, | |||
| OpDescInfo &op_desc_info) { | |||
| auto model_manager = ge::ModelManager::GetInstance(); | |||
| GE_CHECK_NOTNULL(model_manager); | |||
| Status ret = model_manager->GetOpDescInfo(device_id, stream_id, task_id, op_desc_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "GetOpDescInfo failed."); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -95,6 +95,15 @@ class GraphExecutor { | |||
| /// | |||
| static Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get user designate shape order | |||
| /// @param [in] model_id | |||
| /// @param [out] user_input_shape_order | |||
| /// @return execute result | |||
| /// | |||
| static Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_input_shape_order); | |||
| static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
| static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | |||
| @@ -107,6 +116,8 @@ class GraphExecutor { | |||
| static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||
| std::vector<InputOutputDims> &output_dims); | |||
| static Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||
| private: | |||
| Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data, | |||
| OutputData &graph_output_data, std::vector<InputOutputDescInfo> &output_desc); | |||
| @@ -38,7 +38,9 @@ namespace ge { | |||
| Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info) { | |||
| GE_CHECK_NOTNULL(aipp_params); | |||
| AIPP_CONVERT_TO_AIPP_INFO(aipp_mode); | |||
| AIPP_CONVERT_TO_AIPP_INFO(input_format); | |||
| AIPP_CONVERT_TO_AIPP_INFO(related_input_rank); | |||
| AIPP_CONVERT_TO_AIPP_INFO(src_image_size_w); | |||
| AIPP_CONVERT_TO_AIPP_INFO(src_image_size_h); | |||
| AIPP_CONVERT_TO_AIPP_INFO(crop); | |||
| @@ -85,6 +87,8 @@ Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, Ai | |||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_1, 0); | |||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_2, 0); | |||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_3, 0); | |||
| AIPP_CONVERT_TO_AIPP_INFO(support_rotation); | |||
| AIPP_CONVERT_TO_AIPP_INFO(max_src_image_size); | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -171,6 +171,44 @@ void DataDumper::SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_de | |||
| is_op_debug_ = is_op_debug; | |||
| } | |||
| void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, | |||
| uint32_t stream_id) { | |||
| GELOGD("Start SaveDumpOpInfo of task_id: %u, stream_id: %u", task_id, stream_id); | |||
| OpDescInfo op_desc_info; | |||
| op_desc_info.op_name = op->GetName(); | |||
| op_desc_info.task_id = task_id; | |||
| op_desc_info.stream_id = stream_id; | |||
| for (size_t i = 0; i < op->GetInputsSize(); ++i) { | |||
| GeTensorDesc input_desc = op->GetInputDesc(i); | |||
| op_desc_info.input_format.emplace_back(input_desc.GetFormat()); | |||
| op_desc_info.input_shape.emplace_back(input_desc.GetShape().GetDims()); | |||
| op_desc_info.input_data_type.emplace_back(input_desc.GetDataType()); | |||
| } | |||
| for (size_t j = 0; j < op->GetOutputsSize(); ++j) { | |||
| GeTensorDesc output_desc = op->GetOutputDesc(j); | |||
| op_desc_info.output_format.emplace_back(output_desc.GetFormat()); | |||
| op_desc_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||
| op_desc_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||
| } | |||
| op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op); | |||
| op_desc_info.output_addrs = ModelUtils::GetOutputDataAddrs(model_param, op); | |||
| op_desc_info_.emplace_back(op_desc_info); | |||
| } | |||
| bool DataDumper::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { | |||
| GELOGI("There are %zu op need to dump.", op_desc_info_.size()); | |||
| for (size_t index = 0; index < op_desc_info_.size(); ++index) { | |||
| OpDescInfo dump_op_info = op_desc_info_.at(index); | |||
| if (dump_op_info.task_id == task_id && dump_op_info.stream_id == stream_id) { | |||
| GELOGI("find exception op of task_id: %u, stream_id: %u.", task_id, stream_id); | |||
| op_desc_info = dump_op_info; | |||
| return true; | |||
| } | |||
| } | |||
| return false; | |||
| } | |||
| void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, | |||
| uintptr_t args) { | |||
| if (op_desc == nullptr) { | |||
| @@ -325,17 +363,24 @@ Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicp | |||
| // check dump output tensor desc is redirected by attr ATTR_DATA_DUMP_REF | |||
| if (AttrUtils::GetStr(&output_desc, ATTR_DATA_DUMP_REF, node_name_index)) { | |||
| GE_CHK_STATUS_RET(DumpRefOutput(inner_dump_info, output, i, node_name_index), "DumpRefOutput failed"); | |||
| task.mutable_output()->Add(std::move(output)); | |||
| } else { | |||
| GE_IF_BOOL_EXEC( | |||
| IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i), | |||
| GELOGD("DumpOutputWithTask[%s] output[%zu] is l1 addr, skip it", inner_dump_info.op->GetName().c_str(), i); | |||
| continue;); | |||
| const auto input_size = inner_dump_info.op->GetInputsSize(); | |||
| auto addr = inner_dump_info.args + (i + input_size) * kAddrLen; | |||
| GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i), "Generate output failed"); | |||
| if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) { | |||
| GELOGI("[L1Fusion] DumpOutputWithTask[%s] output[%zu] is l1 addr.", inner_dump_info.op->GetName().c_str(), i); | |||
| int64_t output_size = 0; | |||
| if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Get output size failed."); | |||
| return PARAM_INVALID; | |||
| } | |||
| GELOGI("Get output size of l1_fusion_dump is %ld", output_size); | |||
| GenerateOpBuffer(output_size, task); | |||
| } else { | |||
| const auto input_size = inner_dump_info.op->GetInputsSize(); | |||
| auto addr = inner_dump_info.args + (i + input_size) * kAddrLen; | |||
| GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i), "Generate output failed"); | |||
| task.mutable_output()->Add(std::move(output)); | |||
| } | |||
| } | |||
| task.mutable_output()->Add(std::move(output)); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -468,20 +513,38 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump:: | |||
| // check dump input tensor desc is redirected by attr ATTR_DATA_DUMP_REF | |||
| if (AttrUtils::GetStr(&input_descs.at(i), ATTR_DATA_DUMP_REF, node_name_index)) { | |||
| GE_CHK_STATUS_RET(DumpRefInput(inner_dump_info, input, i, node_name_index), "DumpRefInput failed"); | |||
| task.mutable_input()->Add(std::move(input)); | |||
| // normal dump without attr | |||
| } else { | |||
| GE_IF_BOOL_EXEC(IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i), | |||
| GELOGD("DumpInput[%s] input[%zu] is l1 addr, skip it", inner_dump_info.op->GetName().c_str(), i); | |||
| continue;); | |||
| auto addr = inner_dump_info.args + kAddrLen * i; | |||
| GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i), "Generate input failed"); | |||
| if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) { | |||
| GELOGI("[L1Fusion] DumpInput[%s] input[%zu] is l1 addr", inner_dump_info.op->GetName().c_str(), i); | |||
| int64_t input_size = 0; | |||
| if (AttrUtils::GetInt(input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { | |||
| GELOGI("Get aipp input size according to attr is %ld", input_size); | |||
| } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Get input size failed."); | |||
| return PARAM_INVALID; | |||
| } | |||
| GELOGI("Get input size of l1_fusion_dump is %ld", input_size); | |||
| GenerateOpBuffer(input_size, task); | |||
| } else { | |||
| auto addr = inner_dump_info.args + kAddrLen * i; | |||
| GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i), "Generate input failed"); | |||
| task.mutable_input()->Add(std::move(input)); | |||
| } | |||
| } | |||
| task.mutable_input()->Add(std::move(input)); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| void DataDumper::GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task) { | |||
| aicpu::dump::OpBuffer op_buffer; | |||
| op_buffer.set_buffer_type(aicpu::dump::BufferType::L1); | |||
| op_buffer.set_address(reinterpret_cast<uintptr_t>(l1_fusion_addr_)); | |||
| op_buffer.set_size(size); | |||
| task.mutable_buffer()->Add(std::move(op_buffer)); | |||
| } | |||
| Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||
| std::string proto_str; | |||
| size_t proto_size = op_mapping_info.ByteSizeLong(); | |||
| @@ -720,7 +783,7 @@ void DataDumper::PrintCheckLog(string &dump_list_key) { | |||
| bool not_find_by_omname = model_list.find(om_name_) == model_list.end(); | |||
| bool not_find_by_modelname = model_list.find(model_name_) == model_list.end(); | |||
| dump_list_key = not_find_by_omname ? model_name_ : om_name_; | |||
| GELOGI("%zu op need dump in %s.", op_list_.size(), dump_list_key.c_str()); | |||
| GELOGI("%zu op need dump in known shape model %s.", op_list_.size(), dump_list_key.c_str()); | |||
| if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) { | |||
| if (not_find_by_omname && not_find_by_modelname) { | |||
| @@ -30,6 +30,7 @@ | |||
| #include "proto/op_mapping_info.pb.h" | |||
| #include "runtime/mem.h" | |||
| #include "task_info/task_info.h" | |||
| #include "framework/common/ge_types.h" | |||
| namespace ge { | |||
| class DataDumper { | |||
| @@ -64,10 +65,14 @@ class DataDumper { | |||
| void SetRefInfo(const std::map<OpDescPtr, void *> &ref_info) { ref_info_ = ref_info; }; | |||
| void SetL1FusionAddr(void *addr) { l1_fusion_addr_ = addr; }; | |||
| void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); | |||
| void SaveDumpInput(const std::shared_ptr<Node> &node); | |||
| void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id); | |||
| // args is device memory stored first output addr | |||
| void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args); | |||
| void SaveEndGraphId(uint32_t task_id, uint32_t stream_id); | |||
| @@ -81,6 +86,7 @@ class DataDumper { | |||
| void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } | |||
| const DumpProperties &GetDumpProperties() const { return dump_properties_; } | |||
| bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const; | |||
| private: | |||
| void ReleaseDevMem(void **ptr) noexcept; | |||
| @@ -100,6 +106,7 @@ class DataDumper { | |||
| struct InnerDumpInfo; | |||
| struct InnerInputMapping; | |||
| std::vector<OpDescInfo> op_desc_info_; | |||
| std::vector<InnerDumpInfo> op_list_; | |||
| uint32_t end_graph_task_id_ = 0; | |||
| uint32_t end_graph_stream_id_ = 0; | |||
| @@ -111,6 +118,7 @@ class DataDumper { | |||
| uintptr_t loop_cond_; | |||
| ComputeGraphPtr compute_graph_; | |||
| std::map<OpDescPtr, void *> ref_info_; | |||
| void *l1_fusion_addr_ = nullptr; | |||
| uint32_t op_debug_task_id_ = 0; | |||
| uint32_t op_debug_stream_id_ = 0; | |||
| @@ -135,6 +143,7 @@ class DataDumper { | |||
| const uintptr_t &addr, size_t index); | |||
| Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
| const uintptr_t &addr, size_t index); | |||
| void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task); | |||
| }; | |||
| struct DataDumper::InnerDumpInfo { | |||
| uint32_t task_id; | |||
| @@ -84,6 +84,8 @@ const uint32_t kAddrLen = sizeof(void *); | |||
| const int kDecimal = 10; | |||
| const int kBytes = 8; | |||
| const uint32_t kDataMemAlignSizeCompare = 64; | |||
| const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024; | |||
| const uint32_t kDumpFlagOfL1Fusion = 0; | |||
| const char *const kDefaultBatchLable = "Batch_default"; | |||
| inline bool IsDataOp(const std::string &node_type) { | |||
| @@ -97,7 +99,6 @@ inline bool IsNoTaskAndDumpNeeded(const OpDescPtr &op_desc) { | |||
| } // namespace | |||
| std::mutex DavinciModel::tvm_bin_mutex_; | |||
| std::set<std::string> DavinciModel::tvm_bin_kernel_; | |||
| DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener) | |||
| : weights_mem_base_(nullptr), | |||
| @@ -179,6 +180,10 @@ DavinciModel::~DavinciModel() { | |||
| FreeFeatureMapMem(); | |||
| if (l1_fusion_addr_ != nullptr) { | |||
| GE_CHK_RT(rtFree(l1_fusion_addr_)); | |||
| } | |||
| if (rt_model_handle_ != nullptr) { | |||
| GE_CHK_RT(rtModelDestroy(rt_model_handle_)); | |||
| rt_model_handle_ = nullptr; | |||
| @@ -305,7 +310,7 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p | |||
| if (weight_ptr == nullptr) { | |||
| weights_mem_base_ = MallocWeightsMem(weights_size); | |||
| if (weights_mem_base_ == nullptr) { | |||
| GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); | |||
| GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); | |||
| return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; | |||
| } | |||
| is_inner_weight_base_ = true; | |||
| @@ -367,7 +372,7 @@ void DavinciModel::InitRuntimeParams() { | |||
| session_id_ = runtime_param_.session_id; | |||
| GELOGI( | |||
| "InitRuntimeParams(), session_id:%u, stream_num:%lu, event_num:%u, label_num:%u, " | |||
| "InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, " | |||
| "logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, " | |||
| "memory_size:%lu, weight_size:%lu, var_size:%lu", | |||
| runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num, | |||
| @@ -401,6 +406,7 @@ void DavinciModel::CheckHasHcomOp() { | |||
| /// | |||
| Status DavinciModel::BindModelStream() { | |||
| // Stream not in active_stream_indication_ is active stream. | |||
| is_stream_list_bind_ = false; | |||
| if ((!input_queue_ids_.empty() || !output_queue_ids_.empty()) || (deploy_type_ == AICPU_DEPLOY_CROSS_THREAD)) { | |||
| for (size_t i = 0; i < stream_list_.size(); ++i) { | |||
| if (active_stream_indication_.count(i) == 0) { | |||
| @@ -419,7 +425,7 @@ Status DavinciModel::BindModelStream() { | |||
| GE_CHK_RT_RET(rtModelBindStream(rt_model_handle_, stream_list_[i], RT_HEAD_STREAM)); | |||
| } | |||
| } | |||
| is_stream_list_bind_ = true; | |||
| return SUCCESS; | |||
| } | |||
| @@ -600,6 +606,12 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
| // create model_handle to load model | |||
| GE_CHK_RT_RET(rtModelCreate(&rt_model_handle_, 0)); | |||
| GE_CHK_RT_RET(rtModelGetId(rt_model_handle_, &runtime_model_id_)); | |||
| // malloc 2M for dump l1fusion op | |||
| GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); | |||
| // send l1fusion dump addr to rts | |||
| GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion)); | |||
| // inference will use default graph_id 0; | |||
| runtime_param_.graph_id = compute_graph->GetGraphID(); | |||
| @@ -748,11 +760,18 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
| typedef Status (DavinciModel::*OpDescCall)(const OpDescPtr &); | |||
| static std::map<std::string, OpDescCall> op_desc_handle = { | |||
| {VARIABLE, &DavinciModel::InitVariable}, {CONSTANTOP, &DavinciModel::InitConstant}, | |||
| {STREAMACTIVE, &DavinciModel::InitStreamActive}, {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, | |||
| {STREAMSWITCHN, &DavinciModel::InitStreamSwitchN}, {LABELSET, &DavinciModel::InitLabelSet}, | |||
| {VARIABLE, &DavinciModel::InitVariable}, | |||
| {CONSTANTOP, &DavinciModel::InitConstant}, | |||
| {STREAMACTIVE, &DavinciModel::InitStreamActive}, | |||
| {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, | |||
| {STREAMSWITCHN, &DavinciModel::InitStreamSwitchN}, | |||
| {LABELSET, &DavinciModel::InitLabelSet}, | |||
| {CASE, &DavinciModel::InitCase}, | |||
| }; | |||
| GE_CHK_STATUS_RET(InitInputOutputForDynamic(compute_graph), "InitInputOutputForDynamic failed."); | |||
| map<uint32_t, OpDescPtr> data_by_index; | |||
| auto nodes = compute_graph->GetAllNodes(); | |||
| const TBEKernelStore &tbekernel_store = ge_model_->GetTBEKernelStore(); | |||
| for (size_t i = 0; i < nodes.size(); i++) { | |||
| @@ -770,7 +789,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
| GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | |||
| if (IsDataOp(op_desc->GetType())) { | |||
| if (InitDataOp(node, data_op_index) != SUCCESS) { | |||
| if (InitDataOp(node, data_op_index, data_by_index) != SUCCESS) { | |||
| GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| @@ -839,21 +858,44 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
| GE_TIMESTAMP_ADD(InitTbeHandle); | |||
| } | |||
| AdjustDataOpList(data_by_index); | |||
| GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | |||
| GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | |||
| return SUCCESS; | |||
| } | |||
| Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph) { | |||
| if (!known_node_) return SUCCESS; | |||
| // for dynamic shape | |||
| auto direct_nodes = compute_graph->GetDirectNode(); | |||
| for (size_t i = 0; i < direct_nodes.size(); i++) { | |||
| auto node = direct_nodes.at(i); | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| GELOGE(PARAM_INVALID, "op_desc is null."); | |||
| return PARAM_INVALID; | |||
| } | |||
| if (IsDataOp(op_desc->GetType())) { | |||
| GELOGD("init data op %s", op_desc->GetName().c_str()); | |||
| data_op_list_.push_back(op_desc); | |||
| } | |||
| if (op_desc->GetType() == NETOUTPUT) { | |||
| GELOGD("init netouput op %s", op_desc->GetName().c_str()); | |||
| output_op_list_.push_back(op_desc); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| /// @ingroup ge | |||
| /// @brief Data Op Initialize. | |||
| /// @param [in] NodePtr: Data Op. | |||
| /// @param [in/out] data_op_index: NetOutput addr size info. | |||
| /// @return Status | |||
| Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index) { | |||
| Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index) { | |||
| // op_desc Checked by Init: Data, valid. | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (known_node_) { | |||
| data_op_list_.push_back(op_desc); | |||
| return SUCCESS; | |||
| } | |||
| uint32_t parent_index = 0; // Ignore subgraph Data Node. | |||
| @@ -885,6 +927,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index) { | |||
| return PARAM_INVALID; | |||
| } | |||
| new_input_data_info_[data_index] = zero_copy_offset; | |||
| data_by_index[data_index] = op_desc; | |||
| for (size_t index = 0; index < virtual_addr_list.size(); ++index) { | |||
| void *addr = virtual_addr_list.at(index); | |||
| @@ -904,6 +947,24 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index) { | |||
| return SUCCESS; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Sort Data op list by index. | |||
| /// @param [in] data_by_index: map of Data Op. | |||
| /// @return | |||
| /// | |||
| void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index) { | |||
| if (data_by_index.size() != data_op_list_.size()) { | |||
| GELOGW("Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size()); | |||
| return; | |||
| } | |||
| data_op_list_.clear(); | |||
| for (auto &item : data_by_index) { | |||
| data_op_list_.emplace_back(item.second); | |||
| } | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief input zero copy node Initialize. | |||
| @@ -946,7 +1007,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||
| auto op_desc = node->GetOpDesc(); | |||
| // excludes the function op sub graph, e.g. case,if | |||
| if (known_node_) { | |||
| output_op_list_.push_back(op_desc); | |||
| return SUCCESS; | |||
| } | |||
| ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); | |||
| @@ -989,9 +1049,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||
| new_output_data_info_[num + idx] = zero_copy_offset; | |||
| void *addr = virtual_addr_list.at(idx); | |||
| int64_t input_offset = input_offset_list.at(idx); | |||
| if (new_output_outside_addrs_.find(addr) != new_output_outside_addrs_.end()) { | |||
| continue; | |||
| } | |||
| vector<void *> tensor_addrs; | |||
| zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); | |||
| auto rslt = new_output_outside_addrs_.insert(std::pair<void *, ZeroCopyOffset>(addr, zero_copy_offset)); | |||
| @@ -1464,6 +1521,17 @@ void DavinciModel::GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &bat | |||
| batch_info = combined_batch_info_; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get user designate shape order | |||
| /// @param [out] user_input_shape_order | |||
| /// @return None | |||
| /// | |||
| void DavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) const { | |||
| user_input_shape_order.clear(); | |||
| user_input_shape_order = user_designate_shape_order_; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get AIPP input info | |||
| @@ -1475,7 +1543,7 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { | |||
| GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | |||
| OpDescPtr data_op = data_op_list_[index]; | |||
| if (!data_op->HasAttr(ATTR_NAME_AIPP)) { | |||
| GELOGE(GE_AIPP_NOT_EXIST, "GetAIPPInfo: there is not AIPP related with index %u.", index); | |||
| GELOGW("GetAIPPInfo: there is not AIPP related with index %u.", index); | |||
| return GE_AIPP_NOT_EXIST; | |||
| } | |||
| @@ -1488,10 +1556,6 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { | |||
| GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | |||
| GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u", | |||
| data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank()); | |||
| if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) { | |||
| GELOGI("GetAIPPInfo, dynamic Aipp is not support to query temporarily."); | |||
| return GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY; | |||
| } | |||
| GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info), | |||
| "convert aipp params to aipp config info failed"); | |||
| @@ -1563,51 +1627,51 @@ Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInf | |||
| return SUCCESS; | |||
| } | |||
| void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) { | |||
| void DavinciModel::SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, | |||
| InputOutputDescInfo &input) { | |||
| uint32_t n, c, h, w; | |||
| n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N; | |||
| c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C; | |||
| h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H; | |||
| w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W; | |||
| if (model_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||
| input.shape_info.num = model_input_dims[n]; | |||
| input.shape_info.height = model_input_dims[h]; | |||
| input.shape_info.width = model_input_dims[w]; | |||
| input.shape_info.channel = model_input_dims[c]; | |||
| } | |||
| for (size_t k = 0; k < model_input_dims.size(); ++k) { | |||
| input.shape_info.dims.push_back(model_input_dims[k]); | |||
| } | |||
| return; | |||
| } | |||
| void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) { | |||
| if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) { | |||
| // When static aipp is set, need to get the model input dims which processed by aipp | |||
| vector<int64_t> model_input_dims; | |||
| (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims); | |||
| if (model_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||
| input.shape_info.num = model_input_dims[n]; | |||
| input.shape_info.height = model_input_dims[h]; | |||
| input.shape_info.width = model_input_dims[w]; | |||
| input.shape_info.channel = model_input_dims[c]; | |||
| } | |||
| for (size_t k = 0; k < model_input_dims.size(); ++k) { | |||
| input.shape_info.dims.push_back(model_input_dims[k]); | |||
| } | |||
| is_new_model_desc_ = false; | |||
| SetInputDimsInfo(model_input_dims, format, input); | |||
| return; | |||
| } | |||
| if (!op_desc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||
| if (op_desc->GetInputDescPtr(0)->GetShape().GetDimNum() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||
| input.shape_info.num = op_desc->GetInputDescPtr(0)->GetShape().GetDim(n); | |||
| input.shape_info.height = op_desc->GetInputDescPtr(0)->GetShape().GetDim(h); | |||
| input.shape_info.width = op_desc->GetInputDescPtr(0)->GetShape().GetDim(w); | |||
| input.shape_info.channel = op_desc->GetInputDescPtr(0)->GetShape().GetDim(c); | |||
| } | |||
| for (size_t k = 0; k < op_desc->GetInputDescPtr(0)->GetShape().GetDimNum(); k++) { | |||
| input.shape_info.dims.push_back(op_desc->GetInputDescPtr(0)->GetShape().GetDim(k)); | |||
| } | |||
| // judge if this data is linked dynamic aipp first, multiply batch has been considered | |||
| if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { | |||
| vector<int64_t> dynamic_aipp_input_dims; | |||
| (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); | |||
| SetInputDimsInfo(dynamic_aipp_input_dims, format, input); | |||
| return; | |||
| } else { | |||
| vector<int64_t> origin_input_dims; | |||
| (void)AttrUtils::GetListInt(op_desc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||
| if (origin_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||
| input.shape_info.num = origin_input_dims[n]; | |||
| input.shape_info.height = origin_input_dims[h]; | |||
| input.shape_info.width = origin_input_dims[w]; | |||
| input.shape_info.channel = origin_input_dims[c]; | |||
| } | |||
| for (size_t k = 0; k < origin_input_dims.size(); ++k) { | |||
| input.shape_info.dims.push_back(origin_input_dims[k]); | |||
| // judge if this data is multiply batch | |||
| if (!op_desc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||
| vector<int64_t> input_dims = op_desc->GetInputDescPtr(0)->GetShape().GetDims(); | |||
| SetInputDimsInfo(input_dims, format, input); | |||
| return; | |||
| } else { | |||
| vector<int64_t> origin_input_dims; | |||
| (void)AttrUtils::GetListInt(op_desc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||
| SetInputDimsInfo(origin_input_dims, format, input); | |||
| return; | |||
| } | |||
| } | |||
| } | |||
| @@ -1630,6 +1694,8 @@ Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, s | |||
| formats.push_back(format); | |||
| input_desc.push_back(input); | |||
| } | |||
| // cause GetInputDescInfo called not only once, set is_new_model_desc_ to false after calc the model input dims | |||
| is_new_model_desc_ = false; | |||
| return SUCCESS; | |||
| } | |||
| @@ -2106,22 +2172,24 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||
| return FAILED; | |||
| } | |||
| if ((kind == RT_MEMCPY_DEVICE_TO_DEVICE) && (copy_only_addrs_.count(output.second.GetBasicAddr()) == 0)) { | |||
| continue; // Skip: Feed by zero copy. | |||
| } | |||
| DataBuffer &buffer = blobs[output.first]; | |||
| uint64_t mem_size = static_cast<uint64_t>(output.second.GetDataSize()); | |||
| if ((buffer.length == 0) || (mem_size == 0)) { | |||
| GELOGI("Length of data is zero, No need copy. output tensor index=%u", output.first); | |||
| continue; | |||
| } | |||
| if (buffer.length < mem_size) { | |||
| if (is_dynamic_) { | |||
| GELOGI("No need to check output data size."); | |||
| } else if (buffer.length < mem_size) { | |||
| GELOGE(FAILED, "Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); | |||
| return FAILED; | |||
| } else if (buffer.length > mem_size) { | |||
| GELOGW("Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); | |||
| } | |||
| if ((kind == RT_MEMCPY_DEVICE_TO_DEVICE) && (copy_only_addrs_.count(output.second.GetBasicAddr()) == 0)) { | |||
| continue; // Skip: Feed by zero copy. | |||
| } | |||
| uint64_t data_size = output.second.GetDataSize(); | |||
| uint64_t buffer_length = buffer.length; | |||
| void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data)); | |||
| @@ -2564,10 +2632,12 @@ Status DavinciModel::ModelRunStop() { | |||
| void DavinciModel::UnbindTaskSinkStream() { | |||
| // unbinding hcom stream | |||
| UnbindHcomStream(); | |||
| for (size_t i = 0; i < stream_list_.size(); i++) { | |||
| // unbind rt_model_handle and streams | |||
| GE_LOGW_IF(rtModelUnbindStream(rt_model_handle_, stream_list_[i]) != RT_ERROR_NONE, | |||
| "Unbind stream from model failed! Index: %zu", i); | |||
| if (is_stream_list_bind_) { | |||
| for (size_t i = 0; i < stream_list_.size(); i++) { | |||
| // unbind rt_model_handle and streams | |||
| GE_LOGW_IF(rtModelUnbindStream(rt_model_handle_, stream_list_[i]) != RT_ERROR_NONE, | |||
| "Unbind stream from model failed! Index: %zu", i); | |||
| } | |||
| } | |||
| if (is_inner_model_stream_) { | |||
| @@ -2610,11 +2680,7 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const | |||
| return SUCCESS; | |||
| } | |||
| const vector<void *> addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]); | |||
| if (outputs.size() > addr_list.size()) { | |||
| GELOGE(FAILED, "output data addr %u should less than output op number %u.", outputs.size(), addr_list.size()); | |||
| return FAILED; | |||
| } | |||
| for (size_t i = 0; i < addr_list.size(); ++i) { | |||
| for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) { | |||
| knonw_output_data_info_[addr_list[i]] = outputs[i]; | |||
| GELOGI("DavinciModel::CreateKnownZeroCopyMap output %d,v addr %p,p addr %p .", i, addr_list[i], outputs[i]); | |||
| } | |||
| @@ -2755,19 +2821,21 @@ Status DavinciModel::DistributeTask() { | |||
| } | |||
| const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | |||
| GELOGI("there are %zu task need to save.", task_list_.size()); | |||
| for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | |||
| auto &task = task_list_.at(task_index); | |||
| GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | |||
| // for data dump | |||
| if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) { | |||
| auto op_index = std::max(model_task_def->task(task_index).kernel().context().op_index(), | |||
| model_task_def->task(task_index).kernel_ex().op_index()); | |||
| OpDescPtr op = GetOpByIndex(op_index); | |||
| if (op == nullptr) { | |||
| GELOGE(PARAM_INVALID, "Op index %u is null, op list size %zu.", op_index, op_list_.size()); | |||
| return PARAM_INVALID; | |||
| } | |||
| auto op_index = std::max(model_task_def->task(task_index).kernel().context().op_index(), | |||
| model_task_def->task(task_index).kernel_ex().op_index()); | |||
| OpDescPtr op = GetOpByIndex(op_index); | |||
| if (op == nullptr) { | |||
| GELOGE(PARAM_INVALID, "Op index %u is null, op list size %zu.", op_index, op_list_.size()); | |||
| return PARAM_INVALID; | |||
| } | |||
| SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | |||
| if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) { | |||
| bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo(); | |||
| if (call_dump) { | |||
| SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); | |||
| @@ -2873,7 +2941,7 @@ void DavinciModel::DisableZeroCopy(const void *addr) { | |||
| void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, | |||
| void *args, size_t size, size_t offset) { | |||
| // Internal call has ensured that op_desc is not nullptr | |||
| GELOGI("[ZCPY] SetZeroCopyAddr for %s.", op_desc->GetName().c_str()); | |||
| GELOGD("[ZCPY] SetZeroCopyAddr for %s.", op_desc->GetName().c_str()); | |||
| size_t nums = outside_addrs.size(); | |||
| ZeroCopyTask zero_copy_task(op_desc->GetName(), static_cast<uint8_t *>(args), size); | |||
| for (size_t i = 0; i < nums; ++i) { | |||
| @@ -2994,7 +3062,7 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp | |||
| } | |||
| for (ZeroCopyTask &task : zero_copy_tasks_) { | |||
| GE_CHK_STATUS_RET(task.DistributeParam(is_async_mode_ ? rt_model_stream_ : nullptr), "[ZCPY] Update args failed."); | |||
| GE_CHK_STATUS_RET(task.DistributeParam(is_async_mode_, rt_model_stream_), "[ZCPY] Update args failed."); | |||
| } | |||
| output_data.index = input_data.index; | |||
| @@ -3106,7 +3174,6 @@ const char *DavinciModel::GetRegisterStub(const string &binfile, const string &s | |||
| } else { | |||
| binfile_key = session_graph_id + "_" + binfile; | |||
| } | |||
| std::lock_guard<std::mutex> lock(tvm_bin_mutex_); | |||
| auto it = tvm_bin_kernel_.find(binfile_key); | |||
| if (it != tvm_bin_kernel_.end()) { | |||
| return it->c_str(); | |||
| @@ -3242,7 +3309,6 @@ void DavinciModel::StoreTbeHandle(const std::string &handle_key) { | |||
| // Online mode FE may call rtFunctionRegister. | |||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||
| // Need protection of tvm_bin_mutex_. | |||
| auto it = used_tbe_handle_map_.find(handle_key); | |||
| if (it != used_tbe_handle_map_.end()) { | |||
| // GE registered, increase reference. | |||
| @@ -3262,9 +3328,9 @@ void DavinciModel::StoreTbeHandle(const std::string &handle_key) { | |||
| void DavinciModel::CleanTbeHandle() { | |||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||
| std::lock_guard<std::mutex> lock(tvm_bin_mutex_); | |||
| kernel_store.EraseTBEHandle(used_tbe_handle_map_); | |||
| used_tbe_handle_map_.clear(); | |||
| tvm_bin_kernel_.clear(); | |||
| } | |||
| /// | |||
| @@ -3315,21 +3381,26 @@ Status DavinciModel::InitStreamSwitchN(const OpDescPtr &op_desc) { | |||
| GELOGI("StreamSwitchNOp node:%s, active_stream_id=%u.", op_desc->GetName().c_str(), active_stream_list[j]); | |||
| } | |||
| (void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_TYPE, dynamic_type_); | |||
| batch_info_.clear(); | |||
| combined_batch_info_.clear(); | |||
| uint32_t batch_num = 0; | |||
| if (!AttrUtils::GetInt(op_desc, ATTR_NAME_BATCH_NUM, batch_num)) { | |||
| GELOGE(FAILED, "Failed to get attr ATTR_NAME_BATCH_NUM, StreamSwitchN: %s.", op_desc->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| for (uint32_t i = 0; i < batch_num; i++) { | |||
| return SetDynamicBatchInfo(op_desc, batch_num); | |||
| } | |||
| Status DavinciModel::SetDynamicBatchInfo(const OpDescPtr &op_desc, uint32_t batch_num) { | |||
| batch_info_.clear(); | |||
| combined_batch_info_.clear(); | |||
| (void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_TYPE, dynamic_type_); | |||
| (void)AttrUtils::GetListStr(op_desc, ATTR_USER_DESIGNEATE_SHAPE_ORDER, user_designate_shape_order_); | |||
| for (uint32_t i = 0; i < batch_num; ++i) { | |||
| std::vector<int64_t> batch_shape; | |||
| const std::string attr_name = ATTR_NAME_PRED_VALUE + "_" + std::to_string(i); | |||
| if (!AttrUtils::GetListInt(op_desc, attr_name, batch_shape)) { | |||
| GELOGE(FAILED, "Failed to get attr ATTR_NAME_PRED_VALUE, StreamSwitchN: %s.", op_desc->GetName().c_str()); | |||
| GELOGE(FAILED, "Get attr ATTR_NAME_PRED_VALUE failed, Node: %s", op_desc->GetName().c_str()); | |||
| batch_info_.clear(); | |||
| return FAILED; | |||
| } | |||
| @@ -3344,6 +3415,16 @@ Status DavinciModel::InitStreamSwitchN(const OpDescPtr &op_desc) { | |||
| return SUCCESS; | |||
| } | |||
| Status DavinciModel::InitCase(const OpDescPtr &op_desc) { | |||
| uint32_t batch_num = 0; | |||
| if (!AttrUtils::GetInt(op_desc, ATTR_NAME_BATCH_NUM, batch_num)) { | |||
| GELOGI("Not multi-batch Node: %s", op_desc->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| return SetDynamicBatchInfo(op_desc, batch_num); | |||
| } | |||
| bool DavinciModel::IsBroadCastOpData(const ge::NodePtr &var_node) { | |||
| for (auto out_anchor : var_node->GetAllOutDataAnchors()) { | |||
| GE_RT_FALSE_CHECK_NOTNULL(out_anchor); | |||
| @@ -3406,12 +3487,13 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
| GELOGI("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); | |||
| GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); | |||
| if (!input_data.is_dynamic_batch) { | |||
| is_dynamic_ = input_data.is_dynamic_batch; | |||
| if (!is_dynamic_) { | |||
| zero_copy_batch_label_addrs_.clear(); | |||
| } | |||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START)); | |||
| Status ret = CopyModelData(input_data, output_data, input_data.is_dynamic_batch); | |||
| Status ret = CopyModelData(input_data, output_data, is_dynamic_); | |||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", | |||
| model_id_); | |||
| @@ -3587,6 +3669,7 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | |||
| data_dumper_.SetOmName(om_name_); | |||
| data_dumper_.SetComputeGraph(compute_graph); | |||
| data_dumper_.SetRefInfo(saved_task_addrs_); | |||
| data_dumper_.SetL1FusionAddr(l1_fusion_addr_); | |||
| int32_t device_id = 0; | |||
| rtError_t rt_ret = rtGetDevice(&device_id); | |||
| @@ -3627,19 +3710,9 @@ void DavinciModel::PushHcclStream(rtStream_t value) { | |||
| all_hccl_stream_list_.push_back(value); | |||
| } | |||
| void DavinciModel::CreateHcclFollowStream(rtStream_t stream, int64_t remain_cap) { | |||
| void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream) { | |||
| std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_); | |||
| capacity_of_stream_.emplace_back(make_pair(stream, remain_cap)); | |||
| } | |||
| void DavinciModel::ReuseHcclFollowStream(int64_t remain_cap, int64_t &index) { | |||
| std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_); | |||
| if (remain_cap == 0) { | |||
| capacity_of_stream_.erase(capacity_of_stream_.begin() + index); | |||
| } else { | |||
| capacity_of_stream_.at(index).second = remain_cap; | |||
| index++; | |||
| } | |||
| main_follow_stream_mapping_[main_stream_id].emplace_back(stream); | |||
| } | |||
| Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info) { | |||
| @@ -3756,8 +3829,7 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<Input | |||
| (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); | |||
| GELOGD( | |||
| "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: " | |||
| "%s, " | |||
| "data_type: %s, shape: %s .", | |||
| "%s, data_type: %s, shape: %s .", | |||
| index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||
| TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||
| TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | |||
| @@ -184,10 +184,10 @@ class DavinciModel { | |||
| size_t TotalMemSize() const { return runtime_param_.mem_size; } | |||
| // model name | |||
| string Name() { return name_; } | |||
| string Name() const { return name_; } | |||
| // om_name | |||
| string OmName() { return om_name_; } | |||
| string OmName() const { return om_name_; } | |||
| // version | |||
| uint32_t Version() const { return version_; } | |||
| @@ -268,7 +268,7 @@ class DavinciModel { | |||
| /// @brief For TVM Op, avoid Addr Reuse. | |||
| /// @return void* | |||
| /// | |||
| static const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = ""); | |||
| const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = ""); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -299,6 +299,8 @@ class DavinciModel { | |||
| /// | |||
| void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const; | |||
| void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) const; | |||
| void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
| void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | |||
| @@ -440,6 +442,10 @@ class DavinciModel { | |||
| Status SinkTimeProfile(const InputData ¤t_data); | |||
| void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { | |||
| data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); | |||
| } | |||
| void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args) { | |||
| data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | |||
| } | |||
| @@ -449,9 +455,8 @@ class DavinciModel { | |||
| DavinciModel(const DavinciModel &model) = delete; | |||
| const vector<std::pair<rtStream_t, int64_t>> &GetHcclFolowStream() { return capacity_of_stream_; } | |||
| void CreateHcclFollowStream(rtStream_t stream, int64_t remain_cap); | |||
| void ReuseHcclFollowStream(int64_t remain_cap, int64_t &index); | |||
| const map<int64_t, std::vector<rtStream_t>> &GetHcclFolowStream() { return main_follow_stream_mapping_; } | |||
| void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream); | |||
| void InitRuntimeParams(); | |||
| Status InitVariableMem(); | |||
| @@ -500,6 +505,16 @@ class DavinciModel { | |||
| void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); } | |||
| const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); } | |||
| void SetMemcpyOffsetAndAddr(map<int64_t, void *> &memcpy_4g_offset_addr) { | |||
| memcpy_4g_offset_addr_.insert(memcpy_4g_offset_addr.begin(), memcpy_4g_offset_addr.end()); | |||
| } | |||
| const map<int64_t, void *> &GetMemcpyOffsetAndAddr() const { return memcpy_4g_offset_addr_; } | |||
| bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { | |||
| return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info); | |||
| } | |||
| Status InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph); | |||
| private: | |||
| // memory address of weights | |||
| uint8_t *weights_mem_base_; | |||
| @@ -575,6 +590,8 @@ class DavinciModel { | |||
| void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); | |||
| void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input); | |||
| Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats); | |||
| Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | |||
| @@ -619,7 +636,15 @@ class DavinciModel { | |||
| /// @param [in/out] data_op_index: NetOutput addr size info. | |||
| /// @return Status | |||
| /// | |||
| Status InitDataOp(const NodePtr &node, uint32_t &data_op_index); | |||
| Status InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Sort Data op list by index. | |||
| /// @param [in] data_by_index: map of Data Op. | |||
| /// @return | |||
| /// | |||
| void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -666,6 +691,15 @@ class DavinciModel { | |||
| Status InitStreamSwitchN(const OpDescPtr &op_desc); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Case Op Init. | |||
| /// @return Status | |||
| /// | |||
| Status InitCase(const OpDescPtr &op_desc); | |||
| Status SetDynamicBatchInfo(const OpDescPtr &op_desc, uint32_t batch_num); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief TVM Op Init. | |||
| @@ -840,7 +874,7 @@ class DavinciModel { | |||
| // for reuse hccl_follow_stream | |||
| std::mutex capacity_of_stream_mutex_; | |||
| std::vector<std::pair<rtStream_t, int64_t>> capacity_of_stream_; | |||
| std::map<int64_t, std::vector<rtStream_t>> main_follow_stream_mapping_; | |||
| vector<rtEvent_t> event_list_; | |||
| @@ -866,6 +900,7 @@ class DavinciModel { | |||
| bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. | |||
| bool is_stream_list_bind_{false}; | |||
| bool is_pure_head_stream_{false}; | |||
| rtStream_t rt_head_stream_{nullptr}; | |||
| rtStream_t rt_entry_stream_{nullptr}; | |||
| @@ -891,8 +926,8 @@ class DavinciModel { | |||
| std::set<uint32_t> hcom_streams_; | |||
| RuntimeParam runtime_param_; | |||
| static std::mutex tvm_bin_mutex_; // lock for tvm maps. | |||
| static std::set<std::string> tvm_bin_kernel_; | |||
| static std::mutex tvm_bin_mutex_; | |||
| std::set<std::string> tvm_bin_kernel_; | |||
| std::map<std::string, uint32_t> used_tbe_handle_map_; | |||
| @@ -906,6 +941,7 @@ class DavinciModel { | |||
| uint64_t iterator_count_; | |||
| bool is_l1_fusion_enable_; | |||
| std::map<OpDescPtr, void *> saved_task_addrs_; | |||
| void *l1_fusion_addr_ = nullptr; | |||
| bool known_node_ = false; | |||
| uint32_t total_args_size_ = 0; | |||
| @@ -921,7 +957,9 @@ class DavinciModel { | |||
| vector<vector<int64_t>> batch_info_; | |||
| std::vector<std::vector<int64_t>> combined_batch_info_; | |||
| vector<string> user_designate_shape_order_; | |||
| int32_t dynamic_type_ = 0; | |||
| bool is_dynamic_ = false; | |||
| vector<uint64_t> batch_size_; | |||
| // key: input tensor name, generally rts op; | |||
| @@ -938,6 +976,8 @@ class DavinciModel { | |||
| void *op_debug_addr_ = nullptr; | |||
| void *p2p_debug_addr_ = nullptr; | |||
| bool is_new_model_desc_{false}; | |||
| std::map<int64_t, void *> memcpy_4g_offset_addr_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | |||
| @@ -20,6 +20,7 @@ | |||
| #include "common/l2_cache_optimize.h" | |||
| #include "common/profiling/profiling_manager.h" | |||
| #include "common/dump/dump_manager.h" | |||
| #include "common/properties_manager.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/util.h" | |||
| @@ -172,7 +173,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||
| return GE_EXEC_MODEL_ID_INVALID; | |||
| } | |||
| uint64_t session_id = it->second->GetSessionId(); | |||
| GELOGI("Destroy aicpu session for infer, session id is %u.", session_id); | |||
| GELOGI("Destroy aicpu session for infer, session id is %lu.", session_id); | |||
| DestroyAicpuSession(session_id); | |||
| return SUCCESS; | |||
| } | |||
| @@ -259,7 +260,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||
| bool is_shape_unknown = false; | |||
| GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_shape_unknown), "CheckIsUnknownShape failed, model id:%u", | |||
| model_id); | |||
| if (is_shape_unknown) { | |||
| if (is_shape_unknown || GetContext().GetHostExecFlag()) { | |||
| return DoLoadHybridModelOnline(model_id, ge_root_model, listener); | |||
| } | |||
| @@ -729,6 +730,22 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vect | |||
| return SUCCESS; | |||
| } | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get user designate shape order | |||
| /// @param [in] model_id | |||
| /// @param [out] user_input_shape_order | |||
| /// @return execute result | |||
| /// | |||
| Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, | |||
| std::vector<std::string> &user_input_shape_order) { | |||
| auto davinci_model = GetModel(model_id); | |||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | |||
| "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id) | |||
| davinci_model->GetUserDesignateShapeOrder(user_input_shape_order); | |||
| return SUCCESS; | |||
| } | |||
| Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
| GE_CHECK_NOTNULL(davinci_model); | |||
| @@ -831,7 +848,11 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
| } | |||
| davinci_model->SetDeviceId(device_id); | |||
| davinci_model->SetOmName(model.om_name); | |||
| davinci_model->SetDumpProperties(dump_properties_); | |||
| if (DumpManager::GetInstance().IsDumpOpen()) { | |||
| davinci_model->SetDumpProperties(DumpManager::GetInstance().GetDumpProperties()); | |||
| } else { | |||
| davinci_model->SetDumpProperties(dump_properties_); | |||
| } | |||
| /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. | |||
| /// These session_ids come from the same model, so the values of session_id are the same. | |||
| @@ -1070,4 +1091,19 @@ ge::Status ModelManager::SyncExecuteModel(uint32_t model_id, const vector<GeTens | |||
| return model->Execute(inputs, outputs); | |||
| } | |||
| Status ModelManager::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) { | |||
| for (const auto &model : model_map_) { | |||
| auto davinci_model = model.second; | |||
| if (davinci_model->GetDeviceId() == device_id) { | |||
| GELOGI("Start to GetOpDescInfo of device_id: %u.", device_id); | |||
| if (davinci_model->GetOpDescInfo(stream_id, task_id, op_desc_info)) { | |||
| GELOGI("Find specific node of stream_id: %u, task_id: %u.", stream_id, task_id); | |||
| return SUCCESS; | |||
| } | |||
| } | |||
| } | |||
| return FAILED; | |||
| } | |||
| } // namespace ge | |||
| @@ -201,6 +201,15 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
| /// | |||
| ge::Status GetCombinedDynamicDims(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get user designate shape order | |||
| /// @param [in] model_id | |||
| /// @param [out] user_input_shape_order | |||
| /// @return execute result | |||
| /// | |||
| Status GetUserDesignateShapeOrder(const uint32_t model_id, std::vector<std::string> &user_input_shape_order); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Get AIPP info | |||
| @@ -263,6 +272,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
| std::vector<InputOutputDims> &output_dims); | |||
| bool IsDynamicShape(uint32_t model_id); | |||
| ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||
| private: | |||
| /// | |||
| @@ -28,7 +28,6 @@ namespace { | |||
| const uint32_t kMaxTaskOfStream = 200; | |||
| } | |||
| uint32_t HcclTaskInfo::max_node_of_hccl_stream_ = 0; | |||
| std::mutex HcclTaskInfo::hccl_follow_stream_mutex_; | |||
| HcclTaskInfo::~HcclTaskInfo() { | |||
| @@ -41,7 +40,6 @@ HcclTaskInfo::~HcclTaskInfo() { | |||
| } | |||
| davinci_model_ = nullptr; | |||
| ops_kernel_store_ = nullptr; | |||
| max_node_of_hccl_stream_ = 0; | |||
| args_ = nullptr; | |||
| } | |||
| Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| @@ -133,45 +131,39 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM | |||
| } | |||
| std::lock_guard<std::mutex> lock(hccl_follow_stream_mutex_); | |||
| if (max_node_of_hccl_stream_ == 0) { | |||
| uint32_t max_stream_count; | |||
| uint32_t max_task_count; | |||
| ret = rtGetMaxStreamAndTask(RT_NORMAL_STREAM, &max_stream_count, &max_task_count); | |||
| if (ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Get max stream and task count by rts failed."); | |||
| return RT_ERROR_TO_GE_STATUS(ret); | |||
| } | |||
| max_node_of_hccl_stream_ = max_task_count / kMaxTaskOfStream; | |||
| } | |||
| int64_t main_stream_id = op_desc->GetStreamId(); | |||
| const std::map<int64_t, std::vector<rtStream_t>> &main_follow_stream_mapping = davinci_model->GetHcclFolowStream(); | |||
| if (static_cast<size_t>(hccl_stream_num) <= davinci_model->GetHcclFolowStream().size()) { | |||
| GELOGI("capacity of follow stream is enough to be reused."); | |||
| ReuseStream(hccl_stream_num, davinci_model); | |||
| if (main_follow_stream_mapping.find(main_stream_id) != main_follow_stream_mapping.end()) { | |||
| const std::vector<rtStream_t> &follow_stream_usage = main_follow_stream_mapping.at(main_stream_id); | |||
| if (static_cast<size_t>(hccl_stream_num) <= follow_stream_usage.size()) { | |||
| GELOGI("capacity of follow stream is enough to be reused."); | |||
| for (int64_t i = 0; i < hccl_stream_num; i++) { | |||
| hccl_stream_list_.emplace_back(follow_stream_usage.at(i)); | |||
| } | |||
| } else { | |||
| GELOGI("need to reuse follow stream and create new follow stream."); | |||
| size_t created_stream_num = follow_stream_usage.size(); | |||
| hccl_stream_list_ = follow_stream_usage; | |||
| ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model, main_stream_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(RT_FAILED, "Create hccl stream failed."); | |||
| return RT_ERROR_TO_GE_STATUS(ret); | |||
| } | |||
| } | |||
| GELOGI("Initialize hccl slave stream success, hcclStreamNum =%ld", hccl_stream_num); | |||
| } else { | |||
| GELOGI("need to reuse follow stream and create new follow stream."); | |||
| size_t created_stream_num = davinci_model->GetHcclFolowStream().size(); | |||
| ReuseStream(created_stream_num, davinci_model); | |||
| ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model); | |||
| GELOGI("need to create follow stream for %s with new mainstream %ld.", op_desc->GetName().c_str(), main_stream_id); | |||
| ret = CreateStream(hccl_stream_num, davinci_model, main_stream_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(RT_FAILED, "Create hccl stream failed."); | |||
| return RT_ERROR_TO_GE_STATUS(ret); | |||
| } | |||
| } | |||
| GELOGI("Initialize hccl slave stream success, hcclStreamNum =%ld", hccl_stream_num); | |||
| return SUCCESS; | |||
| } | |||
| void HcclTaskInfo::ReuseStream(int64_t stream_num, DavinciModel *davinci_model) { | |||
| GELOGI("Start to reuse %ld follow stream.", stream_num); | |||
| int64_t index = 0; | |||
| for (int64_t i = 0; i < stream_num; i++) { | |||
| hccl_stream_list_.emplace_back(davinci_model->GetHcclFolowStream().at(index).first); | |||
| int64_t remain_cap = davinci_model->GetHcclFolowStream().at(index).second - 1; | |||
| davinci_model->ReuseHcclFollowStream(remain_cap, index); | |||
| } | |||
| } | |||
| Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_model) { | |||
| Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_model, int64_t main_stream_id) { | |||
| GELOGI("Start to create %ld hccl stream.", stream_num); | |||
| for (int64_t i = 0; i < stream_num; ++i) { | |||
| rtStream_t stream = nullptr; | |||
| @@ -189,8 +181,7 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("hccl_stream addr is=%p", stream); | |||
| int64_t remain_cap = max_node_of_hccl_stream_ - 1; | |||
| davinci_model->CreateHcclFollowStream(stream, remain_cap); | |||
| davinci_model->SaveHcclFollowStream(main_stream_id, stream); | |||
| hccl_stream_list_.emplace_back(stream); | |||
| davinci_model->PushHcclStream(stream); | |||
| @@ -60,9 +60,7 @@ class HcclTaskInfo : public TaskInfo { | |||
| void GetPrivateDefByTaskDef(const domi::TaskDef &task); | |||
| void ReuseStream(int64_t stream_num, DavinciModel *davinci_model); | |||
| ge::Status CreateStream(int64_t stream_num, DavinciModel *davinci_model); | |||
| ge::Status CreateStream(int64_t stream_num, DavinciModel *davinci_model, int64_t main_stream_id); | |||
| Status SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciModel *davinci_model); | |||
| @@ -77,7 +75,6 @@ class HcclTaskInfo : public TaskInfo { | |||
| void *private_def_; | |||
| uint32_t private_def_len_; | |||
| static std::mutex hccl_follow_stream_mutex_; | |||
| static uint32_t max_node_of_hccl_stream_; | |||
| vector<GETaskKernelHcclInfo> kernel_hccl_infos_; | |||
| vector<void *> input_data_addrs_; | |||
| vector<void *> output_data_addrs_; | |||
| @@ -25,7 +25,6 @@ | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/l2_cache_optimize.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| #include "graph/load/new_model_manager/davinci_model.h" | |||
| #include "graph/load/new_model_manager/model_utils.h" | |||
| #include "runtime/kernel.h" | |||
| @@ -92,7 +91,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||
| string session_graph_model_id; | |||
| davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); | |||
| // get bin_file_key | |||
| const char *bin_file_key = DavinciModel::GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||
| const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||
| // new aicpu kernel(rtCpuKernelLaunch) no need to check function | |||
| if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) { | |||
| rtError_t rt_ret; | |||
| @@ -395,7 +394,14 @@ Status KernelTaskInfo::Distribute() { | |||
| "stubfunc:%p blockdim:%u stream:%p", | |||
| call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | |||
| // l1 fusion enable and env flag open (kCloseSkt for skt debug) | |||
| if (call_skt && (env_flag != kCloseSkt)) { | |||
| bool open_dump = false; | |||
| auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel(); | |||
| if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | |||
| all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || | |||
| all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { | |||
| open_dump = true; | |||
| } | |||
| if (call_skt && (env_flag != kCloseSkt) && !open_dump) { | |||
| GE_RETURN_IF_ERROR(SuperKernelDistribute()); | |||
| } else { | |||
| // call rtKernelLaunch for current task | |||
| @@ -577,7 +583,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| // When inferencing, stub_func_ is different from dynamic-registration to runtime, and needs to be modified. | |||
| string session_graph_model_id; | |||
| davinci_model_->GetUniqueId(op_desc, session_graph_model_id); | |||
| const char *bin_file_key = DavinciModel::GetRegisterStub(op_desc->GetName(), session_graph_model_id); | |||
| const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc->GetName(), session_graph_model_id); | |||
| rtError_t rt_ret = rtQueryFunctionRegistered(const_cast<char *>(bin_file_key)); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| stub_func_ = const_cast<char *>(bin_file_key); | |||
| @@ -634,7 +640,11 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| skt_dump_args_ = static_cast<char *>(args_) + offset; | |||
| if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
| op_desc->GetName())) { | |||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
| if (IsL1FusionOp(op_desc)) { | |||
| dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | |||
| } else { | |||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
| } | |||
| dump_args_ = static_cast<char *>(args_) + offset; | |||
| } | |||
| @@ -653,6 +663,25 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| return SUCCESS; | |||
| } | |||
| bool KernelTaskInfo::IsL1FusionOp(const OpDescPtr &op_desc) { | |||
| std::vector<int64_t> input_memory_type; | |||
| (void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_type); | |||
| for (size_t i = 0; i < input_memory_type.size(); ++i) { | |||
| if (input_memory_type.at(i) == RT_MEMORY_L1) { | |||
| return true; | |||
| } | |||
| } | |||
| std::vector<int64_t> output_memory_type; | |||
| (void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, output_memory_type); | |||
| for (size_t i = 0; i < output_memory_type.size(); ++i) { | |||
| if (output_memory_type.at(i) == RT_MEMORY_L1) { | |||
| return true; | |||
| } | |||
| } | |||
| return false; | |||
| } | |||
| Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::KernelDef &kernel_def) { | |||
| GELOGI("Do InitAICPUCustomTask"); | |||
| OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); | |||
| @@ -904,7 +933,11 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
| if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
| op_desc->GetName())) { | |||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
| if (IsL1FusionOp(op_desc)) { | |||
| dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | |||
| } else { | |||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
| } | |||
| dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead); | |||
| } | |||
| @@ -127,6 +127,7 @@ class KernelTaskInfo : public TaskInfo { | |||
| static void FreeRtMem(void **ptr); | |||
| Status SuperKernelDistribute(); | |||
| bool IsL1FusionOp(const OpDescPtr &op_desc); | |||
| // For super kernel | |||
| Status SaveSKTDumpInfo(); | |||
| @@ -59,7 +59,12 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel | |||
| // malloc args memory | |||
| size_t args_size = sizeof(void *) * io_addrs.size(); | |||
| rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, RT_MEMORY_HBM); | |||
| rtMemType_t memory_type = RT_MEMORY_HBM; | |||
| if (op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE)) { | |||
| memory_type = RT_MEMORY_TS_4G; | |||
| } | |||
| GELOGI("memory_type: %u", memory_type); | |||
| rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, memory_type); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| @@ -36,6 +36,12 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
| count_ = memcpy_async.count(); | |||
| kind_ = memcpy_async.kind(); | |||
| dst_max_ = memcpy_async.dst_max(); | |||
| OpDescPtr op_desc = davinci_model->GetOpByIndex(memcpy_async.op_index()); | |||
| if (op_desc == nullptr) { | |||
| GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async.op_index()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| if (davinci_model->IsKnownNode()) { | |||
| src_ = reinterpret_cast<uint8_t *>(davinci_model_->GetCurrentArgsAddr(args_offset_)); | |||
| dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(src_) + sizeof(void *)); | |||
| @@ -49,9 +55,17 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
| return ret; | |||
| } | |||
| ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| // dst_ needs different address for different chips | |||
| if (op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE)) { | |||
| ret = AllocTsMemoryForMemcpy(op_desc, davinci_model); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| } else { | |||
| ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| } | |||
| GELOGI("MemcpyAsyncTaskInfo Init Success, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu", | |||
| @@ -108,5 +122,33 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { | |||
| return SUCCESS; | |||
| } | |||
| Status MemcpyAsyncTaskInfo::AllocTsMemoryForMemcpy(const OpDescPtr &op_desc, DavinciModel *davinci_model) { | |||
| int64_t size = 0; | |||
| auto tensor_desc = op_desc->GetOutputDescPtr(0); | |||
| if ((tensor_desc == nullptr) || (TensorUtils::GetTensorSizeInBytes(*tensor_desc, size) != GRAPH_SUCCESS)) { | |||
| GELOGE(FAILED, "GetTensorSizeInBytes failed!"); | |||
| return FAILED; | |||
| } | |||
| rtError_t rt_ret = rtMalloc(&memory_4g_, size, RT_MEMORY_TS_4G); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtMalloc failed, ret: 0x%X", rt_ret); | |||
| return FAILED; | |||
| } | |||
| // map save the opdesc's offset and special address, for update the streamSwitchN's input address | |||
| std::map<int64_t, void *> memcpy_4g_offset_addr; | |||
| vector<int64_t> offsets = op_desc->GetOutputOffset(); | |||
| if (offsets.empty()) { | |||
| GELOGE(FAILED, "GetOutputOffset failed!"); | |||
| return FAILED; | |||
| } | |||
| memcpy_4g_offset_addr.insert(std::pair<int64_t, void *>(offsets[0], memory_4g_)); | |||
| davinci_model->SetMemcpyOffsetAndAddr(memcpy_4g_offset_addr); | |||
| dst_ = reinterpret_cast<uint8_t *>(memory_4g_); | |||
| return SUCCESS; | |||
| } | |||
| REGISTER_TASK_INFO(RT_MODEL_TASK_MEMCPY_ASYNC, MemcpyAsyncTaskInfo); | |||
| } // namespace ge | |||
| @@ -18,15 +18,24 @@ | |||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | |||
| #include "graph/load/new_model_manager/task_info/task_info.h" | |||
| #include "graph/op_desc.h" | |||
| namespace ge { | |||
| class MemcpyAsyncTaskInfo : public TaskInfo { | |||
| public: | |||
| MemcpyAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), count_(0), kind_(0) {} | |||
| MemcpyAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), count_(0), kind_(0), memory_4g_(nullptr) {} | |||
| ~MemcpyAsyncTaskInfo() override { | |||
| src_ = nullptr; | |||
| dst_ = nullptr; | |||
| if (memory_4g_ != nullptr) { | |||
| rtError_t ret = rtFree(memory_4g_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret); | |||
| } | |||
| memory_4g_ = nullptr; | |||
| } | |||
| } | |||
| Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
| @@ -38,6 +47,7 @@ class MemcpyAsyncTaskInfo : public TaskInfo { | |||
| Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
| private: | |||
| Status AllocTsMemoryForMemcpy(const OpDescPtr &op_desc, DavinciModel *davinci_model); | |||
| uint8_t *dst_; | |||
| uint64_t dst_max_; | |||
| uint8_t *src_; | |||
| @@ -46,6 +56,7 @@ class MemcpyAsyncTaskInfo : public TaskInfo { | |||
| DavinciModel *davinci_model_ = nullptr; | |||
| uint32_t args_offset_ = 0; | |||
| domi::MemcpyAsyncDef memcpy_async; | |||
| void *memory_4g_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | |||
| @@ -66,16 +66,13 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * | |||
| GELOGE(FAILED, "Get true stream ptr of switchN op failed."); | |||
| return FAILED; | |||
| } | |||
| if (davinci_model->IsKnownNode()) { | |||
| input_ptr_ = davinci_model->GetCurrentFixedAddr(args_offset_); | |||
| } else { | |||
| auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); | |||
| if (input_data_addr.empty()) { | |||
| GELOGE(FAILED, "Input data addr is nullptr."); | |||
| return FAILED; | |||
| } | |||
| input_ptr_ = input_data_addr[0]; | |||
| // update StreamSwitchN's input_ptr_ | |||
| Status ret = InputPtrUpdate(op_desc, davinci_model); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| davinci_model->DisableZeroCopy(input_ptr_); | |||
| GELOGI("StreamSwitchNTaskInfo Init Success, inputSize:%u, elementSize:%d, trueStreamID:%ld.", input_size_, | |||
| element_size_, op_desc->GetStreamId()); | |||
| @@ -154,5 +151,36 @@ Status StreamSwitchNTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davin | |||
| GELOGI("Calculate stream switchn task args , tensor_size %ld, args_offset %ld", tensor_size, args_offset_); | |||
| return SUCCESS; | |||
| } | |||
| Status StreamSwitchNTaskInfo::InputPtrUpdate(const OpDescPtr &op_desc, DavinciModel *davinci_model) { | |||
| bool is_4g_mem = false; | |||
| const map<int64_t, void *> memcpy_4g_offset_addr = davinci_model->GetMemcpyOffsetAndAddr(); | |||
| vector<int64_t> input_offset = op_desc->GetInputOffset(); | |||
| if (input_offset.empty()) { | |||
| GELOGE(FAILED, "Get StreamSwitchN's input offset failed."); | |||
| return FAILED; | |||
| } | |||
| auto iter = memcpy_4g_offset_addr.find(input_offset[0]); | |||
| if (iter != memcpy_4g_offset_addr.end()) { | |||
| input_ptr_ = iter->second; | |||
| is_4g_mem = true; | |||
| } | |||
| if (is_4g_mem == false) { | |||
| if (davinci_model->IsKnownNode()) { | |||
| input_ptr_ = davinci_model->GetCurrentFixedAddr(args_offset_); | |||
| } else { | |||
| auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); | |||
| if (input_data_addr.empty()) { | |||
| return FAILED; | |||
| } | |||
| input_ptr_ = input_data_addr[0]; | |||
| } | |||
| } | |||
| GELOGI("StreamSwitchN's input_ptr is %p, is_4g_mem: %d", input_ptr_, is_4g_mem); | |||
| return SUCCESS; | |||
| } | |||
| REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH_N, StreamSwitchNTaskInfo); | |||
| } // namespace ge | |||
| } // namespace ge | |||
| @@ -42,6 +42,7 @@ class StreamSwitchNTaskInfo : public TaskInfo { | |||
| private: | |||
| Status GetTrueStreamPtr(const OpDescPtr &op_desc, DavinciModel *davinci_model); | |||
| Status InputPtrUpdate(const OpDescPtr &op_desc, DavinciModel *davinci_model); | |||
| void *input_ptr_; | |||
| uint32_t input_size_; | |||
| void *value_ptr_; | |||
| @@ -143,10 +143,11 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const ma | |||
| /** | |||
| * @ingroup ge | |||
| * @brief Update task param to device. | |||
| * @param [in] async_mode: true for asychronous mode. | |||
| * @param [in] stream: Stream for asychronous update. | |||
| * @return: 0 SUCCESS / others FAILED | |||
| */ | |||
| Status ZeroCopyTask::DistributeParam(rtStream_t stream) { | |||
| Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) { | |||
| if (!is_updated_) { | |||
| return SUCCESS; | |||
| } | |||
| @@ -154,7 +155,7 @@ Status ZeroCopyTask::DistributeParam(rtStream_t stream) { | |||
| is_updated_ = false; | |||
| GE_CHECK_NOTNULL(args_addr_); | |||
| rtError_t rt_err = RT_ERROR_NONE; | |||
| if (stream != nullptr) { | |||
| if (async_mode) { | |||
| rt_err = | |||
| rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, stream); | |||
| } else { | |||
| @@ -77,10 +77,11 @@ class ZeroCopyTask { | |||
| /** | |||
| * @ingroup ge | |||
| * @brief Update task param to device. | |||
| * @param [in] async_mode: true for asychronous mode. | |||
| * @param [in] stream: Stream for asychronous update. | |||
| * @return: 0 SUCCESS / others FAILED | |||
| */ | |||
| ge::Status DistributeParam(rtStream_t stream); | |||
| ge::Status DistributeParam(bool async_mode, rtStream_t stream); | |||
| protected: | |||
| bool CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, uintptr_t addr); | |||
| @@ -97,4 +98,4 @@ class ZeroCopyTask { | |||
| map<uintptr_t, vector<size_t>> task_addr_offset_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_TASK_H_ | |||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_TASK_H_ | |||
| @@ -91,7 +91,13 @@ | |||
| #include "graph/passes/variable_ref_delete_op_pass.h" | |||
| #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" | |||
| #include "graph/passes/end_of_sequence_add_control_pass.h" | |||
| #include "graph/passes/subexpression_migration_pass.h" | |||
| #include "graph/passes/unused_args_clean_pass.h" | |||
| #include "graph/passes/global_step_insert_pass.h" | |||
| #include "graph/utils/tensor_adapter.h" | |||
| #include "graph/utils/type_utils.h" | |||
| #include "graph/graph_util.h" | |||
| #include "graph/types.h" | |||
| #include "inc/pass_manager.h" | |||
| #include "init/gelib.h" | |||
| @@ -102,6 +108,8 @@ const char *const kNetOutput = "NetOutput"; | |||
| const char *const kVariable = "Variable"; | |||
| const char *const kSend = "Send"; | |||
| const char *const kRecv = "Recv"; | |||
| const char *const kCheckPointForGetVar = "CheckPointGraphForGetVar"; | |||
| const char *const kCheckPointGraph = "checkpoint_graph"; | |||
| bool IsTailingOptimization() { | |||
| string is_tailing_optimization_option; | |||
| @@ -380,6 +388,11 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| GM_RUN_AND_DUMP_PERF("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner); | |||
| GM_RUN_AND_DUMP_PERF("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph); | |||
| if (std::getenv("AnalyzeMode")) { | |||
| GELOGI("Do return failed after refine_running_format when in analyze mode!"); | |||
| return FAILED; | |||
| } | |||
| GM_RUN_AND_DUMP_PERF("SubexpressionMigration", SubexpressionMigration, compute_graph); | |||
| GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph); | |||
| if (IsTailingOptimization()) { | |||
| GM_RUN_AND_DUMP_PERF("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph); | |||
| @@ -392,9 +405,11 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) | |||
| GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | |||
| } | |||
| GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); | |||
| GM_RUN_AND_DUMP_PERF("OptimizeSubgraph", OptimizeSubgraph, graph_node, compute_graph, session_id); | |||
| GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | |||
| GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", graph_optimize_.OptimizeGraphBeforeBuildForRts, compute_graph); | |||
| GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); | |||
| // when set incre build, save om model and var manager | |||
| @@ -403,12 +418,25 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
| if (save_ret != SUCCESS) { | |||
| GELOGW("Fail to save cache."); | |||
| } | |||
| // release rts generate context | |||
| RtContextUtil::GetInstance().DestroyRtContexts(session_id); | |||
| GEEVENT("[GEPERFTRACE] GE PreRun End"); | |||
| return SUCCESS; | |||
| } | |||
| #undef RUN_AND_DUMP | |||
| Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { | |||
| PassManager pass_manager; | |||
| GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass)); | |||
| GE_CHK_STATUS_RET(pass_manager.AddPass("UnusedArgsCleanPass", new (std::nothrow) UnusedArgsCleanPass)); | |||
| GE_TIMESTAMP_START(SubexpressionMigrationPass); | |||
| auto ret = pass_manager.Run(compute_graph); | |||
| GE_TIMESTAMP_END(SubexpressionMigrationPass, "GraphManager::OptimizeStage1_1"); | |||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||
| GELOGE(ret, "Run SubexpressionMigrationPass failed, ret:%u.", ret); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, | |||
| GeRootModelPtr &ge_root_model, uint64_t session_id) { | |||
| @@ -427,6 +455,8 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||
| ret = IncreBuild(graph_node, ge_model); | |||
| if (ret != SUCCESS) { | |||
| ret = PreRun(graph_node, inputs, ge_root_model, session_id); | |||
| // release rts generate context | |||
| RtContextUtil::GetInstance().DestroyRtContexts(session_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "PreRun Failed."); | |||
| return ret; | |||
| @@ -1388,6 +1418,9 @@ bool GraphManager::CheckNetOutputForCheckpointGraph(NodePtr &node) { | |||
| } | |||
| bool GraphManager::CheckVariableForCheckpointGraph(NodePtr &node) { | |||
| if (node->GetOpDesc()->HasAttr(kCheckPointForGetVar)) { | |||
| return false; | |||
| } | |||
| auto out = node->GetOutDataAnchor(0); | |||
| if (out == nullptr) { | |||
| GELOGE(GE_GRAPH_PARAM_NULLPTR, "out is nullptr."); | |||
| @@ -1573,48 +1606,6 @@ Status GraphManager::RemoveIsolatedConst(ge::ComputeGraphPtr &compute_graph) { | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::NewOptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_graph) { | |||
| GELOGD("NewOptimizeAfterMergeSubGraph in"); | |||
| GEPass ge_passes(compute_graph); | |||
| NamesToPass names_to_passes; | |||
| ConstantFoldingPass constant_folding_pass; | |||
| names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | |||
| GE_TIMESTAMP_START(names_to_passes); | |||
| auto ret = ge_passes.Run(names_to_passes); | |||
| GE_TIMESTAMP_END(names_to_passes, "GraphManager::ge_passes"); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Run ge_passes optimize for OptimizeAfterMergeSubGraph failed, ret:%d.", ret); | |||
| return ret; | |||
| } | |||
| ret = RemoveIsolatedConst(compute_graph); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Remove isolated Constant failed, ret:%d.", ret); | |||
| return ret; | |||
| } | |||
| PassManager passes; | |||
| GE_CHK_STATUS_RET(passes.AddPass("MultiBatchPass", new (std::nothrow) MultiBatchPass)); | |||
| GE_CHK_STATUS_RET(passes.AddPass("CompileNodesPass", new (std::nothrow) CompileNodesPass)); | |||
| GE_CHK_STATUS_RET(passes.AddPass("AtomicAddrCleanPass", new (std::nothrow) AtomicAddrCleanPass)); | |||
| GE_TIMESTAMP_START(passes); | |||
| ret = passes.Run(compute_graph); | |||
| GE_TIMESTAMP_END(passes, "GraphManager::passes"); | |||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||
| GELOGE(ret, "Run passes optimize for OptimizeAfterMergeSubGraph failed"); | |||
| return ret; | |||
| } | |||
| ret = compute_graph->TopologicalSorting(); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Graph topological sort failed, ret:%d.", ret); | |||
| return ret; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||
| string options = "default"; | |||
| if (GetContext().GetOption("ge.exec.variable_acc", options) != SUCCESS) { | |||
| @@ -1721,10 +1712,17 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||
| graph_pass.AddPass("OptimizeStage1_3::SwitchToStreamSwitchPass", new (std::nothrow) SwitchToStreamSwitchPass)) | |||
| GE_CHK_STATUS_RET( | |||
| graph_pass.AddPass("OptimizeStage1_3::AttachStreamLabelPass", new (std::nothrow) AttachStreamLabelPass)) | |||
| GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::MultiBatchPass", new (std::nothrow) MultiBatchPass(true))) | |||
| GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::IteratorOpPass", new (std::nothrow) IteratorOpPass)) | |||
| GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", | |||
| new (std::nothrow) VariableRefUselessControlOutDeletePass)) | |||
| GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ReshapeRecoveryPass", new (std::nothrow) ReshapeRecoveryPass)) | |||
| if (options_.train_graph_flag) { | |||
| // Priority: The GlobalStepInsertPass should work before graph partitioner. | |||
| // Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory | |||
| GE_CHK_STATUS_RET( | |||
| graph_pass.AddPass("OptimizeStage1_3::GlobalStepInsertPass", new (std::nothrow) GlobalStepInsertPass)) | |||
| } | |||
| GE_TIMESTAMP_START(graph_pass); | |||
| ret = graph_pass.Run(compute_graph); | |||
| GE_TIMESTAMP_END(graph_pass, "GraphManager::OptimizeStage1_3"); | |||
| @@ -1787,11 +1785,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||
| PassManager pass_for_control_attr_optimize; | |||
| if (options_.train_graph_flag) { | |||
| const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||
| if (unknown_shape_skip == nullptr) { | |||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::FlowCtrlPass", | |||
| new (std::nothrow) FlowCtrlPass)) | |||
| } | |||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::FlowCtrlPass", | |||
| new (std::nothrow) FlowCtrlPass)) | |||
| } | |||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass", | |||
| @@ -1821,14 +1816,10 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||
| pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::" | |||
| "EndOfSequenceAddControlPass", | |||
| new (std::nothrow) EndOfSequenceAddControlPass)) | |||
| const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||
| if (unknown_shape_skip == nullptr) { | |||
| // SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and | |||
| // graph-structure. So try not to add new pass after SubgraphPass. | |||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", | |||
| new (std::nothrow) SubgraphPass)) | |||
| } | |||
| // SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and | |||
| // graph-structure. So try not to add new pass after SubgraphPass. | |||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", | |||
| new (std::nothrow) SubgraphPass)) | |||
| // AttachStreamLabelPass modifies attr without changing structure of compute_graph | |||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::AttachStreamLabelPass", | |||
| new (std::nothrow) AttachStreamLabelPass)) | |||
| @@ -1870,120 +1861,6 @@ void GraphManager::ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_gr | |||
| } | |||
| } | |||
| } | |||
| Status GraphManager::OptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_graph) { | |||
| GELOGI("Start optimize after merge sub graph."); | |||
| GEPass ge_passes_for_shape(compute_graph); | |||
| NamesToPass names_to_passes_for_shape; | |||
| CastRemovePass cast_remove_pass; | |||
| names_to_passes_for_shape.emplace_back("CastRemovePass", &cast_remove_pass); | |||
| TransposeTransDataPass transpose_transdata_pass; | |||
| names_to_passes_for_shape.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); | |||
| GE_TIMESTAMP_START(ge_passes_for_shape); | |||
| Status ret = ge_passes_for_shape.Run(names_to_passes_for_shape); | |||
| GE_TIMESTAMP_END(ge_passes_for_shape, "GraphManager::GePassesForShape"); | |||
| GE_CHK_STATUS_RET(ret, "Run ge_passes_for_shape optimize for OptimizeAfterMergeSubGraph failed, ret:%d.", ret); | |||
| string options = "default"; | |||
| if (GetContext().GetOption("ge.exec.variable_acc", options) != SUCCESS) { | |||
| GELOGI("get ge.exec.variable_acc failed. set default value."); | |||
| } | |||
| PassManager after_merge_passes; | |||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("PermutePass", new (std::nothrow) PermutePass)); | |||
| GE_IF_BOOL_EXEC(options == "default" || options == "1", GELOGI("turn on variable accelerator"); GE_CHK_STATUS_RET( | |||
| after_merge_passes.AddPass("VariableOpPass", new (std::nothrow) VariableOpPass(&var_acc_ctrl_)))); | |||
| ret = after_merge_passes.Run(compute_graph); | |||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||
| GELOGE(ret, "Run passes after merge sub graph failed, ret:%d.", ret); | |||
| return ret; | |||
| } | |||
| // reshape remove + symmetry_elimination_pass to replace transop depth fusion pass | |||
| GEPass ge_passes_symmetry(compute_graph); | |||
| NamesToPass names_to_passes_for_symmetry; | |||
| ReshapeRemovePass reshape_remove_pass; | |||
| names_to_passes_for_symmetry.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||
| TransOpSymmetryEliminationPass symmetry_elimination_pass; | |||
| names_to_passes_for_symmetry.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); | |||
| ret = ge_passes_symmetry.Run(names_to_passes_for_symmetry); | |||
| GE_CHK_STATUS_RET(ret, "Run ge_passes optimize for OptimizeAfterMergeSubGraph failed, ret:%d.", ret); | |||
| PassManager after_merge_fusion_passes; | |||
| GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass("TransOpWithoutReshapeFusionPass", | |||
| new (std::nothrow) TransOpWithoutReshapeFusionPass)); | |||
| GE_CHK_STATUS_RET( | |||
| after_merge_fusion_passes.AddPass("TransOpBreadthFusionPass", new (std::nothrow) TransOpBreadthFusionPass)); | |||
| GE_CHK_STATUS_RET( | |||
| after_merge_fusion_passes.AddPass("VariableRefDeleteOpPass", new (std::nothrow) VariableRefDeleteOpPass)); | |||
| GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass("SameTransdataBreadthFusionPass", | |||
| new (std::nothrow) SameTransdataBreadthFusionPass)); | |||
| GE_CHK_STATUS_RET( | |||
| after_merge_fusion_passes.AddPass("MarkGraphUnknownStatusPass", new (std::nothrow) MarkGraphUnknownStatusPass)); | |||
| GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass("AtomicAddrCleanPass", new (std::nothrow) AtomicAddrCleanPass)); | |||
| GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass( | |||
| "LinkGenMaskNodesPass", new (std::nothrow) LinkGenMaskNodesPass(options_.stream_max_parallel_num))); | |||
| GE_TIMESTAMP_START(after_merge_fusion_passes); | |||
| ret = after_merge_fusion_passes.Run(compute_graph); | |||
| GE_TIMESTAMP_END(after_merge_fusion_passes, "GraphManager::AfterMergePasses"); | |||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||
| GELOGE(ret, "Run passes after merge sub graph failed, ret:%d.", ret); | |||
| return ret; | |||
| } | |||
| // add variable attr for hccl broadcast,need to be removed after variable pass online | |||
| for (const ge::NodePtr &node : compute_graph->GetDirectNode()) { | |||
| if (node->GetOpDesc()->GetType() != VARIABLE) { | |||
| continue; | |||
| } | |||
| if (IsBroadCastOpData(node)) { | |||
| AdjustBroadCastOpData(node); | |||
| } | |||
| if (IsAssignOpData(node)) { | |||
| AdjustAssignOpData(node); | |||
| } | |||
| } | |||
| GEPass ge_passes(compute_graph); | |||
| NamesToPass names_to_passes; | |||
| TransOpNearbyAllreduceFusionPass trans_op_nearby_allreduce_fusion_pass; | |||
| names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass); | |||
| names_to_passes_for_shape.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||
| ConstantFoldingPass constant_folding_pass; | |||
| names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | |||
| DimensionAdjustPass dimension_adjust_pass; | |||
| names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); | |||
| CondRemovePass condition_remove_pass; | |||
| names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); | |||
| GE_TIMESTAMP_START(names_to_passes); | |||
| ret = ge_passes.Run(names_to_passes); | |||
| GE_TIMESTAMP_END(names_to_passes, "GraphManager::MergedGraphNameToPasses"); | |||
| GE_CHK_STATUS_RET(ret, "Run ge_passes optimize for OptimizeAfterMergeSubGraph failed, ret:%d.", ret); | |||
| ret = RemoveIsolatedConst(compute_graph); | |||
| GE_CHK_STATUS_RET(ret, "Remove isolated Constant failed, ret:%d.", ret); | |||
| PassManager pass_for_optimize; | |||
| const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||
| if (unknown_shape_skip == nullptr) { | |||
| GE_CHK_STATUS_RET(pass_for_optimize.AddPass("SubgraphPass", new (std::nothrow) SubgraphPass)); | |||
| } | |||
| GE_CHK_STATUS_RET(pass_for_optimize.AddPass("MultiBatchPass", new (std::nothrow) MultiBatchPass)); | |||
| GE_CHK_STATUS_RET(pass_for_optimize.AddPass("CompileNodesPass", new (std::nothrow) CompileNodesPass)); | |||
| GE_TIMESTAMP_START(pass_for_optimize); | |||
| ret = pass_for_optimize.Run(compute_graph); | |||
| GE_TIMESTAMP_END(pass_for_optimize, "GraphManager::OptimizePass"); | |||
| if (ret != SUCCESS && ret != NOT_CHANGED) { | |||
| GELOGE(ret, "Run optimize pass failed"); | |||
| return ret; | |||
| } | |||
| ret = compute_graph->TopologicalSorting(); | |||
| GE_CHK_STATUS_RET(ret, "Graph topological sort failed, ret:%d.", ret); | |||
| GELOGI("End optimize after merge sub graph."); | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { | |||
| GELOGI("[LoadGraphAsync] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId()); | |||
| @@ -2185,6 +2062,19 @@ Status GraphManager::IncreBuild(const GraphNodePtr &graph_node, GeModelPtr &ge_m | |||
| return FAILED; | |||
| } | |||
| void GraphManager::ConstructGeInput(std::vector<ge::GeTensor> &ge_inputs, PreRunArgs &args) { | |||
| for (auto const &input : args.input_tensor) { | |||
| std::vector<int64_t> input_dims; | |||
| std::transform(input.dims.begin(), input.dims.end(), std::back_inserter(input_dims), | |||
| [](int64_t x) -> int64_t { return x; }); | |||
| GeShape input_shape(input_dims); | |||
| GeTensorDesc input_tensor_desc; | |||
| input_tensor_desc.SetShape(input_shape); | |||
| input_tensor_desc.SetDataType(static_cast<ge::DataType>(input.data_type)); | |||
| ge_inputs.emplace_back(input_tensor_desc); | |||
| } | |||
| } | |||
| void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| if (prctl(PR_SET_NAME, ("GE_PreRun")) != 0) { | |||
| GELOGW("Set thread name failed."); | |||
| @@ -2198,16 +2088,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| GetThreadLocalContext() = args.context; | |||
| GELOGI("A new loop start."); | |||
| std::vector<ge::GeTensor> ge_inputs; | |||
| for (auto const &input : args.input_tensor) { | |||
| std::vector<int64_t> input_dims; | |||
| std::transform(input.dims.begin(), input.dims.end(), std::back_inserter(input_dims), | |||
| [](int64_t x) -> int64_t { return x; }); | |||
| GeShape input_shape(input_dims); | |||
| GeTensorDesc input_tensor_desc; | |||
| input_tensor_desc.SetShape(input_shape); | |||
| input_tensor_desc.SetDataType(static_cast<ge::DataType>(input.data_type)); | |||
| ge_inputs.emplace_back(input_tensor_desc); | |||
| } | |||
| ConstructGeInput(ge_inputs, args); | |||
| // find graph | |||
| GraphNodePtr graph_node = nullptr; | |||
| Status ret = graph_manager->GetGraphNode(args.graph_id, graph_node); | |||
| @@ -2229,14 +2111,11 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| graph_node->SetRunFlag(true); | |||
| ComputeGraphPtr compute_graph_tmp = GraphUtils::GetComputeGraph(*(graph_node->GetGraph())); | |||
| if (graph_manager->GetTrainFlag()) { | |||
| if (compute_graph_tmp == nullptr) { | |||
| ReturnError(graph_manager, args.callback, GE_GRAPH_GRAPH_NODE_NULL, | |||
| "[RunGraph] compute_graph_tmp is NULL, graph id = %u."); | |||
| graph_node->Unlock(); | |||
| return; | |||
| } | |||
| if (compute_graph_tmp == nullptr) { | |||
| ReturnError(graph_manager, args.callback, GE_GRAPH_GRAPH_NODE_NULL, | |||
| "[RunGraph] compute_graph_tmp is NULL, graph id = %u."); | |||
| graph_node->Unlock(); | |||
| return; | |||
| } | |||
| // when set incre build, save cache helper. | |||
| graph_manager->AddModelCacheHelperToMap(args.graph_id, args.session_id, compute_graph_tmp); | |||
| @@ -2266,11 +2145,19 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| GeModelPtr ge_model = nullptr; | |||
| if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { | |||
| ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); | |||
| // release rts generate context | |||
| RtContextUtil::GetInstance().DestroyRtContexts(args.session_id); | |||
| if (ret != SUCCESS) { | |||
| graph_node->SetRunFlag(false); | |||
| ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit.."); | |||
| graph_node->Unlock(); | |||
| return; | |||
| if (!std::getenv("AnalyzeMode")) { | |||
| ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit.."); | |||
| graph_node->Unlock(); | |||
| return; | |||
| } else { | |||
| ReturnError(graph_manager, graph_node, args.callback, ret, "PreRun Failed, keep geop continue!"); | |||
| graph_node->Unlock(); | |||
| continue; | |||
| } | |||
| } | |||
| } | |||
| graph_node->SetBuildFlag(true); | |||
| @@ -2350,13 +2237,74 @@ void GraphManager::ReturnError(GraphManager *graph_manager, RunAsyncCallback cal | |||
| if (graph_manager == nullptr) { | |||
| return; | |||
| } | |||
| GELOGE(ret, "%s.", log.c_str()); | |||
| StopQueue(graph_manager); | |||
| GELOGE(ret, "%s.", log.c_str()); | |||
| std::vector<ge::OutputTensorInfo> outputs; | |||
| callback(ret, outputs); | |||
| } | |||
| void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, | |||
| Status ret, const string &log) { | |||
| std::vector<ge::OutputTensorInfo> outputs; | |||
| auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); | |||
| if (graph_manager == nullptr || compute_graph == nullptr) { | |||
| GELOGE(GRAPH_FAILED, "[Analyze Mode] compute graph is null!"); | |||
| callback(GRAPH_FAILED, outputs); | |||
| return; | |||
| } | |||
| for (const auto &node : compute_graph->GetAllNodes()) { | |||
| if (node->GetType() != "NetOutput") { | |||
| continue; | |||
| } | |||
| for (size_t i = 0; i < node->GetAllInDataAnchorsSize(); i++) { | |||
| auto input_desc = node->GetOpDesc()->MutableInputDesc(i); | |||
| ge::OutputTensorInfo tensor; | |||
| tensor.dims = input_desc->GetShape().GetDims(); | |||
| tensor.data_type = static_cast<uint32_t>(input_desc->GetDataType()); | |||
| int64_t len = 1; | |||
| if (input_desc->GetShape().GetDims() != std::vector<int64_t>({})) { | |||
| len = input_desc->GetShape().GetShapeSize(); | |||
| } | |||
| if (len < 0) { | |||
| GELOGE(GRAPH_FAILED, "Analyze Mode does not support GEOP output unknown shape!"); | |||
| callback(GRAPH_FAILED, outputs); | |||
| return; | |||
| } else if (len == 0) { | |||
| GELOGI("getted shape size is 0.Do process as empty tensor!"); | |||
| len = 1; | |||
| } | |||
| auto size = GetSizeByDataType(input_desc->GetDataType()); | |||
| if (size <= 0) { | |||
| GELOGE(PARAM_INVALID, "Failed to get cube size, the data type %s is invalid", | |||
| ge::TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str()); | |||
| callback(GRAPH_FAILED, outputs); | |||
| return; | |||
| } | |||
| if (CheckInt64MulOverflow(len, static_cast<int64_t>(size)) != true) { | |||
| GELOGE(MEMALLOC_FAILED, "int64 multiply happens overflow! a:%ld b:%d", len, size); | |||
| callback(GRAPH_FAILED, outputs); | |||
| return; | |||
| } | |||
| tensor.length = len * size; | |||
| auto pbuff = new (std::nothrow) uint8_t[tensor.length]; | |||
| if (!pbuff) { | |||
| GELOGE(MEMALLOC_FAILED, "new buff failed!"); | |||
| callback(GRAPH_FAILED, outputs); | |||
| return; | |||
| } | |||
| // To avoid global step too small and can not stop, totally set a bigger value | |||
| for (int64_t i = 0; i < tensor.length; i++) { | |||
| *(pbuff + i) = 0x7F; // here stands for a positive max value | |||
| } | |||
| tensor.data.reset(pbuff); | |||
| outputs.emplace_back(std::move(tensor)); | |||
| } | |||
| } | |||
| callback(SUCCESS, outputs); | |||
| return; | |||
| } | |||
| bool GraphManager::IsGraphNeedRebuild(uint32_t graph_id) { | |||
| // find graph | |||
| GraphNodePtr graph_node = nullptr; | |||
| @@ -2479,4 +2427,99 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp | |||
| graph_node->SetGeRootModel(ge_root_model); | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::GenCheckPointGraph(const std::map<std::string, GeTensorDesc> &all_variables, Graph &graph) { | |||
| ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(kCheckPointGraph); | |||
| GE_CHECK_NOTNULL(compute_graph); | |||
| OpDescPtr save_desc = MakeShared<ge::OpDesc>(compute_graph->GetName() + "_" + kSave, kSave); | |||
| GE_CHECK_NOTNULL(save_desc); | |||
| uint32_t save_index = 0; | |||
| for (auto iter = all_variables.begin(); iter != all_variables.end(); ++iter) { | |||
| GE_CHK_GRAPH_STATUS_RET(save_desc->AddInputDesc(save_index, iter->second)); | |||
| save_index++; | |||
| } | |||
| NodePtr save_node = compute_graph->AddNode(save_desc); | |||
| uint32_t index = 0; | |||
| for (auto iter = all_variables.begin(); iter != all_variables.end(); ++iter) { | |||
| OpDescPtr var_desc = MakeShared<ge::OpDesc>(iter->first, VARIABLE); | |||
| GE_CHECK_NOTNULL(var_desc); | |||
| if (!AttrUtils::SetBool(var_desc, kCheckPointForGetVar, true)) { | |||
| GELOGW("Set check point graph attr failed."); | |||
| } | |||
| GE_CHK_GRAPH_STATUS_RET(var_desc->AddOutputDesc(iter->second)); | |||
| NodePtr var_node = compute_graph->AddNode(var_desc); | |||
| GE_CHK_STATUS(GraphUtils::AddEdge(var_node->GetOutDataAnchor(0), save_node->GetInDataAnchor(index)), | |||
| "Add edge[%s->%s] fail.", var_node->GetName().c_str(), save_node->GetName().c_str()); | |||
| index++; | |||
| } | |||
| compute_graph->Dump(); | |||
| graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::SaveVariables(const Graph &graph, const std::vector<std::string> &var_names, | |||
| const std::vector<Tensor> &outputs, std::vector<Tensor> &var_values) { | |||
| map<string, Tensor> var_results; | |||
| GE_CHK_STATUS_RET(SaveCheckPointResult(graph, outputs, var_results), "Save check point result failed."); | |||
| if (!var_names.empty()) { | |||
| for (const auto &var_name : var_names) { | |||
| if (var_results.count(var_name) == 0) { | |||
| GELOGE(FAILED, "Fetch var[%s] value failed.", var_name.c_str()); | |||
| return FAILED; | |||
| } else { | |||
| var_values.emplace_back(var_results[var_name]); | |||
| } | |||
| } | |||
| } else { | |||
| for (auto iter = var_results.begin(); iter != var_results.end(); ++iter) { | |||
| var_values.emplace_back(iter->second); | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::SaveCheckPointResult(const Graph &graph, const std::vector<Tensor> &outputs, | |||
| map<string, Tensor> &var_results) { | |||
| auto compute_graph = GraphUtils::GetComputeGraph(graph); | |||
| NodePtr netoutput_node = nullptr; | |||
| for (const auto &node : compute_graph->GetAllNodes()) { | |||
| if (node->GetType() == NETOUTPUT) { | |||
| netoutput_node = node; | |||
| break; | |||
| } | |||
| } | |||
| GE_CHECK_NOTNULL(netoutput_node); | |||
| for (const auto &in : netoutput_node->GetAllInDataAnchors()) { | |||
| auto out_anchor = in->GetPeerOutAnchor(); | |||
| GE_CHECK_NOTNULL(out_anchor); | |||
| auto peer_node = out_anchor->GetOwnerNode(); | |||
| while (peer_node->GetType() != VARIABLE) { | |||
| if (peer_node->GetAllInDataAnchors().size() != 1) { | |||
| GELOGE(FAILED, "peer_node [%s] has more than 1 input in checkpoint Graph.", peer_node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| auto peer_node_in_anchor = peer_node->GetAllInDataAnchors().at(0); | |||
| auto peer_node_out_anchor = peer_node_in_anchor->GetPeerOutAnchor(); | |||
| if (peer_node_out_anchor != nullptr) { | |||
| peer_node = peer_node_out_anchor->GetOwnerNode(); | |||
| if (peer_node->GetType() == VARIABLE) { | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| if (peer_node->GetType() != VARIABLE) { | |||
| GELOGE(FAILED, " peer_node %s is not variable in checkpoint Graph.", peer_node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| auto var_name = peer_node->GetName(); | |||
| GELOGI("[GraphManager] SaveVariables, varName is %s.", var_name.c_str()); | |||
| if (in->GetIdx() >= static_cast<int>(outputs.size())) { | |||
| GELOGE(FAILED, "variable index[%d] out of range[%zu].", in->GetIdx(), outputs.size()); | |||
| return FAILED; | |||
| } | |||
| var_results.emplace(var_name, outputs.at(in->GetIdx())); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -159,6 +159,13 @@ class GraphManager { | |||
| void SetOptionsRunGraphFlag(bool run_graph_flag); | |||
| Status GenCheckPointGraph(const std::map<std::string, GeTensorDesc> &all_variables, Graph &graph); | |||
| Status SaveVariables(const Graph &graph, const std::vector<std::string> &var_names, | |||
| const std::vector<Tensor> &outputs, std::vector<Tensor> &var_values); | |||
| Status SaveCheckPointResult(const Graph &graph, const std::vector<Tensor> &outputs, map<string, Tensor> &var_results); | |||
| private: | |||
| struct PreRunArgs { | |||
| GraphId graph_id; | |||
| @@ -267,9 +274,8 @@ class GraphManager { | |||
| Status OptimizeStage1(ComputeGraphPtr &compute_graph); | |||
| Status OptimizeStage2(ComputeGraphPtr &compute_graph); | |||
| Status OptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_graph); | |||
| Status NewOptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_graph); | |||
| Status SubexpressionMigration(ComputeGraphPtr &compute_graph); | |||
| Status LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node); | |||
| @@ -288,10 +294,13 @@ class GraphManager { | |||
| Status IncreBuild(const GraphNodePtr &graph_node, GeModelPtr &ge_model); | |||
| void RemoveModelCacheHelper(const GraphId &graph_id); | |||
| static void ConstructGeInput(std::vector<ge::GeTensor> &ge_inputs, PreRunArgs &args); | |||
| static void PreRunThread(GraphManager *graph_manager); | |||
| static void RunThread(GraphManager *graph_manager); | |||
| static void StopQueue(GraphManager *graph_manager); | |||
| static void ReturnError(GraphManager *graph_manager, RunAsyncCallback callback, Status ret, const string &log); | |||
| static void ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, Status ret, | |||
| const string &log); | |||
| void ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph); | |||
| @@ -855,6 +855,32 @@ void VarManager::RemoveAllocatedGraphId(const std::string &var_name) { | |||
| var_resource_->RemoveAllocatedGraphId(var_name); | |||
| } | |||
| Status VarManager::GetAllVariables(std::map<std::string, GeTensorDesc> &all_variables) { | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| if (var_resource_ == nullptr) { | |||
| GELOGW("VarManager has not been inited."); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| auto new_variable_desc = var_resource_->GetAllVarDesc(); | |||
| if (new_variable_desc.size() == 0) { | |||
| GELOGW("VarManager don't have variables."); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| for (auto iter = new_variable_desc.begin(); iter != new_variable_desc.end(); ++iter) { | |||
| auto trans_road = var_resource_->GetTransRoad(iter->first); | |||
| if (trans_road == nullptr || trans_road->empty()) { | |||
| GELOGI("The variable %s does not have any trans road", iter->first.c_str()); | |||
| all_variables[iter->first] = iter->second; | |||
| continue; | |||
| } | |||
| // get origin trans info : the first trans node info | |||
| auto origin_trans_node_info = trans_road->at(0); | |||
| all_variables[iter->first] = origin_trans_node_info.input; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| VarManagerPool::~VarManagerPool() { Destory(); } | |||
| VarManagerPool &VarManagerPool::Instance() { | |||
| @@ -897,4 +923,22 @@ VarManager *VarManagerPool::GetVarManager(uint64_t session_id) { | |||
| var_manager_map_[session_id] = var_manager; | |||
| return var_manager; | |||
| } | |||
| void VarManagerPool::RemoveVarManager(uint64_t session_id) { | |||
| VarManager *var_manager = nullptr; | |||
| { | |||
| std::lock_guard<std::mutex> lock(var_manager_mutex_); | |||
| auto it = var_manager_map_.find(session_id); | |||
| if (it != var_manager_map_.end()) { | |||
| var_manager = it->second; | |||
| var_manager_map_.erase(it); | |||
| } | |||
| } | |||
| if (var_manager != nullptr) { | |||
| var_manager->Destory(); | |||
| delete var_manager; | |||
| var_manager = nullptr; | |||
| } | |||
| } | |||
| } // namespace ge | |||
| @@ -157,6 +157,8 @@ class VarResource { | |||
| bool IsVarAddr(const int64_t &offset); | |||
| std::unordered_map<std::string, ge::GeTensorDesc> GetAllVarDesc() const { return cur_var_tensor_desc_map_; } | |||
| private: | |||
| std::string VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc); | |||
| @@ -276,6 +278,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { | |||
| uint8_t *GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type); | |||
| Status GetAllVariables(std::map<std::string, GeTensorDesc> &all_variables); | |||
| private: | |||
| uint32_t version_; | |||
| uint64_t session_id_; | |||
| @@ -300,6 +304,8 @@ class VarManagerPool { | |||
| VarManager *GetVarManager(uint64_t session_id); | |||
| void RemoveVarManager(uint64_t session_id); | |||
| void Destory() noexcept; | |||
| ge::Status Init() const; | |||
| @@ -16,7 +16,6 @@ | |||
| #include "graph/manager/rdma_pool_allocator.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| namespace { | |||
| const size_t kAlignedSize = 512; | |||
| @@ -27,12 +27,11 @@ | |||
| #include "framework/common/ge_inner_error_codes.h" | |||
| #include "graph/manager/block_memory.h" | |||
| #include "graph/manager/graph_mem_allocator.h" | |||
| #include "graph/node.h" | |||
| #include "runtime/mem.h" | |||
| namespace ge { | |||
| class MemoryAllocator; | |||
| class RdmaPoolAllocator { | |||
| public: | |||
| explicit RdmaPoolAllocator(rtMemType_t memory_type); | |||