!1548 optimize performance

From: @zhou_lili Reviewed-by: @xchu42,@youui Signed-off-by: @youui
4 years ago · 22f2f349c4
--- a/ge/generator/ge_generator.cc
+++ b/ge/generator/ge_generator.cc
@@ -822,7 +822,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
    auto node = comp_graph->FindNode(op_desc->GetName());
    Status ret = CheckEngineTypeSupport(node, engine_type);
    if (ret != SUCCESS) {
      GELOGE(ret, "[Check][EngineType]value:%d for node:%s not support", engine_type, node->GetName().c_str());
      GELOGE(ret, "[Check][EngineType]not support node:%s with engine of %d.", node->GetName().c_str(), engine_type);
      return ret;
    }
  }
@@ -848,6 +848,11 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
  bool all_shape = false;
  (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape);
  GELOGD("Node: %s, all_shape is %d, compile_flag is %d.", op_desc->GetName().c_str(), all_shape, compile_flag);
  (void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag);
  if (all_shape) {
    (void)AttrUtils::SetBool(ge_model, kAicpuAllshape, all_shape);
  }
  if (all_shape && CheckNoAicore(root_graph)) {
    GELOGD("Get aicpu all_shape kernel!");
    vector<GeTensor> inputs_dynamic;
@@ -857,8 +862,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
    GE_CHK_STATUS_RET_NOLOG(
      impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic));
  } else if (fuzz_compile_flag) {
    GELOGD("Get fuzz build result of %s.", op_desc->GetName().c_str());
    (void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag);
    GeAttrValue::LIST_NAMED_ATTRS fuzz_build_attrs;
    if (GetFuzzBuildAttrs(op_desc, ge_root_model, fuzz_build_attrs) != SUCCESS) {
      GELOGE(FAILED, "[Get][FuzzRet]Failed to get fuzz build result of %s.", op_desc->GetName().c_str());
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -364,20 +364,28 @@ Status ExecutionEngine::ExecuteAsync(NodeState &node_state,
                                     GraphExecutionContext &execution_context) {
  GELOGI("[%s] Node is ready for execution", task_context->GetNodeName());
  RECORD_EXECUTION_EVENT(&execution_context, task_context->GetNodeName(), "Start");
  auto cb = std::shared_ptr<NodeDoneCallback>(new(std::nothrow) NodeDoneCallback(&execution_context, task_context));
  GE_CHECK_NOTNULL(cb);
  auto callback = [task_context, cb]() {
    auto ret = cb->OnNodeDone();
    if (ret != SUCCESS) {
      task_context->OnError(ret);
    }
  };
  std::function<void()> callback = nullptr;
  GE_CHK_STATUS_RET_NOLOG(InitCallback(task_context, execution_context, callback));
  GE_CHK_STATUS_RET_NOLOG(DoExecuteAsync(node_state, *task_context, execution_context, callback));
  GE_CHK_STATUS_RET_NOLOG(PropagateOutputs(*node_state.GetNodeItem(), *task_context, execution_context));
  return SUCCESS;
 }
 // Prepares the completion callback for one node.
 // When task_context->NeedCallback() is false, `callback` is left exactly as the caller passed it
 // and SUCCESS is returned. Otherwise a NodeDoneCallback is allocated and `callback` is set to a
 // closure that runs OnNodeDone() and forwards any non-SUCCESS status to task_context->OnError().
 Status ExecutionEngine::InitCallback(const std::shared_ptr<TaskContext> &task_context,
                                     GraphExecutionContext &execution_context, std::function<void()> &callback) {
  if (!task_context->NeedCallback()) {
    return SUCCESS;
  }
  // Allocation uses nothrow-new, so a failed allocation is reported through the null check below.
  std::shared_ptr<NodeDoneCallback> done_handler(new(std::nothrow) NodeDoneCallback(&execution_context, task_context));
  GE_CHECK_NOTNULL(done_handler);
  // Both shared pointers are captured by value so they stay alive for as long as the closure does.
  callback = [task_context, done_handler]() {
    auto status = done_handler->OnNodeDone();
    if (status != SUCCESS) {
      task_context->OnError(status);
    }
  };
  return SUCCESS;
 }
 Status ExecutionEngine::DoExecuteAsync(NodeState &node_state,
                                       TaskContext &task_context,
                                       GraphExecutionContext &context,
@@ -385,7 +393,7 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state,
  const auto &task = node_state.GetKernelTask();
  if (task == nullptr) {
    GELOGE(INTERNAL_ERROR, "[Get][KernelTask] of [%s] is null.", node_state.GetName().c_str());
    REPORT_INNER_ERROR("E19999", "GetKernelTask of %s is null.", node_state.GetName().c_str());
    REPORT_INNER_ERROR("E19999", "GetKernelTask of %s failed.", node_state.GetName().c_str());
    return INTERNAL_ERROR;
  }
--- a/ge/hybrid/executor/worker/execution_engine.h
+++ b/ge/hybrid/executor/worker/execution_engine.h
@@ -35,6 +35,8 @@ class ExecutionEngine {
                               TaskContext &task_context,
                               GraphExecutionContext &context,
                               const std::function<void()> &callback);
  // Sets `callback` to a node-done handler when task_context->NeedCallback() is true;
  // otherwise `callback` is not modified. `execution_context` is handed to the
  // NodeDoneCallback that the handler wraps.
  static Status InitCallback(const std::shared_ptr<TaskContext> &task_context,
                             GraphExecutionContext &execution_context, std::function<void()> &callback);
 };
 }  // namespace hybrid
 }  // namespace ge
--- a/ge/hybrid/node_executor/task_context.cc
+++ b/ge/hybrid/node_executor/task_context.cc
@@ -561,7 +561,8 @@ const DumpProperties &TaskContext::GetDumpProperties() const {
 }
 // Reports whether a completion callback has to be created for this task
 // (ExecutionEngine::InitCallback only builds one when this returns true).
 // Returns true when any of the following holds:
 //   - the node item has an observer,
 //   - dumping is enabled,
 //   - the execution context's profiling_level is greater than zero,
 //   - the model attached to the execution context is not a single-op model.
 // The checks short-circuit in that order, so `model` is only dereferenced when
 // the first three are all false.
 bool TaskContext::NeedCallback() {
  return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0 ||
         !execution_context_->model->IsSingleOp();
 }
 Status TaskContext::Synchronize() {
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -815,6 +815,7 @@ set(PROFILING_MNG_TEST_FILES
 # Unit-test sources for the hybrid executor.
 set(HYBRID_TEST_FILES
    "hybrid/ge_hybrid_unittest.cc"
    "hybrid/known_node_executor_unittest.cc"
    "hybrid/executor/worker/execution_engine_unittest.cc"
 )
 set(OTHERS_TEST_FILES
--- a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc
+++ b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc
@@ -0,0 +1,119 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include <gtest/gtest.h>
 #include <gmock/gmock.h>
 #include <vector>
 #include "runtime/rt.h"
 #define protected public
 #define private public
 #include "hybrid/model/hybrid_model.h"
 #include "hybrid/node_executor/node_executor.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/executor/hybrid_model_executor.h"
 #include "hybrid/executor/worker/execution_engine.h"
 #undef private
 #undef protected
 using namespace std;
 using namespace testing;
 using namespace ge;
 using namespace hybrid;
 class UtestExecutionEngine : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {
  }
 };
 namespace {
 // NOTE(review): no use of kIntBase is visible in this test file - confirm it is still needed.
 constexpr int kIntBase = 10;
 }  // namespace
 // Test helper: builds an OpDesc with the given name and type.
 // Stream id and op id are set to 0, the workspace / workspace-bytes / input-offset /
 // output-offset lists are set to empty, and TVM_ATTR_NAME_MAGIC is set to
 // "RT_DEV_BINARY_MAGIC_ELF_AIVEC".
 static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") {
  auto desc = std::make_shared<ge::OpDesc>(name, type);

  desc->SetStreamId(0);
  desc->SetId(0);

  desc->SetWorkspace({});
  desc->SetWorkspaceBytes({});
  desc->SetInputOffset({});
  desc->SetOutputOffset({});

  ge::AttrUtils::SetStr(desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC");

  // NOTE(review): this *reads* "support_dynamicshape" into a local that is never used afterwards;
  // possibly SetBool was intended - confirm before changing.
  bool dynamic_shape_flag = true;
  ge::AttrUtils::GetBool(desc, "support_dynamicshape", dynamic_shape_flag);
  return desc;
 }
 // ExecuteAsync is expected to return INTERNAL_ERROR for a node state on which this test
 // never sets a kernel task. profiling_level is 1 and no model is attached to the context.
 TEST_F(UtestExecutionEngine, ExecuteAsync_without_kernel_task) {
  // One-node graph: an "Add" op with a single {2, 16} input and a single {2, 16} output.
  auto compute_graph = make_shared<ComputeGraph>("graph");
  OpDescPtr add_desc = CreateOpDesc("Add", "Add");
  GeShape dims({2, 16});
  GeTensorDesc io_desc(dims);
  add_desc->AddInputDesc(io_desc);
  add_desc->AddOutputDesc(io_desc);
  auto add_node = compute_graph->AddNode(add_desc);

  // Wrap the node in a NodeItem; creation success is checked through the pointer.
  std::unique_ptr<NodeItem> item;
  NodeItem::Create(add_node, item);
  ASSERT_FALSE(item == nullptr);
  item->input_start = 0;
  item->output_start = 0;

  // Execution context with profiling switched on.
  GraphExecutionContext exec_ctx;
  exec_ctx.profiling_level = 1;
  SubgraphContext subgraph_ctx(nullptr, &exec_ctx);
  NodeState state(*item, &subgraph_ctx);

  // Hand the freshly created task context over to the node state as a shared_ptr.
  auto created_ctx = TaskContext::Create(&state, &exec_ctx, &subgraph_ctx);
  auto shared_ctx = std::shared_ptr<TaskContext>(created_ctx.release());
  state.SetTaskContext(shared_ctx);

  ExecutionEngine engine;
  ASSERT_FALSE(state.GetTaskContext() == nullptr);
  const auto status = engine.ExecuteAsync(state, state.GetTaskContext(), exec_ctx);
  EXPECT_EQ(status, INTERNAL_ERROR);
 }
 // Same scenario as the kernel-task-less case, but profiling_level is left at its default and a
 // HybridModel is attached to the execution context. ExecuteAsync is expected to return
 // INTERNAL_ERROR because this test never sets a kernel task on the node state.
 TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) {
  // One-node graph: an "Add" op with a single {2, 16} input and a single {2, 16} output.
  auto compute_graph = make_shared<ComputeGraph>("graph");
  OpDescPtr add_desc = CreateOpDesc("Add", "Add");
  GeShape dims({2, 16});
  GeTensorDesc io_desc(dims);
  add_desc->AddInputDesc(io_desc);
  add_desc->AddOutputDesc(io_desc);
  auto add_node = compute_graph->AddNode(add_desc);

  // Wrap the node in a NodeItem; creation success is checked through the pointer.
  std::unique_ptr<NodeItem> item;
  NodeItem::Create(add_node, item);
  ASSERT_FALSE(item == nullptr);
  item->input_start = 0;
  item->output_start = 0;

  // Execution context backed by a HybridModel built from the same graph.
  GraphExecutionContext exec_ctx;
  GeRootModelPtr root_model = make_shared<GeRootModel>(compute_graph);
  HybridModel model(root_model);
  exec_ctx.model = &model;
  SubgraphContext subgraph_ctx(nullptr, &exec_ctx);
  NodeState state(*item, &subgraph_ctx);

  // Hand the freshly created task context over to the node state as a shared_ptr.
  auto created_ctx = TaskContext::Create(&state, &exec_ctx, &subgraph_ctx);
  auto shared_ctx = std::shared_ptr<TaskContext>(created_ctx.release());
  state.SetTaskContext(shared_ctx);

  ExecutionEngine engine;
  ASSERT_FALSE(state.GetTaskContext() == nullptr);
  const auto status = engine.ExecuteAsync(state, state.GetTaskContext(), exec_ctx);
  EXPECT_EQ(status, INTERNAL_ERROR);
 }