!1389 add report errmsg and modify geloge

Merge pull request !1389 from ldy2021/master
4 years ago · 49b2c8bdba
--- a/ge/hybrid/common/npu_memory_allocator.cc
+++ b/ge/hybrid/common/npu_memory_allocator.cc
@@ -38,8 +38,11 @@ AllocationAttr::AllocationAttr(void *try_reuse_addr) : AllocationAttr(0, try_reu

 NpuMemoryAllocator *NpuMemoryAllocator::GetAllocator() {
  int32_t device_id = 0;
  if (rtGetDevice(&device_id) != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Failed to get device id");
  auto rt_result = rtGetDevice(&device_id);
  if (rt_result != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "[Get][Device] Failed, result:%d.", rt_result);
    REPORT_INNER_ERROR("E19999", "rtGetDevice failed when NpuMemoryAllocator %s, result:%d.",
        __FUNCTION__, rt_result);
    return nullptr;
  }

@@ -57,7 +60,10 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) {
  }

  if (allocate_size == 0) {
    GELOGE(MEMALLOC_FAILED, "Memory size is 0, device_id = %u, size = %zu", device_id_, allocate_size);
    GELOGE(MEMALLOC_FAILED, "[Check][Param:size_t]Memory size is 0, device_id = %u, size = %zu.",
        device_id_, allocate_size);
    REPORT_INNER_ERROR("E19999", "Memory size is 0, device_id = %u, size = %zu when %s.",
        device_id_, allocate_size, __FUNCTION__);
    return nullptr;
  }

@@ -68,7 +74,10 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) {
    buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size);
  } else {
    if (allocate_size > kMaxHbmMemorySize) {
      GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size);
      GELOGE(PARAM_INVALID, "[Check][Param:size_t]Invalid HBM memory size: %zu bigger than limit:%lu, check invalid.",
          allocate_size, kMaxHbmMemorySize);
      REPORT_CALL_ERROR("E19999", "Invalid HBM memory size: %zu bigger than limit:%lu, check invalid when %s.",
          allocate_size, kMaxHbmMemorySize, __FUNCTION__);
      return nullptr;
    }
    void *try_reuse_addr = nullptr;
@@ -87,7 +96,10 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) {
                 .Malloc(allocate_size, reinterpret_cast<uint8_t *>(try_reuse_addr), device_id_);
  }
  if (buffer == nullptr) {
    GELOGE(MEMALLOC_FAILED, "Failed to malloc memory, device_id = %u, size = %zu", device_id_, allocate_size);
    GELOGE(MEMALLOC_FAILED, "[Malloc][Memory] Failed, device_id = %u, size = %zu.",
        device_id_, allocate_size);
    REPORT_CALL_ERROR("E19999", "malloc memory failed, device_id = %u, size = %zu when %s.",
        device_id_, allocate_size, __FUNCTION__);
    return nullptr;
  }

--- a/ge/hybrid/common/tensor_value.cc
+++ b/ge/hybrid/common/tensor_value.cc
@@ -32,7 +32,8 @@ std::unique_ptr<TensorBuffer> TensorBuffer::Create(NpuMemoryAllocator *allocator
  }

  if (allocator == nullptr) {
    GELOGE(INTERNAL_ERROR, "allocator is NULL");
    GELOGE(INTERNAL_ERROR, "[Check][Param:NpuMemoryAllocator] allocator is NULL.");
    REPORT_INNER_ERROR("E19999", "input allocator is NULL, when %s.", __FUNCTION__);
    return nullptr;
  }

@@ -42,7 +43,8 @@ std::unique_ptr<TensorBuffer> TensorBuffer::Create(NpuMemoryAllocator *allocator
  }
  buffer = allocator->Allocate(size, attr);
  if (buffer == nullptr) {
    GELOGE(MEMALLOC_FAILED, "Failed to allocate memory. size = %zu", size);
    GELOGE(MEMALLOC_FAILED, "[Allocate][Memory] Failed. size = %zu.", size);
    REPORT_CALL_ERROR("E19999", "allocate failed, size = %zu, when %s.", size, __FUNCTION__);
    return nullptr;
  }

--- a/ge/hybrid/executor/hybrid_execution_context.cc
+++ b/ge/hybrid/executor/hybrid_execution_context.cc
@@ -59,7 +59,9 @@ Status GraphExecutionContext::Synchronize(rtStream_t rt_stream) {
    return SUCCESS;
  }

  GELOGE(RT_FAILED, "Failed to invoke rtStreamSynchronize, ret = %d", rt_ret);
  GELOGE(RT_FAILED, "[Invoke][rtStreamSynchronize] failed, ret = %d", rt_ret);
  REPORT_CALL_ERROR("E19999",
      "invoke rtStreamSynchronize failed when GraphExecutionContext %s, ret = %d", __FUNCTION__, rt_ret);
  return RT_FAILED;
 }
 }  // namespace hybrid
--- a/ge/hybrid/executor/hybrid_model_async_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_async_executor.cc
@@ -51,8 +51,13 @@ void HybridModelAsyncExecutor::SetModelName(const string &model_name) {
 }

 Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper> &data) {
  GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL,
                     "Data queue is full, please call again later, model_id %u ", model_id_);
  if (data_inputer_->Push(data) != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Data queue is full, please call again later when %s, model_id %u.",
        __FUNCTION__, model_id_);
    GELOGE(domi::DATA_QUEUE_ISFULL,
        "[Push][Data] Data queue is full, please call again later, model_id %u ", model_id_);
    return domi::DATA_QUEUE_ISFULL;
  }
  GELOGD("EnqueueData successfully. model_id = %u, data_index = %u", data->GetInput().model_id, data->GetInput().index);
  return SUCCESS;
 }
@@ -60,8 +65,12 @@ Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper>
 Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &listener) {
  GELOGD("HybridModelExecutor::Start IN, has listener = %d", listener != nullptr);
  std::lock_guard<std::mutex> lk(mu_);
  GE_CHK_BOOL_RET_STATUS(!run_flag_, INTERNAL_ERROR, "Model already started.");

  if (run_flag_) {
    REPORT_INNER_ERROR("E19999",
        "Model already started when HybridModelAsyncExecutor %s, model_id:%u.", __FUNCTION__, model_id_);
    GELOGE(INTERNAL_ERROR, "[Check][RunState] Model already started, model_id:%u.", model_id_);
    return INTERNAL_ERROR;
  }
  run_flag_ = true;
  listener_ = listener;
  future_ = std::async(std::launch::async, [&]() -> Status {
@@ -71,7 +80,8 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &lis
    return RunInternal();
  });

  GE_CHK_BOOL_RET_STATUS(future_.valid(), INTERNAL_ERROR, "Failed to start.");
  GE_CHK_BOOL_RET_STATUS(future_.valid(), INTERNAL_ERROR,
                         "[Check][RunState] Failed to start, model_id:%u.", model_id_);
  GELOGD("HybridModelExecutor::Start successfully");
  return SUCCESS;
 }
@@ -105,26 +115,29 @@ Status HybridModelAsyncExecutor::Init() {

  executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
  GE_CHECK_NOTNULL(executor_);
  GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
  GE_CHK_STATUS_RET(DumpOpDebug(), "Dump op debug failed in hybrid engine");
  GE_CHK_STATUS_RET(executor_->Init(),
                    "[Init][HybridModelExecutor] failed, model_id:%u.", model_id_);
  GE_CHK_STATUS_RET(DumpOpDebug(), "[Dump][OpDebug] failed, model_id:%u.", model_id_);

  GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups());
  if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) {
    pipe_executor_ =
        std::unique_ptr<HybridModelPipelineExecutor>(new(std::nothrow) HybridModelPipelineExecutor(model_, device_id_));
    GE_CHECK_NOTNULL(pipe_executor_);
    GE_CHK_STATUS_RET(pipe_executor_->Init(), "Failed to init hybrid engine");
    GE_CHK_STATUS_RET(pipe_executor_->Init(),
                      "[Init][HybridModelPipelineExecutor] failed, model_id:%u.", model_id_);
  }

  GE_CHK_STATUS_RET(InitInputDesc(), "Failed to init input tensors");
  GE_CHK_STATUS_RET(InitInputDesc(), "[Init][InputDesc] failed, model_id:%u.", model_id_);

  return SUCCESS;
 }

 Status HybridModelAsyncExecutor::PreRun(InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
  GE_CHK_STATUS_RET(SyncVarData(), "Failed to sync var data");
  GE_CHK_STATUS_RET(SyncVarData(), "[Invoke][SyncVarData] failed, model_id:%u.", model_id_);
  RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[SyncVarData] End");
  GE_CHK_STATUS_RET(PrepareInputs(current_data, args), "Failed to copy input data to model");
  GE_CHK_STATUS_RET(PrepareInputs(current_data, args),
                    "[Invoke][PrepareInputs] failed to copy input data to model, model_id:%u.", model_id_);
  RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[CopyInputData] End");
  return SUCCESS;
 }
@@ -155,7 +168,7 @@ Status HybridModelAsyncExecutor::RunInternal() {
    GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
        ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput());
        CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC);
        continue, "PreRun failed.");  // [No need to check value]
        continue, "[Invoke][PreRun] failed, model_id:%u.", model_id_);  // [No need to check value]

    if (pipe_executor_ != nullptr) {
      GELOGI("HybridModel will execute in pipeline mode");
@@ -199,7 +212,9 @@ Status HybridModelAsyncExecutor::HandleResult(Status exec_ret,
  }

  if (exec_ret != SUCCESS) {
    GELOGE(exec_ret, "Failed to execute graph. model_id = %u", model_id_);
    GELOGE(exec_ret, "[Check][Param:Status] failed to execute graph. model_id = %u", model_id_);
    REPORT_INNER_ERROR("E19999",
        "failed to execute graph when HybridModelAsyncExecutor %s. model_id = %u", __FUNCTION__, model_id_);
    return OnComputeDone(data_id, INTERNAL_ERROR, output_tensor_info_list);
  }

@@ -235,8 +250,12 @@ Status HybridModelAsyncExecutor::SyncVarData() {

 Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
  if (current_data.blobs.size() < input_tensor_desc_.size()) {
    GELOGE(PARAM_INVALID, "Blob size mismatches, expect at least %zu, but got %zu",
           input_tensor_desc_.size(), current_data.blobs.size());
    GELOGE(PARAM_INVALID,
        "[Check][Size]Blob size mismatches, expect at least %zu, but got %zu, model_id = %u",
        input_tensor_desc_.size(), current_data.blobs.size(), model_id_);
    REPORT_INNER_ERROR("E19999",
        "Blob size mismatches, expect at least %zu, but got %zu when HybridModelAsyncExecutor %s, model_id = %u.",
        input_tensor_desc_.size(), current_data.blobs.size(), __FUNCTION__, model_id_);
    return PARAM_INVALID;
  }

@@ -248,8 +267,12 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
    auto tensor_size = input_sizes_[input_index];
    if (is_input_dynamic_[input_index]) {
      if (input_index >= current_data.shapes.size()) {
        GELOGE(PARAM_INVALID, "Shape index out of range, index = %zu, shape size = %zu",
               input_index, current_data.shapes.size());
        GELOGE(PARAM_INVALID,
            "[Check][Range]Shape index out of range, index = %zu, shape size = %zu model_id = %u.",
            input_index, current_data.shapes.size(), model_id_);
        REPORT_INNER_ERROR("E19999",
            "Shape index out of range, index = %zu, shape size = %zu when HybridModelAsyncExecutor %s, model_id = %u.",
            input_index, current_data.shapes.size(), __FUNCTION__, model_id_);
        return PARAM_INVALID;
      }
      auto &tensor_desc = input_tensor_desc_[input_index];
@@ -257,15 +280,19 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
      std::vector<std::pair<int64_t, int64_t>> range;
      auto range_ret = tensor_desc->GetShapeRange(range);
      GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, INTERNAL_ERROR,
                             "Get shape range failed, ret=%u.", range_ret);
                             "[Invoke][GetShapeRange] failed, ret=%u, model_id = %u.", range_ret, model_id_);
      for (size_t k = 0; k < range.size(); ++k) {
        if (k >= shape.GetDimNum()) {
          break;
        }
        // range[k].second can be -1
        if (shape.GetDim(k) < range[k].first || (range[k].second >= 0 && shape.GetDim(k) > range[k].second)) {
          GELOGE(PARAM_INVALID, "Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld]",
                 input_index, k, shape.GetDim(k), range[k].first, range[k].second);
          GELOGE(PARAM_INVALID,
              "[Check][Range]Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld], model_id = %u.",
              input_index, k, shape.GetDim(k), range[k].first, range[k].second, model_id_);
          REPORT_INNER_ERROR("E19999",
              "Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld], model_id = %u.",
              input_index, k, shape.GetDim(k), range[k].first, range[k].second, model_id_);
          return PARAM_INVALID;
        }
      }
@@ -273,9 +300,8 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
      args.input_desc[input_index] = tensor_desc;
      GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size),
                              "Failed to calc tensor size, index = %zu, shape = [%s]",
                              input_index,
                              tensor_desc->GetShape().ToString().c_str());
          "[Invoke][GetTensorMemorySizeInBytes]Failed to calc tensor size, index = %zu, shape = [%s], model_id = %u.",
          input_index, tensor_desc->GetShape().ToString().c_str(), model_id_);
      GELOGD("Input tensor[%zu] size = %zu", input_index, tensor_size);
    }

@@ -291,12 +317,16 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
    GELOGD("To copy input data for input[%zu]", input_index);
    const DataBuffer &data_buf = blobs[input_index];
    auto mem_size = static_cast<uint64_t>(tensor_size);
    GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length,
                           PARAM_INVALID,
                           "input data size(%lu) does not match model required size(%lu), ret failed.",
                           data_buf.length,
                           mem_size);

    if (mem_size < data_buf.length) {
      REPORT_INNER_ERROR("E19999",
          "input data size(%lu) does not match model required size(%lu) when %s, ret failed, model_id = %u.",
          data_buf.length, mem_size, __FUNCTION__, model_id_);
      GELOGE(PARAM_INVALID,
          "[Check][Size]input data size(%lu) does not match model required size(%lu), ret failed, model_id = %u.",
          data_buf.length, mem_size, model_id_);
      return PARAM_INVALID;
    }
    if (data_buf.length > 0) {
      GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%zu] memaddr[%p] mem_size[%zu] datasize[%lu]",
             model_->root_runtime_param_.graph_id,
@@ -351,7 +381,7 @@ Status HybridModelAsyncExecutor::OnComputeDone(uint32_t data_index, uint32_t res
  GELOGD("OnComputeDone. model id = %u, data index = %u, execution ret = %u", model_id_, data_index, result_code);
  if (listener_ != nullptr) {
    GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_index, result_code, outputs),
                  "OnComputeDone failed");
                  "[Invoke][OnComputeDone] failed, model_id = %u.", model_id_);
  }

  return result_code;
@@ -365,9 +395,11 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
  std::vector<TensorValue> &output_tensors = args.outputs;
  if (output_tensor_desc_list.size() != output_tensors.size()) {
    GELOGE(INTERNAL_ERROR,
           "Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu",
           output_tensor_desc_list.size(),
           output_tensors.size());
        "[Check][Size]Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu, model_id = %u.",
        output_tensor_desc_list.size(), output_tensors.size(), model_id_);
    REPORT_INNER_ERROR("E19999", "Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu, "
        "when HybridModelAsyncExecutor %s, model_id = %u.",
        output_tensor_desc_list.size(), output_tensors.size(), __FUNCTION__, model_id_);
    return INTERNAL_ERROR;
  }

@@ -399,8 +431,12 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
    GE_CHECK_LE(output_size, UINT32_MAX);
    if (output_tensor.GetSize() < static_cast<size_t>(output_size)) {
      GELOGE(INTERNAL_ERROR,
             "output[%zu] tensor size(%zu) is not enough for output shape [%s]",
             i, output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str());
          "[Check][Size]output[%zu] tensor size(%zu) is not enough for output shape [%s], model_id = %u.",
          i, output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str(), model_id_);
      REPORT_INNER_ERROR("E19999",
          "output[%zu] tensor size(%zu) is not enough for output shape [%s] model_id = %u,"
          " when HybridModelAsyncExecutor %s.",
          i, output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str(), model_id_, __FUNCTION__);
      return INTERNAL_ERROR;
    }

@@ -456,7 +492,7 @@ Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs,
    args.input_desc.emplace_back(tensor_desc_ptr);
  }

  GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model.");
  GE_CHK_STATUS_RET(executor_->Execute(args), "[Invoke][Execute] Failed, model_id = %u.", model_id_);
  for (const auto &output_tensor_desc : args.output_desc) {
    output_desc.emplace_back(*output_tensor_desc);
  }
@@ -477,13 +513,15 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<
  }

  HybridModelExecutor::ExecuteArgs args;
  GE_CHK_STATUS_RET(PrepareInputs(input_data, args), "Failed to copy input data to model");
  GE_CHK_STATUS_RET(PrepareInputs(input_data, args),
      "[Invoke][PrepareInputs]Failed to copy input data to model, model_id = %u", model_id_);
  GELOGD("Done copying input data successfully.");
  GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model.");
  GE_CHK_STATUS_RET(executor_->Execute(args), "[Invoke][Execute] Failed, model_id = %u.", model_id_);

  std::vector<ge::OutputTensorInfo> output_tensor_info_list;
  OutputData output_data;
  GE_CHK_STATUS_RET(CopyOutputs(args, &output_data, output_tensor_info_list), "Failed to copy outputs.");
  GE_CHK_STATUS_RET(CopyOutputs(args, &output_data, output_tensor_info_list),
      "[Invoke][CopyOutputs]Failed to copy outputs, model_id = %u.", model_id_);
  GELOGD("Done copying output data successfully. output count = %zu", output_tensor_info_list.size());

  int out_index = 0;
@@ -534,7 +572,8 @@ Status HybridModelAsyncExecutor::DumpOpDebug() {
      loop_cond = const_cast<void *>(varible_loop_cond->GetData());
    }
    data_dumper_.SetLoopAddr(global_step, loop_per_iter, loop_cond);
    GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "LoadDumpInfo failed in hybrid engine");
    GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(),
        "[Invoke][LoadDumpInfo] failed in hybrid engine, model_id = %u.", model_id_);
    GELOGD("Dump op debug SUCCESS in hybrid engine");
  }
  return SUCCESS;
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -72,7 +72,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
  if (ret == END_OF_SEQUENCE) {
    args.is_eos = true;
  } else {
    GE_CHK_STATUS_RET(ret, "Failed to execute model");
    GE_CHK_STATUS_RET(ret, "[Invoke][ExecuteGraphInternal] Failed, ret:%d.", ret);
  }
  return SUCCESS;
 }
--- a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc
@@ -59,23 +59,27 @@ Status StageExecutor::Start(const std::vector<TensorValue> &inputs, const std::v
    task_queue_.Pop(task_info);
    GELOGD("[Executor: %d] Got task, stage = %d, iteration = %ld", id_, task_info.stage, task_info.iteration);
    if (task_info.iteration >= pipe_config_->iteration_end) {
      GELOGE(INTERNAL_ERROR, "[Executor: %d] Unexpected iteration: %d", id_, task_info.iteration);
      GELOGE(INTERNAL_ERROR, "[Check][Range][Executor: %d] Unexpected iteration: %ld.",
          id_, task_info.iteration);
      REPORT_INNER_ERROR("E19999", "[Executor: %d] Unexpected iteration: %ld when StageExecutor %s.",
          id_, task_info.iteration, __FUNCTION__);
      return INTERNAL_ERROR;
    }

    if (task_info.event != nullptr) {
      GELOGD("[%d] Add StreamWaitEvent", id_);
      GE_CHK_RT_RET(rtStreamWaitEvent(stream_, task_info.event));
      RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %d] [Stage = %d] End", task_info.iteration - 1,
      RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %ld] [Stage = %d] End", task_info.iteration - 1,
                                   task_info.stage);
    }

    RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %d] [Stage = %d] Start", task_info.iteration,
    RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %lld] [Stage = %d] Start", task_info.iteration,
                                 task_info.stage);

    if (task_info.stage == 0) {
      GELOGD("[Executor: %d] To ResetExecutionContext", id_);
      GE_CHK_STATUS_RET(ResetExecutionContext(context_), "[Executor: %d] Failed to reset context", id_);
      GE_CHK_STATUS_RET(ResetExecutionContext(context_),
          "[Invoke][ResetExecutionContext][Executor: %d] Failed to reset context", id_);
      context_.iteration = task_info.iteration;
      GE_CHK_STATUS_RET_NOLOG(SetInputs(inputs, input_desc));
    }
@@ -92,19 +96,22 @@ Status StageExecutor::Start(const std::vector<TensorValue> &inputs, const std::v

    auto sync_result = Synchronize();
    if (sync_result != SUCCESS) {
      GELOGE(sync_result, "[Executor: %d] Failed to sync result. iteration = %d", id_, task_info.iteration);

      GELOGE(sync_result,
          "[Invoke][Synchronize][Executor: %d] Failed to sync result:%d. iteration = %ld",
          id_, sync_result, task_info.iteration);
      REPORT_CALL_ERROR("E19999", "[Executor: %d] Failed to sync result:%d when StageExecutor %s. iteration = %ld",
          id_, sync_result, __FUNCTION__, task_info.iteration);
      context_.profiler->Dump(std::cout);
      context_.callback_manager->Destroy();
      RuntimeInferenceContext::DestroyContext(std::to_string(context_.context_id));
      return sync_result;
    }

    RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %d] [Stage = %d] End", task_info.iteration, task_info.stage);
    RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %ld] [Stage = %d] End", task_info.iteration, task_info.stage);

    // if not end stage
    if (task_info.stage >= pipe_config_->num_stages - 1) {
      RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %d] Schedule End", task_info.iteration);
      RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %ld] Schedule End", task_info.iteration);
      GELOGD("[Executor: %d] End of iteration [%ld]", id_, task_info.iteration);
      context_.callback_manager->Destroy();
      RuntimeInferenceContext::DestroyContext(std::to_string(context_.context_id));
@@ -242,7 +249,9 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar
    GELOGD("Start to sync result of executor[%zu]", i);
    auto ret = futures[i].get();
    if (ret != SUCCESS) {
      GELOGE(ret, "[Executor: %zu] Failed to schedule tasks.", i);
      GELOGE(ret, "[Check][Result][Executor: %zu] Failed to schedule tasks.", i);
      REPORT_INNER_ERROR("E19999", "[Executor: %zu] Failed to schedule tasks when HybridModelPipelineExecutor %s.",
          i, __FUNCTION__);
      has_error = true;
      continue;
    }
@@ -250,7 +259,9 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar
    ret = stage_executors_[i]->Synchronize();

    if (ret != SUCCESS) {
      GELOGE(ret, "[Executor: %zu] Failed to synchronize result.", i);
      GELOGE(ret, "[Invoke][Synchronize] failed for [Executor: %zu].", i);
      REPORT_CALL_ERROR("E19999", "[Executor: %zu] failed to Synchronize result when HybridModelPipelineExecutor %s.",
          i, __FUNCTION__);
      has_error = true;
      continue;
    }
@@ -266,13 +277,14 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar
  iteration_ = config_.iteration_end;

  if (has_error) {
    GELOGE(FAILED, "Error occurred while execution");
    GELOGE(FAILED, "[Check][Error]Error occurred while execution.");
    REPORT_INNER_ERROR("E19999", "Error occurred while execution when HybridModelPipelineExecutor %s.", __FUNCTION__);
    return FAILED;
  }

  auto last_iter_executor_idx = loop_count % stage_executors_.size();
  GE_CHK_STATUS_RET(stage_executors_[last_iter_executor_idx]->GetOutputs(args.outputs, args.output_desc),
                    "Failed to get output from executor[%zu]", last_iter_executor_idx);
                    "[Get][Outputs]Failed from executor[%zu]", last_iter_executor_idx);
  return SUCCESS;
 }

--- a/ge/hybrid/executor/hybrid_profiler.cc
+++ b/ge/hybrid/executor/hybrid_profiler.cc
@@ -40,7 +40,8 @@ void HybridProfiler::RecordEvent(EventType event_type, const char *fmt, ...) {

  char buf[kEventDescMax];
  if (vsnprintf_s(buf, kEventDescMax, kEventDescMax - 1, fmt, args) == -1) {
    GELOGE(FAILED, "Format %s failed.", fmt);
    GELOGE(FAILED, "[Parse][Param:fmt]Format %s failed.", fmt);
    REPORT_CALL_ERROR("E19999", "Parse Format %s failed when HybridProfiler %s.", fmt, __FUNCTION__);
    va_end(args);
    return;
  }
@@ -48,7 +49,10 @@ void HybridProfiler::RecordEvent(EventType event_type, const char *fmt, ...) {
  va_end(args);
  auto index = counter_++;
  if (index >= static_cast<int>(events_.size())) {
    GELOGE(INTERNAL_ERROR, "index out of range. index = %d, max event size = %zu", index, events_.size());
    GELOGE(INTERNAL_ERROR,
        "[Check][Range]index out of range. index = %d, max event size = %zu", index, events_.size());
    REPORT_INNER_ERROR("E19999", "index out of range when HybridProfiler %s. index = %d, max event size = %zu",
        __FUNCTION__, index, events_.size());
    return;
  }
  auto &evt = events_[index];
--- a/ge/hybrid/executor/node_done_manager.cc
+++ b/ge/hybrid/executor/node_done_manager.cc
@@ -28,7 +28,8 @@ bool NodeDoneManager::Cond::Await() {
  if (!cv_.wait_for(lk,
                    std::chrono::seconds(kDefaultWaitTimeoutInSec),
                    [&]() { return is_released_ || is_cancelled_; })) {
    GELOGE(INTERNAL_ERROR, "Wait timed out.");
    GELOGE(INTERNAL_ERROR, "[Invoke][wait_for]Wait timed out.");
    REPORT_INNER_ERROR("E19999", "wait timed out[%d] when %s.", kDefaultWaitTimeoutInSec, __FUNCTION__);
    return false;
  }

--- a/ge/hybrid/executor/node_state.cc
+++ b/ge/hybrid/executor/node_state.cc
@@ -67,7 +67,10 @@ Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target
    Format format = input_desc.GetFormat();
    DataType data_type = input_desc.GetDataType();
    if (TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size) != GRAPH_SUCCESS) {
      GELOGE(FAILED, "[%s] Calculate tensor memory size failed.", node_item.NodeName().c_str());
      GELOGE(FAILED, "[Invoke][CalcTensorMemSize] failed for [%s].",
          node_item.NodeName().c_str());
      REPORT_CALL_ERROR("E19999", "CalcTensorMemSize failed for [%s] when ShapeInferenceState %s.",
          node_item.NodeName().c_str(), __FUNCTION__);
      return FAILED;
    }
  }
@@ -121,13 +124,19 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
      }

      if (context.GetStatus() != SUCCESS) {
        GELOGE(FAILED, "[%s] Await pending shape cancelled", node_item.NodeName().c_str());
        GELOGE(FAILED, "[Check][Status][%s] Await pending shape cancelled.",
            node_item.NodeName().c_str());
        REPORT_CALL_ERROR("E19999", "[%s] Await pending shape cancelled when %s.",
            node_item.NodeName().c_str(), __FUNCTION__);
        break;
      }
    }

    if (!wait_success) {
      GELOGE(FAILED, "[%s] Wait for shape timeout.", node_item.NodeName().c_str());
      GELOGE(FAILED, "[Check][Status][%s] Wait for shape timeout:%d.",
          node_item.NodeName().c_str(), kWaitInternal);
      REPORT_CALL_ERROR("E19999", "[%s] Wait for shape timeout:%d when %s.",
          node_item.NodeName().c_str(), kWaitInternal, __FUNCTION__);
      return FAILED;
    }
  }
@@ -232,7 +241,7 @@ Status NodeState::WaitForPrepareDone() {
  if (prepare_future_.valid()) {
    GELOGD("[%s] Start to wait for prepare future.", GetName().c_str());
    GE_CHK_STATUS_RET(prepare_future_.get(),
                      "[%s] PreRun failed.", GetName().c_str());
        "[Check][Status][%s] PreRun failed.", GetName().c_str());
  }

  return SUCCESS;
--- a/ge/hybrid/executor/rt_callback_manager.cc
+++ b/ge/hybrid/executor/rt_callback_manager.cc
@@ -27,7 +27,8 @@ Status CallbackManager::RegisterCallback(rtStream_t stream, rtCallback_t callbac
  GE_CHK_RT_RET(rtEventCreate(&event));
  auto rt_ret = rtEventRecord(event, stream);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Failed to invoke rtEventRecord, error code = %d", rt_ret);
    GELOGE(RT_FAILED, "[Invoke][rtEventRecord] failed, error code = %d", rt_ret);
    REPORT_CALL_ERROR("E19999", "Invoke rtEventRecord failed when %s, error code = %d", __FUNCTION__, rt_ret);
    (void) rtEventDestroy(event);
    return RT_FAILED;
  }
@@ -50,7 +51,8 @@ Status CallbackManager::Init() {
    return CallbackProcess(context);
  }, ctx);
  if (!ret_future_.valid()) {
    GELOGE(INTERNAL_ERROR, "Failed to init callback manager.");
    GELOGE(INTERNAL_ERROR, "[Check][ShareState]Failed to init callback manager.");
    REPORT_INNER_ERROR("E19999", "Failed to init callback manager.");
    return INTERNAL_ERROR;
  }

@@ -73,7 +75,9 @@ Status CallbackManager::CallbackProcess(rtContext_t context) {

    auto rt_err = rtEventSynchronize(event);
    if (rt_err != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "rtEventSynchronize failed. ret = %d", rt_err);
      GELOGE(RT_FAILED, "[Invoke][rtEventSynchronize] failed. ret = %d", rt_err);
      REPORT_CALL_ERROR("E19999",
          "Invoke rtEventSynchronize failed when CallbackManager %s, ret = %d.", __FUNCTION__, rt_err);
      GE_CHK_RT(rtEventDestroy(event));
      return RT_FAILED;
    }
--- a/ge/hybrid/executor/subgraph_context.cc
+++ b/ge/hybrid/executor/subgraph_context.cc
@@ -50,9 +50,11 @@ NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) {
 Status SubgraphContext::SetInput(int index, const TensorValue &tensor) {
  if (static_cast<size_t>(index) >= all_inputs_.size()) {
    GELOGE(INTERNAL_ERROR,
           "output index output range. all input num = %zu, input index = %d",
           all_inputs_.size(),
           index);
        "[Check][Param:index]input index out of range. all input num = %zu, input index = %d",
        all_inputs_.size(), index);
    REPORT_INNER_ERROR("E19999",
        "input param index out of range when SubgraphContext %s, all input num = %zu, input index = %d.",
        __FUNCTION__, all_inputs_.size(), index);
    return INTERNAL_ERROR;
  }
  all_inputs_[index] = tensor;
@@ -68,10 +70,11 @@ Status SubgraphContext::SetOutput(const NodeItem &node_item, int output_index, c
  auto index = node_item.output_start + output_index;
  if ((output_index >= node_item.num_outputs) || (static_cast<size_t>(index) >= all_outputs_.size())) {
    GELOGE(INTERNAL_ERROR,
           "output index output range. all output num = %zu, node_item = %s, output index = %d",
           all_outputs_.size(),
           node_item.DebugString().c_str(),
           output_index);
        "[Check][Param:output_index]output index out of range. all output num = %zu, node_item = %s,"
        "output index = %d.", all_outputs_.size(), node_item.DebugString().c_str(), output_index);
    REPORT_INNER_ERROR("E19999", "output index out of range when SubgraphContext %s. "
        "all output num = %zu, node_item = %s, output index = %d.",
        __FUNCTION__, all_outputs_.size(), node_item.DebugString().c_str(), output_index);
    return INTERNAL_ERROR;
  }

@@ -126,7 +129,10 @@ Status SubgraphContext::Await(const NodePtr &node) {

 void SubgraphContext::OnError(Status error) {
  if (error != END_OF_SEQUENCE) {
    GELOGE(error, "[%s] Error occurred while executing graph.", graph_item_->GetName().c_str());
    GELOGE(error, "[Check][Param:error][%s] Error:%d occurred while executing graph.",
        graph_item_->GetName().c_str(), error);
    REPORT_INNER_ERROR("E19999", "[%s] Error:%d occurred while executing graph when SubgraphContext %s.",
        graph_item_->GetName().c_str(), error, __FUNCTION__);
  }
  node_done_manager_.Destroy();
 }
--- a/ge/hybrid/executor/subgraph_executor.cc
+++ b/ge/hybrid/executor/subgraph_executor.cc
@@ -44,7 +44,8 @@ Status SubgraphExecutor::Init(const std::vector<TensorValue> &inputs,
                              const std::vector<ConstGeTensorDescPtr> &input_desc) {
  subgraph_context_.reset(new(std::nothrow)SubgraphContext(graph_item_, context_));
  GE_CHECK_NOTNULL(subgraph_context_);
  GE_CHK_STATUS_RET(subgraph_context_->Init(), "[%s] Failed to init subgraph context.", graph_item_->GetName().c_str());
  GE_CHK_STATUS_RET(subgraph_context_->Init(),
      "[Init][SubgraphContext][%s] Failed to init subgraph context.", graph_item_->GetName().c_str());

  shape_inference_engine_.reset(new(std::nothrow) ShapeInferenceEngine(context_, subgraph_context_.get()));
  GE_CHECK_NOTNULL(shape_inference_engine_);
@@ -55,8 +56,8 @@ Status SubgraphExecutor::Init(const std::vector<TensorValue> &inputs,
                      graph_item_->GetName().c_str());
  } else {
    GE_CHK_STATUS_RET(InitInputsForKnownShape(inputs),
                      "[%s] Failed to init subgraph executor for known shape subgraph.",
                      graph_item_->GetName().c_str());
        "[Invoke][InitInputsForKnownShape][%s] Failed to init subgraph executor for known shape subgraph.",
        graph_item_->GetName().c_str());
  }

  return SUCCESS;
@@ -67,8 +68,13 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
  // Number of inputs of parent node should be greater or equal than that of subgraph
  auto input_nodes = graph_item_->GetInputNodes();
  if (inputs.size() < input_nodes.size()) {
    GELOGE(INTERNAL_ERROR, "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
           graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
    GELOGE(INTERNAL_ERROR,
        "[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
        graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
    REPORT_INNER_ERROR("E19999",
        "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs,"
        "check invalid when SubgraphExecutor %s.",
        graph_item_->GetName().c_str(), inputs.size(), input_nodes.size(), __FUNCTION__);
    return INTERNAL_ERROR;
  }

@@ -87,9 +93,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
           input_tensor.DebugString().c_str());

    GE_CHK_STATUS_RET(subgraph_context_->SetInput(*input_node, kDataInputIndex, input_tensor),
                      "[%s] Failed to set input tensor[%zu]",
                      graph_item_->GetName().c_str(),
                      i);
        "[Invoke][SetInput] failed for grap_item[%s] input tensor[%zu]", graph_item_->GetName().c_str(), i);

    if (force_infer_shape_ || input_node->is_dynamic) {
      GELOGD("[%s] Start to update input[%zu] for subgraph data node.", graph_item_->GetName().c_str(), i);
@@ -112,11 +116,12 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue>
    auto &parent_input_index = input_index_mapping[i];
    if (static_cast<size_t>(parent_input_index) >= inputs.size()) {
      GELOGE(INTERNAL_ERROR,
             "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs",
             graph_item_->GetName().c_str(),
             inputs.size(),
             parent_input_index + 1);

          "[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs",
          graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1);
      REPORT_INNER_ERROR("E19999",
          "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs,"
          "check invalid when %s.",
          graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1, __FUNCTION__);
      return INTERNAL_ERROR;
    }

@@ -136,10 +141,10 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                      const std::vector<ConstGeTensorDescPtr> &input_desc,
                                      const std::vector<TensorValue> &outputs) {
  GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false");
  GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str());
  GE_CHK_STATUS_RET(Init(inputs, input_desc), "[Invoke][Init]failed for [%s].", graph_item_->GetName().c_str());
  if (!outputs.empty()) {
    GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs),
                      "Failed to enable output zero copy by user provided outputs.");
        "[Invoke][EnableOutputZeroCopy] Failed by user provided outputs.");
  }
  if (!graph_item_->IsDynamic()) {
    return ExecuteAsyncForKnownShape(inputs);
@@ -194,12 +199,11 @@ Status SubgraphExecutor::ExecuteAsync(TaskContext &task_context) {
  }

  GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc),
                    "[%s] Failed to execute subgraph.",
                    graph_item_->GetName().c_str());
      "[Invoke][ExecuteAsync] failed for [%s].", graph_item_->GetName().c_str());

  GE_CHK_STATUS_RET(SetOutputsToParentNode(task_context),
                    "[%s] Failed to set output shapes to parent node.",
                    graph_item_->GetName().c_str());
      "[Invoke][SetOutputsToParentNode][%s] Failed to set output shapes to parent node.",
      graph_item_->GetName().c_str());
  return SUCCESS;
 }

@@ -239,7 +243,7 @@ Status SubgraphExecutor::PrepareNodes(int group) {
        if (node_item.kernel_task == nullptr) {
          GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str());
          GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_),
                            "[%s] Failed to create task.", p_node_state->GetName().c_str());
              "[Invoke][Compile] failed for [%s].", p_node_state->GetName().c_str());
        } else {
          node_state->SetKernelTask(node_item.kernel_task);
        }
@@ -248,7 +252,9 @@ Status SubgraphExecutor::PrepareNodes(int group) {
        GE_CHECK_NOTNULL(unique_task_context);
        const auto &task = node_state->GetKernelTask();
        if (task == nullptr) {
          GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str());
          GELOGE(INTERNAL_ERROR, "[Get][KernelTask] failed for[%s], NodeTask is null.", node_state->GetName().c_str());
          REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s when %s, nodetask is null.",
              node_state->GetName().c_str(), __FUNCTION__);
          return INTERNAL_ERROR;
        }
        auto shared_task_context = std::shared_ptr<TaskContext>(unique_task_context.release());
@@ -261,8 +267,10 @@ Status SubgraphExecutor::PrepareNodes(int group) {
        GELOGD("Got end of sequence");
        return SUCCESS;
      }
      GELOGE(INTERNAL_ERROR, "[%s] Error occurs while launching tasks. quit from preparing nodes.",
             graph_item_->GetName().c_str());
      GELOGE(INTERNAL_ERROR, "[Check][State][%s] Error occurs while launching tasks. quit from preparing nodes.",
          graph_item_->GetName().c_str());
      REPORT_INNER_ERROR("E19999", "[%s] Error occurs while launching tasks. quit from preparing nodes when %s.",
          graph_item_->GetName().c_str(), __FUNCTION__);
      return INTERNAL_ERROR;
    }

@@ -275,9 +283,9 @@ Status SubgraphExecutor::PrepareNodes(int group) {

 Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
  HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
                        "[%s] Failed to InferShape.", node_state.GetName().c_str());
      "[Invoke][InferShape] failed for [%s].", node_state.GetName().c_str());
  HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
                        "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
      "[Invoke][PropagateOutputShapes] failed for [%s].", node_state.GetName().c_str());
  return SUCCESS;
 }

@@ -285,7 +293,7 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
  auto &node_item = *node_state.GetNodeItem();
  if (node_item.kernel_task == nullptr) {
    GE_CHK_STATUS_RET(TaskCompileEngine::Compile(node_state, ctx),
                      "Failed to create task for node[%s]", node_state.GetName().c_str());
        "[Invoke][Compile] Failed for node[%s]", node_state.GetName().c_str());
  } else {
    node_state.SetKernelTask(node_item.kernel_task);
  }
@@ -293,7 +301,9 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
  GE_CHECK_NOTNULL(unique_task_context);
  const auto &task = node_state.GetKernelTask();
  if (task == nullptr) {
    GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state.GetName().c_str());
    GELOGE(INTERNAL_ERROR, "[Invoke][GetKernelTask] failed for[%s], NodeTask is null.", node_state.GetName().c_str());
    REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s, NodeTask is null when %s.",
        node_state.GetName().c_str(), __FUNCTION__);
    return INTERNAL_ERROR;
  }
  auto shared_task_context = std::shared_ptr<TaskContext>(unique_task_context.release());
@@ -309,7 +319,8 @@ Status SubgraphExecutor::LaunchTasks() {
  while (true) {
    NodeState *node_state = nullptr;
    if (!ready_queue_.Pop(node_state)) {
      GELOGE(INTERNAL_ERROR, "[%s] Failed to pop node.", graph_item_->GetName().c_str());
      GELOGE(INTERNAL_ERROR, "[Invoke][Pop] failed for [%s].", graph_item_->GetName().c_str());
      REPORT_CALL_ERROR("E19999", "invoke pop failed for %s when %s", graph_item_->GetName().c_str(), __FUNCTION__);
      return INTERNAL_ERROR;
    }

@@ -334,8 +345,7 @@ Status SubgraphExecutor::LaunchTasks() {
    GE_CHECK_NOTNULL(shared_task_context);
    shared_task_context->SetForceInferShape(force_infer_shape_);
    HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_),
                          "[%s] Execute node failed.",
                          node_state->GetName().c_str());
        "[Invoke][ExecuteAsync] failed for [%s].", node_state->GetName().c_str());
    GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str());
  }
 }
@@ -361,8 +371,7 @@ Status SubgraphExecutor::ScheduleTasks(int group) {
  }

  GE_CHK_STATUS_RET(prepare_future.get(),
                    "[%s] Error occurred in task preparation.",
                    graph_item_->GetName().c_str());
      "[Invoke][get] [%s] Error occurred in task preparation.", graph_item_->GetName().c_str());

  GELOGD("[%s] Done launching all tasks successfully.", graph_item_->GetName().c_str());
  return SUCCESS;
@@ -373,17 +382,18 @@ Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs) {
 }

 Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs, std::vector<ConstGeTensorDescPtr> &output_desc) {
  GE_CHK_STATUS_RET(GetOutputs(outputs), "[%s] Failed to get output tensors.", graph_item_->GetName().c_str());
  GE_CHK_STATUS_RET(GetOutputs(outputs), "[Invoke][GetOutputs] failed for [%s].", graph_item_->GetName().c_str());

  // copy output data from op to designated position
  GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc),
                    "[%s] Failed to get output tensor desc.",
                    graph_item_->GetName().c_str());
      "[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.", graph_item_->GetName().c_str());
  if (outputs.size() != output_desc.size()) {
    GELOGE(INTERNAL_ERROR,
           "Number of output tensors(%zu) mismatch number of output tensor desc(%zu).",
           outputs.size(),
           output_desc.size());
        "[Check][Size]Number of outputs(%zu) mismatch number of output_desc(%zu).",
        outputs.size(), output_desc.size());
    REPORT_INNER_ERROR("E19999", "Number of outputs(%zu) mismatch number of output_desc(%zu),"
        "check invlid when SubgraphExecutor %s.",
        outputs.size(), output_desc.size(), __FUNCTION__);
    return INTERNAL_ERROR;
  }
  return SUCCESS;
@@ -401,17 +411,17 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) {
  std::vector<TensorValue> outputs;
  std::vector<ConstGeTensorDescPtr> output_desc_list;
  GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs),
                    "[%s] Failed to get output tensors.",
                    graph_item_->GetName().c_str());
      "[Invoke][GetOutputs][%s] Failed to get output tensors.", graph_item_->GetName().c_str());
  GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc_list),
                    "[%s] Failed to get output tensor desc.",
                    graph_item_->GetName().c_str());
      "[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.", graph_item_->GetName().c_str());

  if (outputs.size() != output_desc_list.size()) {
    GELOGE(INTERNAL_ERROR, "[%s] num output tensors = %zu, num output tensor desc = %zu",
           graph_item_->GetName().c_str(),
           outputs.size(),
           output_desc_list.size());
    GELOGE(INTERNAL_ERROR, "[Check][Size][%s] num of output tensors = %zu, num of output tensor desc = %zu not equal",
        graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size());
    REPORT_INNER_ERROR("E19999",
        "%s num of output tensors = %zu, num of output tensor desc = %zu not equal,"
        "check invalid when SubgraphExecutor %s",
        graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size(), __FUNCTION__);
    return INTERNAL_ERROR;
  }

@@ -460,9 +470,10 @@ Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs
  const auto &output_edges = graph_item_->GetOutputEdges();
  // Op -> MetOutput, set the output tensor of Op that output to the NetOutput node
  if (outputs.size() != output_edges.size()) {
    GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu",
           output_edges.size(),
           outputs.size());
    GELOGE(PARAM_INVALID, "[Check][Size]Output number mismatches, expect = %zu, but given = %zu",
        output_edges.size(), outputs.size());
    REPORT_INNER_ERROR("E19999", "Output number mismatches, expect = %zu, but given = %zu when %s",
        output_edges.size(), outputs.size(), __FUNCTION__);
    return PARAM_INVALID;
  }

@@ -478,9 +489,7 @@ Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs
           output_tensor.DebugString().c_str());

    GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
                      "[%s] Failed to set input tensor[%zu]",
                      graph_item_->GetName().c_str(),
                      i);
        "[Invoke][SetOutput][%s] Failed to set input tensor[%zu]", graph_item_->GetName().c_str(), i);
  }

  GELOGD("Done enabling zero copy for outputs successfully.");
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -102,11 +102,13 @@ Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) {

    if (output_tensor->GetSize() < static_cast<size_t>(tensor_size)) {
      GELOGE(INTERNAL_ERROR,
             "[%s] Tensor size is not enough. output index = %d, required size = %ld, tensor = %s",
             node_item.NodeName().c_str(),
             output_idx,
             tensor_size,
             output_tensor->DebugString().c_str());
          "[Check][Size][%s] Tensor size is not enough. output index = %d, required size = %ld, tensor = %s.",
          node_item.NodeName().c_str(), output_idx, tensor_size,
          output_tensor->DebugString().c_str());
      REPORT_INNER_ERROR("E19999",
          "[%s] Tensor size is not enough. output index = %d, required size = %ld, tensor = %s when %s.",
          node_item.NodeName().c_str(), output_idx, tensor_size,
          output_tensor->DebugString().c_str(), __FUNCTION__);
      return INTERNAL_ERROR;
    }

@@ -128,7 +130,7 @@ Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) {
    GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(context_id, &runtime_infer_ctx),
                            "Failed to get RuntimeInferenceContext, context_id = %s", context_id.c_str());
    GE_CHK_STATUS_RET(runtime_infer_ctx->SetTensor(node_item.node_id, output_idx, std::move(tensor)),
                      "Failed to SetTensor, node = %s, output_index = %d", node_item.NodeName().c_str(), output_idx);
                      "[Set][Tensor] Failed, node = %s, output_index = %d", node_item.NodeName().c_str(), output_idx);
    GELOGD("[%s] Output[%d] cached successfully in context: %s. node_id = %d, shape = [%s]",
           node_item.NodeName().c_str(),
           output_idx,
@@ -173,7 +175,8 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
 Status NodeDoneCallback::ProfilingReport() {
  auto node = context_->GetNodeItem().node;
  if (node == nullptr) {
    GELOGE(PARAM_INVALID, "Get node is nullptr");
    GELOGE(PARAM_INVALID, "[Get][Node] value is nullptr.");
    REPORT_INNER_ERROR("E19999", "Get node failed, when %s.", __FUNCTION__);
    return PARAM_INVALID;
  }

@@ -190,7 +193,8 @@ Status NodeDoneCallback::ProfilingReport() {
  std::vector<TaskDescInfo> task_desc_info;
  auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info);
  if (profiling_ret != RT_ERROR_NONE) {
    GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str());
    GELOGE(profiling_ret, "[Get][TaskDescInfo] of node:%s failed.", node->GetName().c_str());
    REPORT_CALL_ERROR("E19999", "GetTaskDescInfo of node:%s failed, when %s.", node->GetName().c_str(), __FUNCTION__);
    return profiling_ret;
  }

@@ -202,7 +206,8 @@ Status NodeDoneCallback::ProfilingReport() {
 Status NodeDoneCallback::DumpDynamicNode() {
  auto node = context_->GetNodeItem().node;
  if (node == nullptr) {
    GELOGE(PARAM_INVALID, "Get node is nullptr");
    GELOGE(PARAM_INVALID, "[Get][Node] value is nullptr.");
    REPORT_INNER_ERROR("E19999", "get node is nullptr when %s.", __FUNCTION__);
    return PARAM_INVALID;
  }
  auto op_desc = node->GetOpDesc();
@@ -211,13 +216,13 @@ Status NodeDoneCallback::DumpDynamicNode() {
  vector<uintptr_t> output_addrs;
  for (int i = 0; i < context_->NumInputs(); i++) {
    auto tensor_value = context_->GetInput(i);
    GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr");
    GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "[Get][Tensor] value is nullptr.");
    uint64_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
    input_addrs.emplace_back(input_addr);
  }
  for (int j = 0; j < context_->NumOutputs(); j++) {
    auto tensor_value = context_->GetOutput(j);
    GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr");
    GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "[Get][Tensor] value is nullptr.");
    uint64_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
    output_addrs.emplace_back(output_addr);
  }
@@ -245,11 +250,12 @@ Status NodeDoneCallback::DumpDynamicNode() {
  void *global_step = context_->GetExecutionContext()->global_step;
  dump_op_.SetLoopAddr(global_step, loop_per_iter, loop_cond);

  GE_CHK_STATUS_RET(dump_op_.LaunchDumpOp(), "Failed to launch dump op in hybird model");
  GE_CHK_STATUS_RET(dump_op_.LaunchDumpOp(), "[Launch][DumpOp] failed in hybird model.");

  auto rt_ret = rtStreamSynchronize(stream);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(rt_ret, "rtStreamSynchronize failed");
    GELOGE(rt_ret, "[Call][rtStreamSynchronize] failed, ret = %d.", rt_ret);
    REPORT_CALL_ERROR("E19999", "call rtStreamSynchronize failed when %s, ret = %d.", __FUNCTION__, rt_ret);
    return rt_ret;
  }
  return SUCCESS;
@@ -264,12 +270,12 @@ Status NodeDoneCallback::OnNodeDone() {
  const DumpProperties &dump_properties = context_->GetDumpProperties();
  if (dump_properties.IsDumpOpen() || context_->IsOverFlow()) {
    GELOGI("Start to dump dynamic shape op");
    GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node");
    GE_CHK_STATUS_RET(DumpDynamicNode(), "[Call][DumpDynamicNode] Failed.");
  }

  if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    GE_CHK_STATUS_RET(ProfilingReport(), "Report node[%s] to profiling failed.",
                      node_item.NodeName().c_str());
    GE_CHK_STATUS_RET(ProfilingReport(), "[Report][Profiling] of node[%s] failed.",
        node_item.NodeName().c_str());
  }

  // release workspace
@@ -292,8 +298,7 @@ Status NodeDoneCallback::OnNodeDone() {
    }

    GE_CHK_STATUS_RET(context_->PropagateOutputs(),
                      "[%s] Failed to propagate outputs failed",
                      node_item.NodeName().c_str());
        "[Propagate][Outputs] of [%s] failed.", node_item.NodeName().c_str());

    RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[PropagateOutputs] End");
  }
@@ -333,7 +338,8 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state,
                                       const std::function<void()> &callback) {
  const auto &task = node_state.GetKernelTask();
  if (task == nullptr) {
    GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state.GetName().c_str());
    GELOGE(INTERNAL_ERROR, "[Get][KernelTask] of [%s] is null.", node_state.GetName().c_str());
    REPORT_INNER_ERROR("E19999", "GetKernelTask of %s is null when %s.", node_state.GetName().c_str(), __FUNCTION__);
    return INTERNAL_ERROR;
  }

@@ -348,8 +354,7 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state,
  GE_CHECK_NOTNULL(executor);
  RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] Start");
  GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context),
                    "[%s] Failed to prepare task",
                    node_state.GetName().c_str());
      "[Prepare][Task] for [%s] failed.", node_state.GetName().c_str());
  RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] End");
  GELOGD("[%s] Done task preparation successfully.", node_state.GetName().c_str());

@@ -360,7 +365,8 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state,
    }
  }

  GE_CHK_STATUS_RET(ValidateInputTensors(node_state, task_context), "Failed to validate input tensors.");
  GE_CHK_STATUS_RET(ValidateInputTensors(node_state, task_context), "[Validate][InputTensors] for %s failed.",
      node_state.GetName().c_str());
  RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ValidateInputTensors] End");

  if (context.profiling_level > 0) {
@@ -414,11 +420,10 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const
               input_tensor->GetSize());
      } else {
        GELOGE(INTERNAL_ERROR,
               "[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu",
               task_context.GetNodeName(),
               i,
               expected_size,
               input_tensor->GetSize());
            "[Check][Size] for [%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu.",
            task_context.GetNodeName(), i, expected_size, input_tensor->GetSize());
        REPORT_INNER_ERROR("E19999", "[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu when %s.",
            task_context.GetNodeName(), i, expected_size, input_tensor->GetSize(), __FUNCTION__);
        return INTERNAL_ERROR;
      }
    }
@@ -432,8 +437,7 @@ Status ExecutionEngine::PropagateOutputs(const NodeItem &node_item,
                                         GraphExecutionContext &context) {
  if (node_item.shape_inference_type != DEPEND_COMPUTE) {
    GE_CHK_STATUS_RET(task_context.PropagateOutputs(),
                      "[%s] Failed to propagate outputs.",
                      node_item.NodeName().c_str());
        "[Propagate][Outputs] for [%s] failed.", node_item.NodeName().c_str());
    RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PropagateOutputs] End");
    GELOGD("[%s] Done propagating outputs successfully.", node_item.NodeName().c_str());
  }
--- a/ge/hybrid/executor/worker/shape_inference_engine.cc
+++ b/ge/hybrid/executor/worker/shape_inference_engine.cc
@@ -70,7 +70,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
  {
    RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start");
    GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
                      "Invoke InferShapeAndType failed.");
        "[Invoke][InferShapeAndType] for %s failed.", node_item.NodeName().c_str());
    RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End");
  }

@@ -172,8 +172,7 @@ Status ShapeInferenceEngine::InferShapeForSubgraph(const NodeItem &node_item, co
    GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndType(node));
    GELOGD("[%s] Done invoking InferShapeAndType", node->GetName().c_str());
    GE_CHK_STATUS_RET(UpdatePeerNodeShape(*node),
                      "[%s] Failed to update shapes of peer node.",
                      node->GetName().c_str());
        "[Update][PeerNodeShape] failed for [%s].", node->GetName().c_str());
  }

  for (auto &it : fused_subgraph.output_mapping) {
@@ -205,7 +204,9 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) {
      GE_CHECK_NOTNULL(peer_op_desc);
      auto peer_input_desc = peer_op_desc->MutableInputDesc(peer_anchor->GetIdx());
      if (peer_input_desc == nullptr) {
        GELOGE(GRAPH_FAILED, "peer_input_desc is nullptr");
        GELOGE(GRAPH_FAILED, "[Call][MutableInputDesc] for %s return nullptr.", peer_op_desc->GetName().c_str());
        REPORT_CALL_ERROR("E19999", "%s call MutableInputDesc return nullptr when ShapeInferenceEngine %s.",
            peer_op_desc->GetName().c_str(), __FUNCTION__);
        continue;
      }

@@ -230,8 +231,11 @@ Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
  const auto &tensor_shape = tensor_desc.MutableShape();
  if (tensor_shape.IsUnknownShape()) {
    if (!fallback_with_range) {
      GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]",
             tensor_shape.ToString().c_str());
      GELOGE(INTERNAL_ERROR,
          "[Is][UnknownShape] Output shape is still unknown after shape inference. shape = [%s].",
          tensor_shape.ToString().c_str());
      REPORT_INNER_ERROR("E19999", "Output shape is still unknown after shape inference. "
          "shape = [%s] when ShapeInferenceEngine %s.", tensor_shape.ToString().c_str(), __FUNCTION__);
      return INTERNAL_ERROR;
    }

@@ -239,9 +243,10 @@ Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
    std::vector<std::pair<int64_t, int64_t>> shape_range;
    GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range");
    if (shape_range.size() != shape.size()) {
      GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)",
             shape_range.size(),
             shape.size());
      GELOGE(INTERNAL_ERROR, "[Check][Size] Number of shape ranges (%zu) mismatches that of dims (%zu).",
          shape_range.size(), shape.size());
      REPORT_INNER_ERROR("E19999", "Number of shape ranges (%zu) mismatches that of dims (%zu)"
          " when ShapeInferenceEngine %s.", shape_range.size(), shape.size(), __FUNCTION__);
      return INTERNAL_ERROR;
    }

@@ -265,7 +270,10 @@ Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,
  GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str());
  uint32_t type_size;
  if (!TypeUtils::GetDataTypeLength(data_type, type_size)) {
    GELOGE(INTERNAL_ERROR, "Failed to get data type size");
    GELOGE(INTERNAL_ERROR, "[Get][DataTypeLength] failed for type:%s.",
        TypeUtils::DataTypeToSerialString(data_type).c_str());
    REPORT_CALL_ERROR("E19999", "GetDataTypeLength failed for type:%s when ShapeInferenceEngine %s.",
        TypeUtils::DataTypeToSerialString(data_type).c_str(), __FUNCTION__);
    return INTERNAL_ERROR;
  }

@@ -273,15 +281,13 @@ Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,
  for (const auto &dim : shape) {
    GE_CHECK_GE(dim, 0);
    GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
                      "Shape size overflow, shape = [%s]",
                      GeShape(shape).ToString().c_str());
        "[Check][Overflow] Shape size overflow, shape = [%s]", GeShape(shape).ToString().c_str());
    tensor_size *= dim;
  }

  GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
                    "Tensor size is too large: %ld, shape = [%s]",
                    tensor_size,
                    GeShape(shape).ToString().c_str());
      "[Check][Overflow]Tensor size is too large: %ld, shape = [%s] Shape size will overflow when add align.",
      tensor_size, GeShape(shape).ToString().c_str());
  tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment;
  return SUCCESS;
 }
@@ -294,16 +300,24 @@ Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bo
    const auto &shape = tensor_desc->MutableShape();
    // modify on copy
    auto dims = shape.GetDims();
    GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range),
                      "[%s] Failed to canonicalize shape for output %zu",
                      node_item.NodeName().c_str(),
                      output_index);

    auto status_result = CanonicalizeShape(*tensor_desc, dims, fallback_with_range);
    if (status_result != SUCCESS) {
      REPORT_CALL_ERROR("E19999",
                        "Invoke CanonicalizeShape failed when ShapeInferenceEngine %s, node:%s, output:%zu.",
                        node_item.NodeName().c_str(), __FUNCTION__, output_index);
      GELOGE(ge::FAILED, "[Canonicalize][Shape] failed for [%s], output %zu.",
             node_item.NodeName().c_str(), output_index);
      return status_result;
    }
    int64_t tensor_size;
    GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size),
                      "[%s] Failed to calc tensor size for output %zu",
                      node_item.NodeName().c_str(),
                      output_index);
    status_result = CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size);
    if (status_result != SUCCESS) {
      REPORT_CALL_ERROR("E19999", "Invoke CalcTensorSize failed when ShapeInferenceEngine %s, node:%s, output:%zu.",
          node_item.NodeName().c_str(), __FUNCTION__, output_index);
      GELOGE(ge::FAILED, "[Calc][TensorSize] failed for [%s], output %zu.",
          node_item.NodeName().c_str(), output_index);
      return status_result;
    }
    GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size);
    (void) TensorUtils::SetSize(*tensor_desc, tensor_size);
  }
--- a/ge/hybrid/executor/worker/task_compile_engine.cc
+++ b/ge/hybrid/executor/worker/task_compile_engine.cc
@@ -32,7 +32,7 @@ Status TaskCompileEngine::Compile(NodeState &node_state, GraphExecutionContext *
  shared_ptr<NodeTask> kernel_task;
  auto ret = node_item.node_executor->CompileTask(*context->model, node_item.node, kernel_task);
  RECORD_COMPILE_EVENT(context, node_state.GetName().c_str(), "[Compile] End");
  GE_CHK_STATUS_RET(ret, "Failed to create task for node: %s", node_item.NodeName().c_str());
  GE_CHK_STATUS_RET(ret, "[Compile][Task] failed for node: %s.", node_item.NodeName().c_str());
  node_state.SetKernelTask(kernel_task);
  GELOGI("Compiling node %s successfully", node_state.GetName().c_str());
  return SUCCESS;
--- a/ge/hybrid/model/graph_item.cc
+++ b/ge/hybrid/model/graph_item.cc
@@ -95,7 +95,8 @@ Status GraphItem::GroupNodes() {
    int group = node->group;
    if (group != last_group) {
      if (seen_groups.find(group) != seen_groups.end()) {
        GELOGE(INTERNAL_ERROR, "Unordered node group found. node = %s, group = %d", node->NodeName().c_str(), group);
        GELOGE(INTERNAL_ERROR,
            "[Find][Group]Unordered node group found. node = %s, group = %d", node->NodeName().c_str(), group);
        return INTERNAL_ERROR;
      } else {
        last_group = group;
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -71,8 +71,10 @@ Status SetOutputNameAttr(ComputeGraph &graph) {
    }
  }
  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&graph, ATTR_MODEL_OUT_NODES_NAME, output_names),
                   GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed.");
                   return FAILED);
      GELOGE(FAILED, "[Invoke][SetListStr] failed, name:%s.", ATTR_MODEL_OUT_NODES_NAME.c_str());
      REPORT_CALL_ERROR("E19999", "SetListStr failed when %s, name:%s.",
          __FUNCTION__, ATTR_MODEL_OUT_NODES_NAME.c_str());
      return FAILED);
  return SUCCESS;
 }

@@ -109,10 +111,12 @@ Status CollectDependenciesForFusedGraph(NodeItem &node_item, std::set<OpDesc *>
      GE_CHECK_NOTNULL(src_op_desc);
      if (src_node->GetType() != DATA_TYPE) {
        GELOGE(UNSUPPORTED,
               "[%s::%s] Node in fused subgraph can only depend on Data nodes, but depend on %s",
               node_item.NodeName().c_str(),
               node->GetName().c_str(),
               src_node->GetType().c_str());
            "[Check][NodeType][%s::%s] Node in fused subgraph can only depend on Data nodes,"
            "but depend on %s actually",
            node_item.NodeName().c_str(), node->GetName().c_str(), src_node->GetType().c_str());
        REPORT_INNER_ERROR("E19999", "[%s::%s] Node in fused subgraph can only depend on Data nodes,"
            " but depend on %s actually, check invalid when %s.",
            node_item.NodeName().c_str(), node->GetName().c_str(), src_node->GetType().c_str(), __FUNCTION__);
        return UNSUPPORTED;
      }

@@ -129,37 +133,39 @@ HybridModelBuilder::HybridModelBuilder(HybridModel &hybrid_model)
 }

 Status HybridModelBuilder::Build() {
  GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
  GE_CHK_STATUS_RET(ValidateParams(), "[Invoke][ValidateParams] failed, model_name_:[%s]", GetGraphName());
  hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName();
  GELOGI("[%s] Start to build hybrid model.", GetGraphName());
  GE_CHK_STATUS_RET(InitRuntimeParams(), "[%s] Failed to InitRuntimeParams", GetGraphName());
  GE_CHK_STATUS_RET(RecoverGraphUnknownFlag(), "[%s] Failed to RecoverGraphUnknownFlag", GetGraphName());
  GE_CHK_STATUS_RET(IndexSpecialNodes(), "[%s] Failed to index nodes", GetGraphName());
  GE_CHK_STATUS_RET(IndexTaskDefs(), "[%s] Failed to index task defs", GetGraphName());
  GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
  GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
  GE_CHK_STATUS_RET(AssignUninitializedConstantOps(), "[%s] Failed to assign uninitialized constants", GetGraphName());
  GE_CHK_STATUS_RET(TransAllVarData(), "[%s] Failed to trans all var data", GetGraphName());
  GE_CHK_STATUS_RET(CopyVarData(), "[%s] Failed to copy var data", GetGraphName());
  GE_CHK_STATUS_RET(InitModelMem(), "[%s] Failed to init memory", GetGraphName());
  GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName());
  GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName());
  GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
  GE_CHK_STATUS_RET(InitRuntimeParams(), "[Invoke][InitRuntimeParams] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(RecoverGraphUnknownFlag(),
      "[Invoke][RecoverGraphUnknownFlag] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(IndexSpecialNodes(), "[Invoke][IndexSpecialNodes] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(IndexTaskDefs(), "[Invoke][IndexTaskDefs] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(InitWeights(), "[Invoke][InitWeights] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(LoadGraph(), "[Invoke][LoadGraph] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(AssignUninitializedConstantOps(),
      "[Invoke][AssignUninitializedConstantOps] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(TransAllVarData(), "[Invoke][TransAllVarData] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(CopyVarData(), "[Invoke][CopyVarData] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(InitModelMem(), "[Invoke][InitModelMem] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(InitConstantOps(), "[Invoke][InitConstantOps] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(InitVariableTensors(), "[Invoke][InitVariableTensors], model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(LoadTasks(), "[Invoke][LoadTasks] failed, model_name_:[%s]", GetGraphName());
  GELOGI("[%s] Done building hybrid model successfully.", GetGraphName());
  return SUCCESS;
 }

 Status HybridModelBuilder::BuildForSingleOp() {
  GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
  GE_CHK_STATUS_RET(ValidateParams(), "[Invoke][ValidateParams] failed, model_name_:[%s]", GetGraphName());
  hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName();
  GELOGI("[%s] Start to build hybrid model.", GetGraphName());
  auto ret = ge_root_model_->GetSubgraphInstanceNameToModel();
  const GeModelPtr ge_model = ret[ge_root_model_->GetRootGraph()->GetName()];
  GE_CHK_STATUS_RET(IndexTaskDefs(ge_root_model_->GetRootGraph(), ge_model),
                    "[%s] Failed to index task defs", GetGraphName());
  GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
  GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
  GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
                    "[Invoke][IndexTaskDefs] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(LoadGraph(), "[Invoke][LoadGraph] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(InitWeights(), "[Invoke][InitWeights] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET(LoadTasks(), "[Invoke][LoadTasks] failed, model_name_:[%s]", GetGraphName());
  GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName());
  return SUCCESS;
 }
@@ -173,18 +179,20 @@ Status HybridModelBuilder::ValidateParams() {
 Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_item) {
  auto op_desc = node->GetOpDesc();
  GE_CHK_STATUS_RET(ParseForceInfershapeNodes(node, node_item),
                    "[%s] Failed to parse force_infershape node.",
                    "[Invoke][ParseForceInfershapeNodes]failed, node:[%s].",
                    node_item.NodeName().c_str());
  vector<string> dependencies = node->GetOpDesc()->GetOpInferDepends();
  GE_CHK_STATUS_RET(ParseDependentInputNodes(node_item, dependencies),
                    "[%s] Failed to parse node dependencies.",
                    "[Invoke][ParseDependentInputNodes]failed, node:[%s].",
                    node_item.NodeName().c_str());

  node_item.outputs.resize(node_item.num_outputs);
  for (int i = 0; i < node_item.num_outputs; ++i) {
    auto out_data_anchor = node->GetOutDataAnchor(i);
    if (out_data_anchor == nullptr) {
      GELOGE(INTERNAL_ERROR, "out anchor[%d] of node %s is nullptr", i, node->GetName().c_str());
      GELOGE(INTERNAL_ERROR, "[Get][OutDataAnchor]out anchor[%d] of node %s is nullptr", i, node->GetName().c_str());
      REPORT_CALL_ERROR("E19999", "out anchor[%d] of node %s is nullptr when %s",
          i, node->GetName().c_str(), __FUNCTION__);
      return INTERNAL_ERROR;
    }

@@ -197,12 +205,11 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite

      NodeItem *dst_node_item = nullptr;
      GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item),
                        "[%s] Failed to get or create node item.",
          "[GetOrCreate][NodeItem] failed, dst_node:[%s].",
                        dst_node->GetName().c_str());
      int canonical_index;
      GE_CHK_STATUS_RET(dst_node_item->GetCanonicalInputIndex(dst_in_anchor->GetIdx(), canonical_index),
                        "[%s] Failed to canonical input index",
                        dst_node->GetName().c_str());
          "[Invoke][GetCanonicalInputIndex] failed, dst_node:[%s].", dst_node->GetName().c_str());

      node_item.outputs[i].emplace_back(canonical_index, dst_node_item);
    }
@@ -246,7 +253,7 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n
  }

  std::unique_ptr<NodeItem> new_node;
  GE_CHK_STATUS_RET(NodeItem::Create(node, new_node), "Failed to create node item");
  GE_CHK_STATUS_RET(NodeItem::Create(node, new_node), "[Invoke][Create] failed, model_name_:[%s]", GetGraphName());
  GE_CHK_STATUS_RET_NOLOG(NodeExecutorManager::GetInstance().GetExecutor(*node, &new_node->node_executor));

  // we do not need L2 Buffer
@@ -330,10 +337,10 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
  for (const auto &input_name : dependencies) {
    int input_index = node_item.op_desc->GetInputIndexByName(input_name);
    if (input_index < 0) {
      GELOGE(INTERNAL_ERROR,
             "[%s] Failed to get input index by name: %s",
             node_item.NodeName().c_str(),
             input_name.c_str());
      GELOGE(INTERNAL_ERROR, "[Get][InputIndex]failed, node:[%s] inputname: %s.",
          node_item.NodeName().c_str(), input_name.c_str());
      REPORT_CALL_ERROR("E19999", "GetInputIndexByName failed when HybridModelBuilder %s, node:[%s] inputname: %s.",
          __FUNCTION__, node_item.NodeName().c_str(), input_name.c_str());
      return INTERNAL_ERROR;
    }

@@ -380,10 +387,10 @@ Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item, s
  for (auto &op_desc : data_ops) {
    uint32_t parent_index = 0;
    if (!AttrUtils::GetInt(*op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
      GELOGE(INTERNAL_ERROR,
             "[%s] Failed to get attr [%s]",
             op_desc->GetName().c_str(),
             ATTR_NAME_PARENT_NODE_INDEX.c_str());
      GELOGE(INTERNAL_ERROR, "[Invoke][GetInt] failed, node:[%s]  attr:[%s]",
          op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
      REPORT_CALL_ERROR("E19999", "invoke GetInt failed when %s, node:[%s]  attr:[%s]",
          __FUNCTION__, op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
      return INTERNAL_ERROR;
    }

@@ -413,24 +420,33 @@ Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item, s

 Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) {
  if (NodeUtils::SetAllAnchorStatus(node) != GRAPH_SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[%s] NodeUtils::SetAllAnchorStatus failed.", node->GetName().c_str());
    GELOGE(INTERNAL_ERROR, "[Invoke][SetAllAnchorStatus] failed, node:[%s].", node->GetName().c_str());
    REPORT_CALL_ERROR("E19999", "[%s] NodeUtils::SetAllAnchorStatus failed when %s.",
        node->GetName().c_str(), __FUNCTION__);
    return INTERNAL_ERROR;
  }
  for (auto &anchor : node->GetAllInDataAnchors()) {
    auto peer_anchor = anchor->GetPeerOutAnchor();
    if (peer_anchor == nullptr) {
      if (AnchorUtils::SetStatus(anchor, ANCHOR_SUSPEND) != GRAPH_SUCCESS) {
        GELOGE(INTERNAL_ERROR, "[%s] AnchorUtils::SetStatus failed.", node->GetName().c_str());
        GELOGE(INTERNAL_ERROR, "[Invoke][SetStatus] failed to set ANCHOR_SUSPEND, node:[%s].",
            node->GetName().c_str());
        REPORT_CALL_ERROR("E19999", "SetStatus failed to set ANCHOR_SUSPEND, node:[%s] when HybridModelBuilder %s.",
            node->GetName().c_str(), __FUNCTION__);
        return INTERNAL_ERROR;
      }
    } else if (peer_anchor->GetOwnerNode()->GetType() == CONSTANT) {
      if (AnchorUtils::SetStatus(anchor, ANCHOR_CONST) != GRAPH_SUCCESS) {
        GELOGE(INTERNAL_ERROR, "[%s] AnchorUtils::SetStatus failed.", node->GetName().c_str());
        GELOGE(INTERNAL_ERROR, "[Invoke][SetStatus] failed to set ANCHOR_CONST, node:[%s].", node->GetName().c_str());
        REPORT_CALL_ERROR("E19999", "SetStatus failed to set ANCHOR_CONST, node:[%s] when HybridModelBuilder %s.",
            node->GetName().c_str(), __FUNCTION__);
        return INTERNAL_ERROR;
      }
    } else {
      if (AnchorUtils::SetStatus(anchor, ANCHOR_DATA) != GRAPH_SUCCESS) {
        GELOGE(INTERNAL_ERROR, "[%s] AnchorUtils::SetStatus failed.", node->GetName().c_str());
        GELOGE(INTERNAL_ERROR, "[Invoke][SetStatus] failed to set ANCHOR_DATA, node:[%s].", node->GetName().c_str());
        REPORT_CALL_ERROR("E19999", "SetStatus failed to set ANCHOR_DATA, node:[%s] when HybridModelBuilder %s.",
            node->GetName().c_str(), __FUNCTION__);
        return INTERNAL_ERROR;
      }
    }
@@ -441,11 +457,9 @@ Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) {

 Status HybridModelBuilder::DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_anchor,
                                               const InDataAnchorPtr &in_data_anchor) {
  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->Unlink(in_data_anchor), "Failed to unlink %s:%d from %s:%d",
                          out_data_anchor->GetOwnerNode()->GetName().c_str(),
                          out_data_anchor->GetIdx(),
                          in_data_anchor->GetOwnerNode()->GetName().c_str(),
                          in_data_anchor->GetIdx());
  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->Unlink(in_data_anchor),
      "[Invoke][Unlink] failed to unlink %s:%d from %s:%d", out_data_anchor->GetOwnerNode()->GetName().c_str(),
      out_data_anchor->GetIdx(), in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx());

  GELOGD("Succeeded in unlinking %s:%d from %s:%d",
         out_data_anchor->GetOwnerNode()->GetName().c_str(),
@@ -456,7 +470,7 @@ Status HybridModelBuilder::DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_
 }

 Status HybridModelBuilder::DoLinkDataAnchors(OutDataAnchorPtr &out_data_anchor, InDataAnchorPtr &in_data_anchor) {
  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->LinkTo(in_data_anchor), "Failed to link %s:%d to %s:%d",
  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->LinkTo(in_data_anchor), "[Invoke][LinkTo]Failed to link %s:%d to %s:%d",
                          out_data_anchor->GetOwnerNode()->GetName().c_str(),
                          out_data_anchor->GetIdx(),
                          in_data_anchor->GetOwnerNode()->GetName().c_str(),
@@ -488,10 +502,10 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) {

    uint32_t parent_index = 0;
    if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
      GELOGE(FAILED,
             "[%s] Failed to get attr [%s]",
             data_op_desc->GetName().c_str(),
             ATTR_NAME_PARENT_NODE_INDEX.c_str());
      GELOGE(FAILED, "[Invoke][GetInt] failed, node:[%s] attr:[%s]",
          data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
      REPORT_CALL_ERROR("E19999", "GetInt failed when %s, node:[%s] attr:[%s]",
          __FUNCTION__, data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
      return FAILED;
    }

@@ -557,7 +571,10 @@ Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) {
    auto index = in_data_anchor->GetIdx();
    auto input_desc = net_output_desc->MutableInputDesc(index);
    if (input_desc == nullptr) {
      GELOGE(INTERNAL_ERROR, "[%s] Failed to get input desc[%d]", net_output_desc->GetName().c_str(), index);
      GELOGE(INTERNAL_ERROR, "[Invoke][MutableInputDesc][%s] Failed to get input desc[%d]",
          net_output_desc->GetName().c_str(), index);
      REPORT_CALL_ERROR("E19999", "[%s] Failed to get input desc[%d] when HybridModelBuilder %s.",
          net_output_desc->GetName().c_str(), index, __FUNCTION__);
      return INTERNAL_ERROR;
    }

@@ -633,12 +650,13 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeG
      }
    }
    GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, merged_graph, *subgraph),
                            "[%s] Failed to merge subgraph.",
                            "[Invoke][UnfoldSubgraph][%s] Failed to merge subgraph.",
                            subgraph->GetName().c_str());
  }

  // invoke before adding subgraphs. in case modify node id in known-shaped subgraphs.
  GE_CHK_GRAPH_STATUS_RET(merged_graph->TopologicalSorting(), "Failed to invoke TopologicalSorting on merged graph.");
  GE_CHK_GRAPH_STATUS_RET(merged_graph->TopologicalSorting(),
      "[Invoke][TopologicalSorting]Failed to invoke TopologicalSorting on merged graph.");
  GE_DUMP(merged_graph, "hybrid_merged_graph_BeforeStageSort");
  merged_graph->TopologicalSorting([](const NodePtr &a, const NodePtr &b) -> bool {
    uint32_t a_level = UINT32_MAX;
@@ -651,7 +669,7 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeG
  for (auto &remained_subgraph : root_graph->GetAllSubgraphs()) {
    GELOGD("Adding subgraph [%s] to merged-graph.", remained_subgraph->GetName().c_str());
    GE_CHK_GRAPH_STATUS_RET(merged_graph->AddSubgraph(remained_subgraph),
                            "Failed to add subgraph [%s]",
                            "[Invoke][AddSubgraph]Failed to add subgraph [%s]",
                            remained_subgraph->GetName().c_str());
    remained_subgraph->SetParentGraph(merged_graph);
  }
@@ -666,10 +684,10 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraphPtr &root_graph,
  GE_CHECK_NOTNULL(parent_node);

  GE_CHK_STATUS_RET(MergeInputNodes(sub_graph),
                    "[%s] Failed to merge data nodes for subgraph",
                    "[Invoke][MergeInputNodes][%s] Failed to merge data nodes for subgraph",
                    sub_graph.GetName().c_str());
  GE_CHK_STATUS_RET(MergeNetOutputNode(sub_graph),
                    "[%s] Failed to merge net output nodes for subgraph",
                    "[Invoke][MergeNetOutputNode][%s] Failed to merge net output nodes for subgraph",
                    sub_graph.GetName().c_str());
  GELOGD("[%s] Done merging subgraph inputs and outputs successfully", sub_graph.GetName().c_str());

@@ -683,7 +701,7 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraphPtr &root_graph,
      GE_CHECK_NOTNULL(sub_sub_graph);
      if (sub_sub_graph->GetGraphUnknownFlag()) {
        GE_CHK_STATUS_RET(UnfoldSubgraph(root_graph, parent_graph, *sub_sub_graph),
                          "[%s] Failed to merge subgraph",
                          "[Invoke][UnfoldSubgraph][%s] Failed to merge subgraph",
                          sub_sub_graph->GetName().c_str());
        continue;
      }
@@ -757,7 +775,8 @@ Status HybridModelBuilder::LoadGraph() {
    GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu",
           root_graph->GetDirectNodesSize(),
           root_graph->GetAllNodesSize());
    GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(root_graph, merged_graph), "Failed to unfold subgraphs.");
    GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(root_graph, merged_graph),
        "[Invoke][UnfoldSubgraphs]Failed to unfold subgraphs, model_name_:%s.", GetGraphName());
    root_graph = std::move(merged_graph);
    GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu",
           root_graph->GetDirectNodesSize(),
@@ -779,9 +798,11 @@ Status HybridModelBuilder::LoadGraph() {
    op_desc->SetId(index++);
  }
  GE_DUMP(root_graph, "hybrid_merged_graph");
  GE_CHK_STATUS_RET(LoadDynamicSubgraph(*root_graph, true), "Failed to load root graph.");
  GE_CHK_STATUS_RET(LoadDynamicSubgraph(*root_graph, true),
      "[Invoke][LoadDynamicSubgraph]Failed to load root graph, model_name_:%s.", GetGraphName());
  GELOGD("Done loading root graph successfully.");
  GE_CHK_STATUS_RET(hybrid_model_.root_graph_item_->GroupNodes(), "Failed to group nodes for root graph");
  GE_CHK_STATUS_RET(hybrid_model_.root_graph_item_->GroupNodes(),
      "[Invoke][GroupNodes]Failed to group nodes for root graph, model_name_:%s.", GetGraphName());

  for (auto &sub_graph : root_graph->GetAllSubgraphs()) {
    GE_CHECK_NOTNULL(sub_graph);
@@ -797,26 +818,28 @@ Status HybridModelBuilder::LoadGraph() {

    if (sub_graph->GetGraphUnknownFlag()) {
      GE_CHK_STATUS_RET(LoadDynamicSubgraph(*sub_graph, false),
                        "Failed to load subgraph: [%s]",
                        "[Invoke][LoadDynamicSubgraph]Failed to load subgraph: [%s]",
                        sub_graph->GetName().c_str());
    } else {
      GE_CHK_STATUS_RET(IdentifyVariableOutputs(*parent_node_item),
                        "[%s] Failed to identify ref outputs.",
                        "[Invoke][IdentifyVariableOutputs][%s] Failed to identify ref outputs.",
                        parent_node_item->NodeName().c_str());
      GE_CHK_STATUS_RET(IdentifySameInputs(*parent_node_item),
                        "[%s] Failed to identify same outputs.",
                        "[Invoke][IdentifySameInputs][%s] Failed to identify same outputs.",
                        parent_node_item->NodeName().c_str());

      // if parent is function control op. need add a virtual partitioned call
      if (parent_node_item->IsControlOp()) {
        GE_CHK_STATUS_RET(LoadKnownShapedSubgraph(*sub_graph, parent_node_item),
                          "Failed to load function control op subgraph [%s]",
                          "[Invoke][LoadKnownShapedSubgraph]Failed to load function control op subgraph [%s]",
                          sub_graph->GetName().c_str());
      }
    }
  }

  GE_CHK_STATUS_RET(ParseDependentByParallelGroup(), "Failed to establish dependencies for hccl ops");
  GE_CHK_STATUS_RET(ParseDependentByParallelGroup(),
      "[Invoke][ParseDependentByParallelGroup]Failed to establish dependencies for hccl ops, model_name_:%s.",
      GetGraphName());
  GELOGI("Done loading all subgraphs successfully.");
  return SUCCESS;
 }
@@ -834,7 +857,7 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_
  auto tensor_desc = var_node->GetOpDesc()->MutableOutputDesc(0);
  uint8_t *var_logic = nullptr;
  GE_CHK_STATUS_RET(var_manager_->GetVarAddr(var_name, *tensor_desc, &var_logic),
                    "Failed to get var addr. var_name = %s, session_id = %ld",
                    "[Invoke][GetVarAddr]Failed to get var addr. var_name = %s, session_id = %ld",
                    var_name.c_str(),
                    hybrid_model_.GetSessionId());

@@ -846,9 +869,11 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_
  uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, memory_type);
  if (dev_mem == nullptr) {
    GELOGE(INTERNAL_ERROR,
           "Failed to copy var %s from device, cant not get "
           "var addr from logic addr %p",
           var_node->GetName().c_str(), var_logic);
        "[Invoke][GetVarMemoryAddr]Failed to copy var %s from device, cant not get var addr from logic addr %p",
        var_node->GetName().c_str(), var_logic);
    REPORT_CALL_ERROR("E19999",
        "GetVarMemoryAddr failed when %s, Failed to copy var %s from device, cant not get var addr from logic addr %p",
        __FUNCTION__, var_node->GetName().c_str(), var_logic);
    return INTERNAL_ERROR;
  }

@@ -876,7 +901,7 @@ Status HybridModelBuilder::HandleDtString(const GeTensor &tensor, void *var_addr
    auto &mutable_tensor = const_cast<GeTensor &>(tensor);
    uint64_t *buff = reinterpret_cast<uint64_t *>(mutable_tensor.MutableData().data());
    GE_CHK_BOOL_RET_STATUS(ge::CheckInt64Uint32MulOverflow(elem_num, kBytes * kStringHeadElems) == SUCCESS, FAILED,
                           "Shape size is invalid");
        "[Invoke][CheckInt64Uint32MulOverflow] failed because Shape size is invalid.");
    auto offset = static_cast<uint64_t>(elem_num * kBytes * kStringHeadElems);
    auto hbm_raw_data_base_addr =
        static_cast<uint64_t>(reinterpret_cast<uintptr_t>(var_addr) + offset);
@@ -928,7 +953,7 @@ Status HybridModelBuilder::InitConstantOps() {
    auto op_desc = var_node->GetOpDesc();
    auto v_weights = ModelUtils::GetWeights(op_desc);
    if (v_weights.empty()) {
      GELOGE(INTERNAL_ERROR, "[%s] Constant no not have value", var_node->GetName().c_str());
      GELOGE(INTERNAL_ERROR, "[Check][Size][%s] Constant op has no weight", var_node->GetName().c_str());
      return INTERNAL_ERROR;
    }
    auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());
@@ -942,7 +967,7 @@ Status HybridModelBuilder::InitConstantOps() {
      GELOGD("Init tensor with host constant %s size = %zu", var_name.c_str(), aligned_tensor.MutableData().GetSize());
      if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(aligned_tensor.GetAlignedPtr(),
                                                                       aligned_tensor.GetData().size()) == nullptr) {
        GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
        GELOGE(MEMALLOC_FAILED, "[Malloc][HostMemory] for an existed GeTensor failed, model_name_:%s.", GetGraphName());
        return MEMALLOC_FAILED;
      }
      var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(),
@@ -993,17 +1018,20 @@ Status HybridModelBuilder::InitVariableTensors() {
    int64_t tensor_size = 0;
    if (TensorUtils::CalcTensorMemSize(output_tensor.GetShape(), output_tensor.GetFormat(), output_tensor.GetDataType(),
                                       tensor_size) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Calculate variable size failed, node name:%s", it.first.c_str());
      REPORT_CALL_ERROR("E19999", "CalcTensorMemSize failed when HybridModelBuilder %s, node name:%s",
          __FUNCTION__, it.first.c_str());
      GELOGE(INTERNAL_ERROR, "[Calculate][TensorMemSize] failed, node name:%s", it.first.c_str());
      return INTERNAL_ERROR;
    }
    SharedMemInfo mem_info(it.first, tensor_size);
    if (HostMemManager::Instance().MallocSharedMemory(mem_info) != SUCCESS) {
      GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str());
      GELOGE(GE_GRAPH_MALLOC_FAILED, "[Malloc][SharedMemory] failed, Host variable [%s].", it.first.c_str());
      return GE_GRAPH_MALLOC_FAILED;
    }
    if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr,
                                                                     tensor_size) == nullptr) {
      GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
      GELOGE(MEMALLOC_FAILED,
          "[Malloc][HostMem] for an existed GeTensor failed, Host variable [%s].", it.first.c_str());
      return MEMALLOC_FAILED;
    }
    GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size);
@@ -1054,7 +1082,9 @@ Status HybridModelBuilder::InitWeights() {
      auto op_desc = node->GetOpDesc();
      auto v_weights = ModelUtils::GetWeights(op_desc);
      if (v_weights.empty()) {
        GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str());
        GELOGE(INTERNAL_ERROR, "[Invoke][GetWeights][%s] Constant has no value", node->GetName().c_str());
        REPORT_CALL_ERROR("E19999", "[%s] Constant has no value when %s.",
            node->GetName().c_str(), __FUNCTION__);
        return INTERNAL_ERROR;
      }
      auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());
@@ -1062,11 +1092,11 @@ Status HybridModelBuilder::InitWeights() {
      const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc();
      int64_t tensor_size = 0;
      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size),
                              "[%s] Failed to get tensor size",
                              "[Invoke][GetSize][%s] Failed to get output tensor size",
                              node->GetName().c_str());
      int64_t data_offset = 0;
      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset),
                              "[%s] Failed to get data offset",
                              "[Invoke][GetDataOffset][%s] Failed to get data offset",
                              node->GetName().c_str());
      GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld",
             GetGraphName(),
@@ -1093,7 +1123,8 @@ Status HybridModelBuilder::LoadTask(NodeItem &node_item) {
                                                    node_ptr,
                                                    node_item.kernel_task);
  if (load_ret != UNSUPPORTED && load_ret != SUCCESS) {
    GELOGE(load_ret, "[%s] Failed to load task", node_ptr->GetName().c_str());
    GELOGE(load_ret, "[Invoke][LoadTask][%s] Failed to load task", node_ptr->GetName().c_str());
    REPORT_CALL_ERROR("E19999", "[%s] Failed to load task when %s", node_ptr->GetName().c_str(), __FUNCTION__);
    return load_ret;
  }

@@ -1102,7 +1133,7 @@ Status HybridModelBuilder::LoadTask(NodeItem &node_item) {
 }

 Status HybridModelBuilder::LoadTasks() {
  GE_CHK_STATUS_RET(CheckAicpuOpList(), "Check Aicpu op failed.");
  GE_CHK_STATUS_RET(CheckAicpuOpList(), "[Check][AicpuOpList] failed.");
  std::map<int, std::map<std::string, NodeItem *>> ordered_partitioned_calls;
  for (auto &it : hybrid_model_.node_items_) {
    auto &node_item = it.second;
@@ -1179,7 +1210,8 @@ Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const

    auto iter = node_map.find(op_index);
    if (iter == node_map.end()) {
      GELOGE(INTERNAL_ERROR, "Failed to get node by op_index = %u", op_index);
      GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by op_index = %u", op_index);
      REPORT_INNER_ERROR("E19999", "Failed to get node by op_index = %u when %s.", op_index, __FUNCTION__);
      return INTERNAL_ERROR;
    }

@@ -1249,7 +1281,8 @@ Status HybridModelBuilder::IndexTaskDefs() {

      auto iter = node_map.find(op_index);
      if (iter == node_map.end()) {
        GELOGE(INTERNAL_ERROR, "Failed to get node by index = %u", op_index);
        GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by index = %u.", op_index);
        REPORT_INNER_ERROR("E19999", "Failed to get node by index = %u when %s.", op_index, __FUNCTION__);
        return INTERNAL_ERROR;
      }

@@ -1314,16 +1347,18 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
  GELOGD("To get peer node of %s::%s", sub_graph->GetName().c_str(), data_node->GetName().c_str());
  auto wrapped_node = data_node->GetOwnerComputeGraph()->GetParentNode();
  if (wrapped_node == nullptr) {
    GELOGE(INTERNAL_ERROR, "[%s] Node is in root graph.", data_node->GetName().c_str());
    REPORT_INNER_ERROR("E19999", "[%s] Node is in root graph when HybridModelBuilder %s.",
        data_node->GetName().c_str(), __FUNCTION__);
    GELOGE(INTERNAL_ERROR, "[Invoke][GetParentNode][%s] Node is in root graph.", data_node->GetName().c_str());
    return INTERNAL_ERROR;
  }
  auto data_op_desc = data_node->GetOpDesc();
  uint32_t parent_index = 0;
  if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
    GELOGE(INTERNAL_ERROR,
           "[%s] Failed to get attr [%s]",
           data_op_desc->GetName().c_str(),
           ATTR_NAME_PARENT_NODE_INDEX.c_str());
    REPORT_CALL_ERROR("E19999", "[%s] Failed to get attr [%s] when HybridModelBuilder %s.",
        data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str(), __FUNCTION__);
    GELOGE(INTERNAL_ERROR, "[Invoke][GetInt][%s] Failed to get attr [%s]",
        data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
    return INTERNAL_ERROR;
  }

@@ -1331,7 +1366,10 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
  GE_CHECK_NOTNULL(wrapped_node_in_anchor);
  auto src_out_anchor = wrapped_node_in_anchor->GetPeerOutAnchor();
  if (src_out_anchor == nullptr || src_out_anchor->GetOwnerNode() == nullptr) {
    GELOGE(INTERNAL_ERROR, "[%s] Parent node do not have peer anchor.", data_node->GetName().c_str());
    REPORT_INNER_ERROR("E19999", "[%s] Parent node do not have peer anchor when HybridModelBuilder %s.",
        data_node->GetName().c_str(), __FUNCTION__);
    GELOGE(INTERNAL_ERROR,
        "[Check][ParentNode][%s] Parent node do not have peer anchor.", data_node->GetName().c_str());
    return INTERNAL_ERROR;
  }

@@ -1354,10 +1392,13 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
  auto src_graph = NodeUtils::GetSubgraph(*src_wrapped_node, kSubgraphIndex);
  GE_CHECK_NOTNULL(src_graph);
  auto src_net_output_node = src_graph->FindFirstNodeMatchType(NETOUTPUT);
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(src_net_output_node == nullptr,
                                 return INTERNAL_ERROR,
                                 "Failed to find NetOutput in subgraph: %s",
                                 src_graph->GetName().c_str());
  if (src_net_output_node == nullptr) {
    REPORT_INNER_ERROR("E19999", "Failed to find NetOutput in subgraph: %s when HybridModelBuilder %s",
        src_graph->GetName().c_str(), __FUNCTION__);
    GELOGE(INTERNAL_ERROR, "[Invoke][FindFirstNodeMatchType]Failed to find NetOutput in subgraph: %s",
        src_graph->GetName().c_str());
    return INTERNAL_ERROR;
  }
  auto net_output_desc = src_net_output_node->GetOpDesc();
  GE_CHECK_NOTNULL(net_output_desc);

@@ -1393,17 +1434,18 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
    }
  }

  GELOGE(FAILED,
         "Failed to find peer node for %s::%s",
         sub_graph->GetName().c_str(),
         data_node->GetName().c_str());
  GELOGE(FAILED, "[Get][PeerNode]Failed to find peer node for %s::%s",
      sub_graph->GetName().c_str(), data_node->GetName().c_str());
  REPORT_INNER_ERROR("E19999", "Failed to find peer node for %s::%s when %s.",
      sub_graph->GetName().c_str(), data_node->GetName().c_str(), __FUNCTION__);
  return FAILED;
 }
 Status HybridModelBuilder::InitRuntimeParams() {
  int64_t value = 0;
  bool ret = false;
  if (ge_root_model_->GetSubgraphInstanceNameToModel().empty()) {
    GELOGE(INTERNAL_ERROR, "Root model has no sub model");
    GELOGE(INTERNAL_ERROR, "[Get][SubModel]Root model has no sub model, model:%s.", GetGraphName());
    REPORT_INNER_ERROR("E19999", "Root model has no sub model when %s, model:%s.", __FUNCTION__, GetGraphName());
    return INTERNAL_ERROR;
  }

@@ -1546,8 +1588,10 @@ Status HybridModelBuilder::GetParentNodeOutputIndex(const OpDesc &op_desc, int i
  auto input_desc = op_desc.MutableInputDesc(index);
  GE_CHECK_NOTNULL(input_desc);
  if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, out_index)) {
    GELOGE(INTERNAL_ERROR, "NetOutput input tensor %d, attr %s not found.",
           index, ATTR_NAME_PARENT_NODE_INDEX.c_str());
    GELOGE(INTERNAL_ERROR, "[Invoke][GetInt]NetOutput %s input tensor %d, attr %s not found.",
        op_desc.GetName().c_str(), index, ATTR_NAME_PARENT_NODE_INDEX.c_str());
    REPORT_CALL_ERROR("E19999", "NetOutput %s input tensor %d, attr %s not found when %s.",
        op_desc.GetName().c_str(), index, ATTR_NAME_PARENT_NODE_INDEX.c_str(), __FUNCTION__);
    return INTERNAL_ERROR;
  }
  return SUCCESS;
@@ -1563,7 +1607,7 @@ Status HybridModelBuilder::InitModelMem() {

  if (total_var_size > 0 && hybrid_model_.var_mem_base_ == nullptr) {
    GE_CHK_STATUS_RET(var_manager_->MallocVarMemory(total_var_size),
                      "Malloc Var Memory Fail.");
                      "[Malloc][VarMemory] failed, size:%zu.", total_var_size);
    hybrid_model_.var_mem_base_ = var_manager_->GetVarMemoryBase(RT_MEMORY_HBM);
  }

@@ -1580,7 +1624,8 @@ Status HybridModelBuilder::TransAllVarData() {
  rtContext_t ctx = nullptr;
  rtError_t rt_ret = rtCtxGetCurrent(&ctx);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Failed to get current context, error_code is: 0x%X.", rt_ret);
    GELOGE(RT_FAILED, "[Invoke][rtCtxGetCurrent]Failed to get current context, error_code is: 0x%X.", rt_ret);
    REPORT_CALL_ERROR("E19999", "rtCtxGetCurrent failed when %s, error_code: 0x%X.", __FUNCTION__, rt_ret);
    return RT_FAILED;
  }

@@ -1594,7 +1639,7 @@ Status HybridModelBuilder::TransAllVarData() {
                                                       runtime_param_.session_id,
                                                       ctx,
                                                       runtime_param_.graph_id),
                    "TransAllVarData failed.");
                    "[Invoke][TransAllVarData] failed.");

  GELOGI("TransAllVarData success.");
  return SUCCESS;
@@ -1604,7 +1649,7 @@ Status HybridModelBuilder::CopyVarData() {
  GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(ge_root_model_->GetRootGraph(),
                                                   runtime_param_.session_id,
                                                   hybrid_model_.device_id_),
                    "CopyVarData failed.");
                    "[Invoke][CopyVarData] failed.");
  GELOGI("CopyVarData success.");
  return SUCCESS;
 }
@@ -1628,7 +1673,7 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem
      int32_t data_index = 0;
      if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) {
        GELOGE(FAILED,
               "[%s] Failed to get attr [%s]",
               "[Invoke][GetInt][%s] Failed to get attr [%s]",
               node->GetName().c_str(),
               ATTR_NAME_PARENT_NODE_INDEX.c_str());
        return FAILED;
@@ -1645,7 +1690,7 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem
        }

        GE_CHK_GRAPH_STATUS_RET(wrapper_op_desc->AddOutputDesc(*output_desc),
                                "[%s] Failed to add output desc. output index = %d",
                                "[Invoke][AddOutputDesc][%s] Failed to add output desc. output index = %d",
                                graph.GetName().c_str(),
                                output_index);

@@ -2002,10 +2047,10 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item,
      data_op_index++;
    } else {
      if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) {
        GELOGE(FAILED,
               "[%s] Failed to get attr [%s]",
               node->GetName().c_str(),
               ATTR_NAME_PARENT_NODE_INDEX.c_str());
        GELOGE(FAILED, "[Invoke][GetInt][%s] Failed to get attr [%s]",
            node->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
        REPORT_CALL_ERROR("E19999", "call GetInt failed when HybridModelBuilder %s, [%s] Failed to get attr [%s]",
            __FUNCTION__, node->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
        return FAILED;
      }
    }
@@ -2040,7 +2085,7 @@ Status HybridModelBuilder::CheckAicpuOpList() {
  aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
  aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list),
                    "Launch check aicpu op type failed.");
                    "[Launch][KernelCheckAicpuOp] failed.");
  return SUCCESS;
 }

--- a/+ 1
+++ b/+ 1
@@ -1 +1 @@
 Subproject commit 620e9b9ac3210db3e4cf47babfb23d248bb9f17e
 Subproject commit 4ff5e3987f2e5d2980019defacaf0891861c84fc
--- a/+ 1
+++ b/+ 1
@@ -1 +1 @@
 Subproject commit d744541c6ca7f6966c1befacc9f83f53b0829e0a
 Subproject commit 51fb6c4850906e8342598d47eccfca0b87ffea59