Add error-log reporting and modify GELOGE messages (part 2)

4 years ago · 1bcc0df932
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -72,7 +72,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
  if (ret == END_OF_SEQUENCE) {
    args.is_eos = true;
  } else {
    GE_CHK_STATUS_RET(ret, "Failed to execute model");
    GE_CHK_STATUS_RET(ret, "[Invoke][ExecuteGraphInternal]Failed when HybridModelExecutor %s.", __FUNCTION__);
  }
  return SUCCESS;
 }
--- a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc
@@ -59,7 +59,10 @@ Status StageExecutor::Start(const std::vector<TensorValue> &inputs, const std::v
    task_queue_.Pop(task_info);
    GELOGD("[Executor: %d] Got task, stage = %d, iteration = %ld", id_, task_info.stage, task_info.iteration);
    if (task_info.iteration >= pipe_config_->iteration_end) {
      GELOGE(INTERNAL_ERROR, "[Executor: %d] Unexpected iteration: %d", id_, task_info.iteration);
      GELOGE(INTERNAL_ERROR, "[Check][Range][Executor: %d] Unexpected iteration: %d when StageExecutor %s.", 
          id_, task_info.iteration, __FUNCTION__);
      REPORT_INNER_ERROR("E19999", "[Executor: %d] Unexpected iteration: %d when StageExecutor %s.", 
          id_, task_info.iteration, __FUNCTION__);
      return INTERNAL_ERROR;
    }
@@ -75,7 +78,8 @@ Status StageExecutor::Start(const std::vector<TensorValue> &inputs, const std::v
    if (task_info.stage == 0) {
      GELOGD("[Executor: %d] To ResetExecutionContext", id_);
      GE_CHK_STATUS_RET(ResetExecutionContext(context_), "[Executor: %d] Failed to reset context", id_);
      GE_CHK_STATUS_RET(ResetExecutionContext(context_), 
          "[Invoke][ResetExecutionContext][Executor: %d] Failed to reset context", id_);
      context_.iteration = task_info.iteration;
      GE_CHK_STATUS_RET_NOLOG(SetInputs(inputs, input_desc));
    }
@@ -92,8 +96,11 @@ Status StageExecutor::Start(const std::vector<TensorValue> &inputs, const std::v
    auto sync_result = Synchronize();
    if (sync_result != SUCCESS) {
      GELOGE(sync_result, "[Executor: %d] Failed to sync result. iteration = %d", id_, task_info.iteration);
      GELOGE(sync_result, 
          "[Invoke][Synchronize][Executor: %d] Failed to sync result when StageExecutor %s. iteration = %d", 
          id_, __FUNCTION__, task_info.iteration);
      REPORT_CALL_ERROR("E19999", "[Executor: %d] Failed to sync result when StageExecutor %s. iteration = %d", 
          id_, __FUNCTION__, task_info.iteration);
      context_.profiler->Dump(std::cout);
      context_.callback_manager->Destroy();
      RuntimeInferenceContext::DestroyContext(std::to_string(context_.context_id));
@@ -242,7 +249,10 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar
    GELOGD("Start to sync result of executor[%zu]", i);
    auto ret = futures[i].get();
    if (ret != SUCCESS) {
      GELOGE(ret, "[Executor: %zu] Failed to schedule tasks.", i);
      GELOGE(ret, "[Check][Result][Executor: %zu] Failed to schedule tasks when HybridModelPipelineExecutor %s.",
          i, __FUNCTION__);
      REPORT_INNER_ERROR("E19999", "[Executor: %zu] Failed to schedule tasks when HybridModelPipelineExecutor %s.",
          i, __FUNCTION__);
      has_error = true;
      continue;
    }
@@ -250,7 +260,10 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar
    ret = stage_executors_[i]->Synchronize();
    if (ret != SUCCESS) {
      GELOGE(ret, "[Executor: %zu] Failed to synchronize result.", i);
      GELOGE(ret, "[Invoke][Synchronize] failed for [Executor: %zu] when HybridModelPipelineExecutor %s.",
          i, __FUNCTION__);
      REPORT_CALL_ERROR("E19999", "[Executor: %zu] failed to Synchronize result when HybridModelPipelineExecutor %s.",
          i, __FUNCTION__);    
      has_error = true;
      continue;
    }
@@ -266,13 +279,14 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar
  iteration_ = config_.iteration_end;
  if (has_error) {
    GELOGE(FAILED, "Error occurred while execution");
    GELOGE(FAILED, "[Check][Error]Error occurred while execution when HybridModelPipelineExecutor %s.", __FUNCTION__);
    REPORT_INNER_ERROR("E19999", "Error occurred while execution when HybridModelPipelineExecutor %s.", __FUNCTION__);
    return FAILED;
  }
  auto last_iter_executor_idx = loop_count % stage_executors_.size();
  GE_CHK_STATUS_RET(stage_executors_[last_iter_executor_idx]->GetOutputs(args.outputs, args.output_desc),
                    "Failed to get output from executor[%zu]", last_iter_executor_idx);
                    "[Get][Outputs]Failed from executor[%zu]", last_iter_executor_idx);
  return SUCCESS;
 }
--- a/ge/hybrid/executor/hybrid_profiler.cc
+++ b/ge/hybrid/executor/hybrid_profiler.cc
@@ -40,7 +40,8 @@ void HybridProfiler::RecordEvent(EventType event_type, const char *fmt, ...) {
  char buf[kEventDescMax];
  if (vsnprintf_s(buf, kEventDescMax, kEventDescMax - 1, fmt, args) == -1) {
    GELOGE(FAILED, "Format %s failed.", fmt);
    GELOGE(FAILED, "[Parse][Param:fmt]Format %s failed when HybridProfiler %s.", fmt, __FUNCTION__);
    REPORT_INNER_ERROR("E19999", "Parse Format %s failed when HybridProfiler %s.", fmt, __FUNCTION__);
    va_end(args);
    return;
  }
@@ -48,7 +49,10 @@ void HybridProfiler::RecordEvent(EventType event_type, const char *fmt, ...) {
  va_end(args);
  auto index = counter_++;
  if (index >= static_cast<int>(events_.size())) {
    GELOGE(INTERNAL_ERROR, "index out of range. index = %d, max event size = %zu", index, events_.size());
    GELOGE(INTERNAL_ERROR, 
        "[Check][Range]index out of range. index = %d, max event size = %zu", index, events_.size());
    REPORT_INNER_ERROR("E19999", "index out of range when HybridProfiler %s. index = %d, max event size = %zu", 
        __FUNCTION__, index, events_.size());
    return;
  }
  auto &evt = events_[index];
--- a/ge/hybrid/executor/node_done_manager.cc
+++ b/ge/hybrid/executor/node_done_manager.cc
@@ -28,7 +28,8 @@ bool NodeDoneManager::Cond::Await() {
  if (!cv_.wait_for(lk,
                    std::chrono::seconds(kDefaultWaitTimeoutInSec),
                    [&]() { return is_released_ || is_cancelled_; })) {
    GELOGE(INTERNAL_ERROR, "Wait timed out.");
    GELOGE(INTERNAL_ERROR, "[Invoke][wait_for]Wait timed out when %s.", __FUNCTION__);
    REPORT_INNER_ERROR("E19999", "wait timed out when %s.", __FUNCTION__);
    return false;
  }
--- a/ge/hybrid/executor/node_state.cc
+++ b/ge/hybrid/executor/node_state.cc
@@ -67,7 +67,10 @@ Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target
    Format format = input_desc.GetFormat();
    DataType data_type = input_desc.GetDataType();
    if (TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size) != GRAPH_SUCCESS) {
      GELOGE(FAILED, "[%s] Calculate tensor memory size failed.", node_item.NodeName().c_str());
      GELOGE(FAILED, "[Invoke][CalcTensorMemSize] failed for [%s] when ShapeInferenceState %s.", 
          node_item.NodeName().c_str(), __FUNCTION__);
      REPORT_CALL_ERROR("E19999", "CalcTensorMemSize failed for [%s] when ShapeInferenceState %s.", 
          node_item.NodeName().c_str(), __FUNCTION__);    
      return FAILED;
    }
  }
@@ -121,13 +124,19 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
      }
      if (context.GetStatus() != SUCCESS) {
        GELOGE(FAILED, "[%s] Await pending shape cancelled", node_item.NodeName().c_str());
        GELOGE(FAILED, "[Check][Status][%s] Await pending shape cancelled when %s.", 
            node_item.NodeName().c_str(), __FUNCTION__);
        REPORT_CALL_ERROR("E19999", "[%s] Await pending shape cancelled when %s.", 
            node_item.NodeName().c_str(), __FUNCTION__);
        break;
      }
    }
    if (!wait_success) {
      GELOGE(FAILED, "[%s] Wait for shape timeout.", node_item.NodeName().c_str());
      GELOGE(FAILED, "[Check][Status][%s] Wait for shape timeout when %s.", 
          node_item.NodeName().c_str(), __FUNCTION__);
      REPORT_CALL_ERROR("E19999", "[%s] Wait for shape timeout when %s.", 
          node_item.NodeName().c_str(), __FUNCTION__);
      return FAILED;
    }
  }
@@ -232,7 +241,7 @@ Status NodeState::WaitForPrepareDone() {
  if (prepare_future_.valid()) {
    GELOGD("[%s] Start to wait for prepare future.", GetName().c_str());
    GE_CHK_STATUS_RET(prepare_future_.get(),
                      "[%s] PreRun failed.", GetName().c_str());
        "[Check][Status][%s] PreRun failed when NodeState %s.", GetName().c_str(), __FUNCTION__);
  }
  return SUCCESS;
--- a/+ 1
+++ b/+ 1
@@ -1 +1 @@
 Subproject commit 620e9b9ac3210db3e4cf47babfb23d248bb9f17e
 Subproject commit 4ff5e3987f2e5d2980019defacaf0891861c84fc
--- a/+ 1
+++ b/+ 1
@@ -1 +1 @@
 Subproject commit d744541c6ca7f6966c1befacc9f83f53b0829e0a
 Subproject commit 51fb6c4850906e8342598d47eccfca0b87ffea59