
modify GELOGE and add error report

tags/v1.3.0
liudingyan · 3 years ago
parent
commit d85228d6fb
5 changed files with 206 additions and 168 deletions
  1. +7    -3    ge/hybrid/executor/rt_callback_manager.cc
  2. +13   -8    ge/hybrid/executor/subgraph_context.cc
  3. +56   -52   ge/hybrid/executor/subgraph_executor.cc
  4. +2    -1    ge/hybrid/model/graph_item.cc
  5. +128  -104  ge/hybrid/model/hybrid_model_builder.cc
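All five files apply the same pattern: each bare GELOGE message gains an "[Action][Target]" prefix, and an "E19999" record is pushed through REPORT_CALL_ERROR (when a call into another component such as the runtime or GraphUtils fails) or REPORT_INNER_ERROR (for internal consistency checks). The sketch below illustrates that pattern in isolation; the macro definitions are simplified stand-ins written for this example, not the real GraphEngine headers, and only the message shape mirrors the diff.

// Minimal, self-contained sketch of the logging pattern applied by this commit.
// NOTE: GELOGE / REPORT_CALL_ERROR below are simplified stand-ins (assumptions),
// not the real GraphEngine macros; they print to stderr instead of routing to
// the GE logger and error manager.
#include <cstdio>

#define GELOGE(status, fmt, ...) \
  std::fprintf(stderr, "[ERROR] code=%d " fmt "\n", static_cast<int>(status), __VA_ARGS__)
#define REPORT_CALL_ERROR(err_code, fmt, ...) \
  std::fprintf(stderr, "[REPORT %s] " fmt "\n", err_code, __VA_ARGS__)

constexpr int RT_ERROR_NONE = 0;
constexpr int RT_FAILED = 1;

// Old style: plain message, no error report:
//   GELOGE(RT_FAILED, "Failed to invoke rtEventRecord, error code = %d", rt_ret);
// New style: "[Action][Target]" tag plus an E19999 report for the error manager.
int RecordEvent(int rt_ret) {
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "[Invoke][rtEventRecord] failed, error code = %d", rt_ret);
    REPORT_CALL_ERROR("E19999", "Invoke rtEventRecord failed when %s, error code = %d",
                      __FUNCTION__, rt_ret);
    return RT_FAILED;
  }
  return RT_ERROR_NONE;
}

int main() { return RecordEvent(-1) == RT_FAILED ? 0 : 1; }

In the real code base these macros format into GE's logging and error-manager facilities rather than stderr; only the message convention is the point here.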

ge/hybrid/executor/rt_callback_manager.cc  (+7 -3)

@@ -27,7 +27,8 @@ Status CallbackManager::RegisterCallback(rtStream_t stream, rtCallback_t callbac
   GE_CHK_RT_RET(rtEventCreate(&event));
   auto rt_ret = rtEventRecord(event, stream);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Failed to invoke rtEventRecord, error code = %d", rt_ret);
+    GELOGE(RT_FAILED, "[Invoke][rtEventRecord] failed, error code = %d", rt_ret);
+    REPORT_CALL_ERROR("E19999", "Invoke rtEventRecord failed when %s, error code = %d", __FUNCTION__, rt_ret);
     (void) rtEventDestroy(event);
     return RT_FAILED;
   }
@@ -50,7 +51,8 @@ Status CallbackManager::Init() {
     return CallbackProcess(context);
   }, ctx);
   if (!ret_future_.valid()) {
-    GELOGE(INTERNAL_ERROR, "Failed to init callback manager.");
+    GELOGE(INTERNAL_ERROR, "[Check][ShareState]Failed to init callback manager.");
+    REPORT_INNER_ERROR("E19999", "Failed to init callback manager.");
     return INTERNAL_ERROR;
   }


@@ -73,7 +75,9 @@ Status CallbackManager::CallbackProcess(rtContext_t context) {

     auto rt_err = rtEventSynchronize(event);
     if (rt_err != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtEventSynchronize failed. ret = %d", rt_err);
+      GELOGE(RT_FAILED, "[Invoke][rtEventSynchronize] failed. ret = %d", rt_err);
+      REPORT_CALL_ERROR("E19999",
+                        "Invoke rtEventSynchronize failed when CallbackManager %s, ret = %d.", __FUNCTION__, rt_err);
       GE_CHK_RT(rtEventDestroy(event));
       return RT_FAILED;
     }


ge/hybrid/executor/subgraph_context.cc  (+13 -8)

@@ -50,9 +50,11 @@ NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) {
 Status SubgraphContext::SetInput(int index, const TensorValue &tensor) {
   if (static_cast<size_t>(index) >= all_inputs_.size()) {
     GELOGE(INTERNAL_ERROR,
-           "output index output range. all input num = %zu, input index = %d",
-           all_inputs_.size(),
-           index);
+           "[Check][Param:index]output index output range. all input num = %zu, input index = %d",
+           all_inputs_.size(), index);
+    REPORT_INNER_ERROR("E19999",
+                       "input param index out range when SubgraphContext %s, all input num = %zu, input index = %d.",
+                       __FUNCTION__, all_inputs_.size(), index);
     return INTERNAL_ERROR;
   }
   all_inputs_[index] = tensor;
@@ -68,10 +70,11 @@ Status SubgraphContext::SetOutput(const NodeItem &node_item, int output_index, c
   auto index = node_item.output_start + output_index;
   if ((output_index >= node_item.num_outputs) || (static_cast<size_t>(index) >= all_outputs_.size())) {
     GELOGE(INTERNAL_ERROR,
-           "output index output range. all output num = %zu, node_item = %s, output index = %d",
-           all_outputs_.size(),
-           node_item.DebugString().c_str(),
-           output_index);
+           "[Check][Param:output_index]output index output range. all output num = %zu, node_item = %s,"
+           "output index = %d.", all_outputs_.size(), node_item.DebugString().c_str(), output_index);
+    REPORT_INNER_ERROR("E19999", "output index output range when SubgraphContext %s. "
+                       "all output num = %zu, node_item = %s, output index = %d.",
+                       __FUNCTION__, all_outputs_.size(), node_item.DebugString().c_str(), output_index);
     return INTERNAL_ERROR;
   }


@@ -126,7 +129,9 @@ Status SubgraphContext::Await(const NodePtr &node) {

 void SubgraphContext::OnError(Status error) {
   if (error != END_OF_SEQUENCE) {
-    GELOGE(error, "[%s] Error occurred while executing graph.", graph_item_->GetName().c_str());
+    GELOGE(error, "[Check][Param:error][%s] Error occurred while executing graph.", graph_item_->GetName().c_str());
+    REPORT_INNER_ERROR("E19999", "[%s] Error occurred while executing graph when SubgraphContext %s.",
+                       graph_item_->GetName().c_str(), __FUNCTION__);
   }
   node_done_manager_.Destroy();
 }


ge/hybrid/executor/subgraph_executor.cc  (+56 -52)

@@ -44,7 +44,8 @@ Status SubgraphExecutor::Init(const std::vector<TensorValue> &inputs,
                               const std::vector<ConstGeTensorDescPtr> &input_desc) {
   subgraph_context_.reset(new(std::nothrow)SubgraphContext(graph_item_, context_));
   GE_CHECK_NOTNULL(subgraph_context_);
-  GE_CHK_STATUS_RET(subgraph_context_->Init(), "[%s] Failed to init subgraph context.", graph_item_->GetName().c_str());
+  GE_CHK_STATUS_RET(subgraph_context_->Init(),
+                    "[Init][SubgraphContext][%s] Failed to init subgraph context.", graph_item_->GetName().c_str());

   shape_inference_engine_.reset(new(std::nothrow) ShapeInferenceEngine(context_, subgraph_context_.get()));
   GE_CHECK_NOTNULL(shape_inference_engine_);
@@ -55,8 +56,8 @@ Status SubgraphExecutor::Init(const std::vector<TensorValue> &inputs,
                       graph_item_->GetName().c_str());
   } else {
     GE_CHK_STATUS_RET(InitInputsForKnownShape(inputs),
-                      "[%s] Failed to init subgraph executor for known shape subgraph.",
-                      graph_item_->GetName().c_str());
+                      "[Invoke][InitInputsForKnownShape][%s] Failed to init subgraph executor for known shape subgraph.",
+                      graph_item_->GetName().c_str());
   }

   return SUCCESS;
@@ -67,8 +68,12 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
   // Number of inputs of parent node should be greater or equal than that of subgraph
   auto input_nodes = graph_item_->GetInputNodes();
   if (inputs.size() < input_nodes.size()) {
-    GELOGE(INTERNAL_ERROR, "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
-           graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
+    GELOGE(INTERNAL_ERROR,
+           "[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
+           graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
+    REPORT_INNER_ERROR("E19999",
+        "Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs when SubgraphExecutor %s.",
+        graph_item_->GetName().c_str(), inputs.size(), input_nodes.size(), __FUNCTION__);
     return INTERNAL_ERROR;
   }


@@ -87,9 +92,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
            input_tensor.DebugString().c_str());

     GE_CHK_STATUS_RET(subgraph_context_->SetInput(*input_node, kDataInputIndex, input_tensor),
-                      "[%s] Failed to set input tensor[%zu]",
-                      graph_item_->GetName().c_str(),
-                      i);
+                      "[Invoke][SetInput] failed for grap_item[%s] input tensor[%zu]", graph_item_->GetName().c_str(), i);

     if (force_infer_shape_ || input_node->is_dynamic) {
       GELOGD("[%s] Start to update input[%zu] for subgraph data node.", graph_item_->GetName().c_str(), i);
@@ -112,11 +115,11 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue>
     auto &parent_input_index = input_index_mapping[i];
     if (static_cast<size_t>(parent_input_index) >= inputs.size()) {
       GELOGE(INTERNAL_ERROR,
-             "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs",
-             graph_item_->GetName().c_str(),
-             inputs.size(),
-             parent_input_index + 1);
+             "[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs",
+             graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1);
+      REPORT_INNER_ERROR("E19999",
+          "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs when %s.",
+          graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1, __FUNCTION__);
       return INTERNAL_ERROR;
     }


@@ -136,10 +139,10 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                       const std::vector<ConstGeTensorDescPtr> &input_desc,
                                       const std::vector<TensorValue> &outputs) {
   GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false");
-  GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str());
+  GE_CHK_STATUS_RET(Init(inputs, input_desc), "[Invoke][Init]failed for [%s].", graph_item_->GetName().c_str());
   if (!outputs.empty()) {
     GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs),
-                      "Failed to enable output zero copy by user provided outputs.");
+                      "[Invoke][EnableOutputZeroCopy] Failed by user provided outputs.");
   }
   if (!graph_item_->IsDynamic()) {
     return ExecuteAsyncForKnownShape(inputs);
@@ -194,12 +197,11 @@ Status SubgraphExecutor::ExecuteAsync(TaskContext &task_context) {
   }

   GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc),
-                    "[%s] Failed to execute subgraph.",
-                    graph_item_->GetName().c_str());
+                    "[Invoke][ExecuteAsync] failed for [%s].", graph_item_->GetName().c_str());

   GE_CHK_STATUS_RET(SetOutputsToParentNode(task_context),
-                    "[%s] Failed to set output shapes to parent node.",
-                    graph_item_->GetName().c_str());
+                    "[Invoke][SetOutputsToParentNode][%s] Failed to set output shapes to parent node.",
+                    graph_item_->GetName().c_str());
   return SUCCESS;
 }


@@ -239,7 +241,7 @@ Status SubgraphExecutor::PrepareNodes(int group) {
       if (node_item.kernel_task == nullptr) {
         GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str());
         GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_),
-                          "[%s] Failed to create task.", p_node_state->GetName().c_str());
+                          "[Invoke][Compile] failed for [%s].", p_node_state->GetName().c_str());
       } else {
         node_state->SetKernelTask(node_item.kernel_task);
       }
@@ -248,7 +250,9 @@ Status SubgraphExecutor::PrepareNodes(int group) {
       GE_CHECK_NOTNULL(unique_task_context);
       const auto &task = node_state->GetKernelTask();
       if (task == nullptr) {
-        GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str());
+        GELOGE(INTERNAL_ERROR, "[Get][KernelTask] failed for[%s], NodeTask is null.", node_state->GetName().c_str());
+        REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s when %s, nodetask is null.",
+                          node_state->GetName().c_str(), __FUNCTION__);
         return INTERNAL_ERROR;
       }
       auto shared_task_context = std::shared_ptr<TaskContext>(unique_task_context.release());
@@ -261,8 +265,10 @@ Status SubgraphExecutor::PrepareNodes(int group) {
         GELOGD("Got end of sequence");
         return SUCCESS;
       }
-      GELOGE(INTERNAL_ERROR, "[%s] Error occurs while launching tasks. quit from preparing nodes.",
-             graph_item_->GetName().c_str());
+      GELOGE(INTERNAL_ERROR, "[Check][State][%s] Error occurs while launching tasks. quit from preparing nodes.",
+             graph_item_->GetName().c_str());
+      REPORT_INNER_ERROR("E19999", "[%s] Error occurs while launching tasks. quit from preparing nodes when %s.",
+                         graph_item_->GetName().c_str(), __FUNCTION__);
       return INTERNAL_ERROR;
     }


@@ -275,9 +281,9 @@ Status SubgraphExecutor::PrepareNodes(int group) {

 Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
   HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
-                        "[%s] Failed to InferShape.", node_state.GetName().c_str());
+                        "[Invoke][InferShape] failed for [%s].", node_state.GetName().c_str());
   HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
-                        "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
+                        "[Invoke][PropagateOutputShapes] failed for [%s].", node_state.GetName().c_str());
   return SUCCESS;
 }


@@ -285,7 +291,7 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
   auto &node_item = *node_state.GetNodeItem();
   if (node_item.kernel_task == nullptr) {
     GE_CHK_STATUS_RET(TaskCompileEngine::Compile(node_state, ctx),
-                      "Failed to create task for node[%s]", node_state.GetName().c_str());
+                      "[Invoke][Compile] Failed for node[%s]", node_state.GetName().c_str());
   } else {
     node_state.SetKernelTask(node_item.kernel_task);
   }
@@ -293,7 +299,9 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
   GE_CHECK_NOTNULL(unique_task_context);
   const auto &task = node_state.GetKernelTask();
   if (task == nullptr) {
-    GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state.GetName().c_str());
+    GELOGE(INTERNAL_ERROR, "[Invoke][GetKernelTask] failed for[%s], NodeTask is null.", node_state.GetName().c_str());
+    REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s, NodeTask is null when %s.",
+                      node_state.GetName().c_str(), __FUNCTION__);
     return INTERNAL_ERROR;
   }
   auto shared_task_context = std::shared_ptr<TaskContext>(unique_task_context.release());
@@ -309,7 +317,8 @@ Status SubgraphExecutor::LaunchTasks() {
   while (true) {
     NodeState *node_state = nullptr;
    if (!ready_queue_.Pop(node_state)) {
-      GELOGE(INTERNAL_ERROR, "[%s] Failed to pop node.", graph_item_->GetName().c_str());
+      GELOGE(INTERNAL_ERROR, "[Invoke][Pop] failed for [%s].", graph_item_->GetName().c_str());
+      REPORT_CALL_ERROR("E19999", "invoke pop failed for %s when %s", graph_item_->GetName().c_str(), __FUNCTION__);
       return INTERNAL_ERROR;
     }


@@ -334,8 +343,7 @@ Status SubgraphExecutor::LaunchTasks() {
     GE_CHECK_NOTNULL(shared_task_context);
     shared_task_context->SetForceInferShape(force_infer_shape_);
     HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_),
-                          "[%s] Execute node failed.",
-                          node_state->GetName().c_str());
+                          "[Invoke][ExecuteAsync] failed for [%s].", node_state->GetName().c_str());
     GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str());
   }
 }
@@ -361,8 +369,7 @@ Status SubgraphExecutor::ScheduleTasks(int group) {
   }

   GE_CHK_STATUS_RET(prepare_future.get(),
-                    "[%s] Error occurred in task preparation.",
-                    graph_item_->GetName().c_str());
+                    "[Invoke][get] [%s] Error occurred in task preparation.", graph_item_->GetName().c_str());

   GELOGD("[%s] Done launching all tasks successfully.", graph_item_->GetName().c_str());
   return SUCCESS;
@@ -373,17 +380,17 @@ Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs) {
 }

 Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs, std::vector<ConstGeTensorDescPtr> &output_desc) {
-  GE_CHK_STATUS_RET(GetOutputs(outputs), "[%s] Failed to get output tensors.", graph_item_->GetName().c_str());
+  GE_CHK_STATUS_RET(GetOutputs(outputs), "[Invoke][GetOutputs] failed for [%s].", graph_item_->GetName().c_str());

   // copy output data from op to designated position
   GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc),
-                    "[%s] Failed to get output tensor desc.",
-                    graph_item_->GetName().c_str());
+                    "[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.", graph_item_->GetName().c_str());
   if (outputs.size() != output_desc.size()) {
     GELOGE(INTERNAL_ERROR,
-           "Number of output tensors(%zu) mismatch number of output tensor desc(%zu).",
-           outputs.size(),
-           output_desc.size());
+           "[Check][Size]Number of output tensors(%zu) mismatch number of output tensor desc(%zu).",
+           outputs.size(), output_desc.size());
+    REPORT_INNER_ERROR("E19999", "Number of output tensors(%zu) mismatch number of output tensor desc(%zu) when %s.",
+                       outputs.size(), output_desc.size(), __FUNCTION__);
     return INTERNAL_ERROR;
   }
   return SUCCESS;
@@ -401,17 +408,15 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) {
   std::vector<TensorValue> outputs;
   std::vector<ConstGeTensorDescPtr> output_desc_list;
   GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs),
-                    "[%s] Failed to get output tensors.",
-                    graph_item_->GetName().c_str());
+                    "[Invoke][GetOutputs][%s] Failed to get output tensors.", graph_item_->GetName().c_str());
   GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc_list),
-                    "[%s] Failed to get output tensor desc.",
-                    graph_item_->GetName().c_str());
+                    "[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.", graph_item_->GetName().c_str());

   if (outputs.size() != output_desc_list.size()) {
-    GELOGE(INTERNAL_ERROR, "[%s] num output tensors = %zu, num output tensor desc = %zu",
-           graph_item_->GetName().c_str(),
-           outputs.size(),
-           output_desc_list.size());
+    GELOGE(INTERNAL_ERROR, "[Check][Size][%s] num output tensors = %zu, num output tensor desc = %zu",
+           graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size());
+    REPORT_INNER_ERROR("E19999", "[%s] num output tensors = %zu, num output tensor desc = %zu when %s",
+                       graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size(), __FUNCTION__);
     return INTERNAL_ERROR;
   }


@@ -460,9 +465,10 @@ Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs
   const auto &output_edges = graph_item_->GetOutputEdges();
   // Op -> MetOutput, set the output tensor of Op that output to the NetOutput node
   if (outputs.size() != output_edges.size()) {
-    GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu",
-           output_edges.size(),
-           outputs.size());
+    GELOGE(PARAM_INVALID, "[Check][Size]Output number mismatches, expect = %zu, but given = %zu",
+           output_edges.size(), outputs.size());
+    REPORT_INNER_ERROR("E19999", "Output number mismatches, expect = %zu, but given = %zu when %s",
+                       output_edges.size(), outputs.size(), __FUNCTION__);
     return PARAM_INVALID;
   }


@@ -478,9 +484,7 @@ Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs
            output_tensor.DebugString().c_str());

     GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
-                      "[%s] Failed to set input tensor[%zu]",
-                      graph_item_->GetName().c_str(),
-                      i);
+                      "[Invoke][SetOutput][%s] Failed to set input tensor[%zu]", graph_item_->GetName().c_str(), i);
   }

   GELOGD("Done enabling zero copy for outputs successfully.");


ge/hybrid/model/graph_item.cc  (+2 -1)

@@ -95,7 +95,8 @@ Status GraphItem::GroupNodes() {
     int group = node->group;
     if (group != last_group) {
      if (seen_groups.find(group) != seen_groups.end()) {
-        GELOGE(INTERNAL_ERROR, "Unordered node group found. node = %s, group = %d", node->NodeName().c_str(), group);
+        GELOGE(INTERNAL_ERROR,
+               "[Order][NodeGroup]Unordered node group found. node = %s, group = %d", node->NodeName().c_str(), group);
         return INTERNAL_ERROR;
       } else {
         last_group = group;


ge/hybrid/model/hybrid_model_builder.cc  (+128 -104)

@@ -71,7 +71,7 @@ Status SetOutputNameAttr(ComputeGraph &graph) {
     }
   }
   GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&graph, ATTR_MODEL_OUT_NODES_NAME, output_names),
-                   GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed.");
+                   GELOGE(FAILED, "[Invoke][SetListStr] of ATTR_MODEL_OUT_NODES_NAME failed.");
                    return FAILED);
   return SUCCESS;
 }
@@ -109,10 +109,11 @@ Status CollectDependenciesForFusedGraph(NodeItem &node_item, std::set<OpDesc *>
       GE_CHECK_NOTNULL(src_op_desc);
       if (src_node->GetType() != DATA_TYPE) {
         GELOGE(UNSUPPORTED,
-               "[%s::%s] Node in fused subgraph can only depend on Data nodes, but depend on %s",
-               node_item.NodeName().c_str(),
-               node->GetName().c_str(),
-               src_node->GetType().c_str());
+               "[Check][NodeType][%s::%s] Node in fused subgraph can only depend on Data nodes, but depend on %s",
+               node_item.NodeName().c_str(), node->GetName().c_str(), src_node->GetType().c_str());
+        REPORT_INNER_ERROR("E19999",
+                           "[%s::%s] Node in fused subgraph can only depend on Data nodes, but depend on %s when %s.",
+                           node_item.NodeName().c_str(), node->GetName().c_str(), src_node->GetType().c_str(), __FUNCTION__);
         return UNSUPPORTED;
       }


@@ -129,37 +130,38 @@ HybridModelBuilder::HybridModelBuilder(HybridModel &hybrid_model)
 }

 Status HybridModelBuilder::Build() {
-  GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
+  GE_CHK_STATUS_RET(ValidateParams(), "[Invoke][ValidateParams] failed, model_name_:[%s]", GetGraphName());
   hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName();
   GELOGI("[%s] Start to build hybrid model.", GetGraphName());
-  GE_CHK_STATUS_RET(InitRuntimeParams(), "[%s] Failed to InitRuntimeParams", GetGraphName());
-  GE_CHK_STATUS_RET(RecoverGraphUnknownFlag(), "[%s] Failed to RecoverGraphUnknownFlag", GetGraphName());
-  GE_CHK_STATUS_RET(IndexSpecialNodes(), "[%s] Failed to index nodes", GetGraphName());
-  GE_CHK_STATUS_RET(IndexTaskDefs(), "[%s] Failed to index task defs", GetGraphName());
-  GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
-  GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
-  GE_CHK_STATUS_RET(AssignUninitializedConstantOps(), "[%s] Failed to assign uninitialized constants", GetGraphName());
-  GE_CHK_STATUS_RET(TransAllVarData(), "[%s] Failed to trans all var data", GetGraphName());
-  GE_CHK_STATUS_RET(CopyVarData(), "[%s] Failed to copy var data", GetGraphName());
-  GE_CHK_STATUS_RET(InitModelMem(), "[%s] Failed to init memory", GetGraphName());
-  GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName());
-  GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName());
-  GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
+  GE_CHK_STATUS_RET(InitRuntimeParams(), "[Invoke][InitRuntimeParams] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(RecoverGraphUnknownFlag(), "[Invoke][RecoverGraphUnknownFlag] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(IndexSpecialNodes(), "[Invoke][IndexSpecialNodes] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(IndexTaskDefs(), "[Invoke][IndexTaskDefs] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(InitWeights(), "[Invoke][InitWeights] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(LoadGraph(), "[Invoke][LoadGraph] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(AssignUninitializedConstantOps(),
+                    "[Invoke][AssignUninitializedConstantOps] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(TransAllVarData(), "[Invoke][TransAllVarData] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(CopyVarData(), "[Invoke][CopyVarData] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(InitModelMem(), "[Invoke][InitModelMem] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(InitConstantOps(), "[Invoke][InitConstantOps] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(InitVariableTensors(), "[Invoke][InitVariableTensors], model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(LoadTasks(), "[Invoke][LoadTasks] failed, model_name_:[%s]", GetGraphName());
   GELOGI("[%s] Done building hybrid model successfully.", GetGraphName());
   return SUCCESS;
 }

 Status HybridModelBuilder::BuildForSingleOp() {
-  GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
+  GE_CHK_STATUS_RET(ValidateParams(), "[Invoke][ValidateParams] failed, model_name_:[%s]", GetGraphName());
   hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName();
   GELOGI("[%s] Start to build hybrid model.", GetGraphName());
   auto ret = ge_root_model_->GetSubgraphInstanceNameToModel();
   const GeModelPtr ge_model = ret[ge_root_model_->GetRootGraph()->GetName()];
   GE_CHK_STATUS_RET(IndexTaskDefs(ge_root_model_->GetRootGraph(), ge_model),
-                    "[%s] Failed to index task defs", GetGraphName());
-  GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
-  GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
-  GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
+                    "[Invoke][IndexTaskDefs] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(LoadGraph(), "[Invoke][LoadGraph] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(InitWeights(), "[Invoke][InitWeights] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(LoadTasks(), "[Invoke][LoadTasks] failed, model_name_:[%s]", GetGraphName());
   GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName());
   return SUCCESS;
 }
@@ -173,18 +175,20 @@ Status HybridModelBuilder::ValidateParams() {
 Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_item) {
   auto op_desc = node->GetOpDesc();
   GE_CHK_STATUS_RET(ParseForceInfershapeNodes(node, node_item),
-                    "[%s] Failed to parse force_infershape node.",
+                    "[Invoke][ParseForceInfershapeNodes]failed, node:[%s].",
                     node_item.NodeName().c_str());
   vector<string> dependencies = node->GetOpDesc()->GetOpInferDepends();
   GE_CHK_STATUS_RET(ParseDependentInputNodes(node_item, dependencies),
-                    "[%s] Failed to parse node dependencies.",
+                    "[Invoke][ParseDependentInputNodes]failed, node:[%s].",
                     node_item.NodeName().c_str());

   node_item.outputs.resize(node_item.num_outputs);
   for (int i = 0; i < node_item.num_outputs; ++i) {
     auto out_data_anchor = node->GetOutDataAnchor(i);
     if (out_data_anchor == nullptr) {
-      GELOGE(INTERNAL_ERROR, "out anchor[%d] of node %s is nullptr", i, node->GetName().c_str());
+      GELOGE(INTERNAL_ERROR, "[Get][OutDataAnchor]out anchor[%d] of node %s is nullptr", i, node->GetName().c_str());
+      REPORT_CALL_ERROR("E19999", "out anchor[%d] of node %s is nullptr when %s",
+                        i, node->GetName().c_str(), __FUNCTION__);
       return INTERNAL_ERROR;
     }


@@ -197,12 +201,11 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite

       NodeItem *dst_node_item = nullptr;
       GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item),
-                        "[%s] Failed to get or create node item.",
+                        "[GetOrCreate][NodeItem] failed, dst_node:[%s].",
                         dst_node->GetName().c_str());
       int canonical_index;
       GE_CHK_STATUS_RET(dst_node_item->GetCanonicalInputIndex(dst_in_anchor->GetIdx(), canonical_index),
-                        "[%s] Failed to canonical input index",
-                        dst_node->GetName().c_str());
+                        "[Invoke][GetCanonicalInputIndex] failed, dst_node:[%s].", dst_node->GetName().c_str());

       node_item.outputs[i].emplace_back(canonical_index, dst_node_item);
     }
@@ -246,7 +249,7 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n
   }

   std::unique_ptr<NodeItem> new_node;
-  GE_CHK_STATUS_RET(NodeItem::Create(node, new_node), "Failed to create node item");
+  GE_CHK_STATUS_RET(NodeItem::Create(node, new_node), "[Invoke][Create] failed, model_name_:[%s]", GetGraphName());
   GE_CHK_STATUS_RET_NOLOG(NodeExecutorManager::GetInstance().GetExecutor(*node, &new_node->node_executor));

   // we do not need L2 Buffer
@@ -330,10 +333,8 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
   for (const auto &input_name : dependencies) {
     int input_index = node_item.op_desc->GetInputIndexByName(input_name);
     if (input_index < 0) {
-      GELOGE(INTERNAL_ERROR,
-             "[%s] Failed to get input index by name: %s",
-             node_item.NodeName().c_str(),
-             input_name.c_str());
+      GELOGE(INTERNAL_ERROR, "[Get][InputIndex]failed, node:[%s] inputname: %s.",
+             node_item.NodeName().c_str(), input_name.c_str());
       return INTERNAL_ERROR;
     }


@@ -380,10 +381,10 @@ Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item, s
   for (auto &op_desc : data_ops) {
     uint32_t parent_index = 0;
     if (!AttrUtils::GetInt(*op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
-      GELOGE(INTERNAL_ERROR,
-             "[%s] Failed to get attr [%s]",
-             op_desc->GetName().c_str(),
-             ATTR_NAME_PARENT_NODE_INDEX.c_str());
+      GELOGE(INTERNAL_ERROR, "[Invoke][GetInt] failed, node:[%s] attr:[%s]",
+             op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
+      REPORT_CALL_ERROR("E19999", "invoke GetInt failed when %s, node:[%s] attr:[%s]",
+                        __FUNCTION__, op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
       return INTERNAL_ERROR;
     }


@@ -413,24 +414,29 @@ Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item, s

 Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) {
   if (NodeUtils::SetAllAnchorStatus(node) != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "[%s] NodeUtils::SetAllAnchorStatus failed.", node->GetName().c_str());
+    GELOGE(INTERNAL_ERROR, "[Invoke][SetAllAnchorStatus] failed, node:[%s].", node->GetName().c_str());
+    REPORT_CALL_ERROR("E19999", "[%s] NodeUtils::SetAllAnchorStatus failed when %s.",
+                      node->GetName().c_str(), __FUNCTION__);
     return INTERNAL_ERROR;
   }
   for (auto &anchor : node->GetAllInDataAnchors()) {
     auto peer_anchor = anchor->GetPeerOutAnchor();
     if (peer_anchor == nullptr) {
       if (AnchorUtils::SetStatus(anchor, ANCHOR_SUSPEND) != GRAPH_SUCCESS) {
-        GELOGE(INTERNAL_ERROR, "[%s] AnchorUtils::SetStatus failed.", node->GetName().c_str());
+        GELOGE(INTERNAL_ERROR, "[Invoke][SetStatus] failed, node:[%s].", node->GetName().c_str());
+        REPORT_CALL_ERROR("E19999", "SetStatus failed, node:[%s] when %s.", node->GetName().c_str(), __FUNCTION__);
         return INTERNAL_ERROR;
       }
     } else if (peer_anchor->GetOwnerNode()->GetType() == CONSTANT) {
       if (AnchorUtils::SetStatus(anchor, ANCHOR_CONST) != GRAPH_SUCCESS) {
-        GELOGE(INTERNAL_ERROR, "[%s] AnchorUtils::SetStatus failed.", node->GetName().c_str());
+        GELOGE(INTERNAL_ERROR, "[Invoke][SetStatus] failed, node:[%s].", node->GetName().c_str());
+        REPORT_CALL_ERROR("E19999", "SetStatus failed, node:[%s] when %s.", node->GetName().c_str(), __FUNCTION__);
         return INTERNAL_ERROR;
       }
     } else {
       if (AnchorUtils::SetStatus(anchor, ANCHOR_DATA) != GRAPH_SUCCESS) {
-        GELOGE(INTERNAL_ERROR, "[%s] AnchorUtils::SetStatus failed.", node->GetName().c_str());
+        GELOGE(INTERNAL_ERROR, "[Invoke][SetStatus] failed, node:[%s].", node->GetName().c_str());
+        REPORT_CALL_ERROR("E19999", "SetStatus failed, node:[%s] when %s.", node->GetName().c_str(), __FUNCTION__);
         return INTERNAL_ERROR;
       }
     }
@@ -441,11 +447,9 @@ Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) {

 Status HybridModelBuilder::DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_anchor,
                                                const InDataAnchorPtr &in_data_anchor) {
-  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->Unlink(in_data_anchor), "Failed to unlink %s:%d from %s:%d",
-                          out_data_anchor->GetOwnerNode()->GetName().c_str(),
-                          out_data_anchor->GetIdx(),
-                          in_data_anchor->GetOwnerNode()->GetName().c_str(),
-                          in_data_anchor->GetIdx());
+  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->Unlink(in_data_anchor),
+      "[Invoke][Unlink] failed to unlink %s:%d from %s:%d", out_data_anchor->GetOwnerNode()->GetName().c_str(),
+      out_data_anchor->GetIdx(), in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx());

   GELOGD("Succeeded in unlinking %s:%d from %s:%d",
          out_data_anchor->GetOwnerNode()->GetName().c_str(),
@@ -456,7 +460,7 @@ Status HybridModelBuilder::DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_
 }

 Status HybridModelBuilder::DoLinkDataAnchors(OutDataAnchorPtr &out_data_anchor, InDataAnchorPtr &in_data_anchor) {
-  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->LinkTo(in_data_anchor), "Failed to link %s:%d to %s:%d",
+  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->LinkTo(in_data_anchor), "[Invoke][LinkTo]Failed to link %s:%d to %s:%d",
                           out_data_anchor->GetOwnerNode()->GetName().c_str(),
                           out_data_anchor->GetIdx(),
                           in_data_anchor->GetOwnerNode()->GetName().c_str(),
@@ -488,10 +492,10 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) {

     uint32_t parent_index = 0;
     if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
-      GELOGE(FAILED,
-             "[%s] Failed to get attr [%s]",
-             data_op_desc->GetName().c_str(),
-             ATTR_NAME_PARENT_NODE_INDEX.c_str());
+      GELOGE(FAILED, "[Invoke][GetInt] failed, node:[%s] attr:[%s]",
+             data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
+      REPORT_CALL_ERROR("E19999", "GetInt failed when %s, node:[%s] attr:[%s]",
+                        __FUNCTION__, data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
       return FAILED;
     }


@@ -557,7 +561,8 @@ Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) {
     auto index = in_data_anchor->GetIdx();
     auto input_desc = net_output_desc->MutableInputDesc(index);
     if (input_desc == nullptr) {
-      GELOGE(INTERNAL_ERROR, "[%s] Failed to get input desc[%d]", net_output_desc->GetName().c_str(), index);
+      GELOGE(INTERNAL_ERROR, "[Invoke][MutableInputDesc][%s] Failed to get input desc[%d]",
+             net_output_desc->GetName().c_str(), index);
       return INTERNAL_ERROR;
     }


@@ -633,12 +638,13 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeG
       }
     }
     GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, merged_graph, *subgraph),
-                            "[%s] Failed to merge subgraph.",
+                            "[Invoke][UnfoldSubgraph][%s] Failed to merge subgraph.",
                             subgraph->GetName().c_str());
   }

   // invoke before adding subgraphs. in case modify node id in known-shaped subgraphs.
-  GE_CHK_GRAPH_STATUS_RET(merged_graph->TopologicalSorting(), "Failed to invoke TopologicalSorting on merged graph.");
+  GE_CHK_GRAPH_STATUS_RET(merged_graph->TopologicalSorting(),
+                          "[Invoke][TopologicalSorting]Failed to invoke TopologicalSorting on merged graph.");
   GE_DUMP(merged_graph, "hybrid_merged_graph_BeforeStageSort");
   merged_graph->TopologicalSorting([](const NodePtr &a, const NodePtr &b) -> bool {
     uint32_t a_level = UINT32_MAX;
@@ -651,7 +657,7 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeG
   for (auto &remained_subgraph : root_graph->GetAllSubgraphs()) {
     GELOGD("Adding subgraph [%s] to merged-graph.", remained_subgraph->GetName().c_str());
     GE_CHK_GRAPH_STATUS_RET(merged_graph->AddSubgraph(remained_subgraph),
-                            "Failed to add subgraph [%s]",
+                            "[Invoke][AddSubgraph]Failed to add subgraph [%s]",
                             remained_subgraph->GetName().c_str());
     remained_subgraph->SetParentGraph(merged_graph);
   }
@@ -666,10 +672,10 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraphPtr &root_graph,
   GE_CHECK_NOTNULL(parent_node);

   GE_CHK_STATUS_RET(MergeInputNodes(sub_graph),
-                    "[%s] Failed to merge data nodes for subgraph",
+                    "[Invoke][MergeInputNodes][%s] Failed to merge data nodes for subgraph",
                     sub_graph.GetName().c_str());
   GE_CHK_STATUS_RET(MergeNetOutputNode(sub_graph),
-                    "[%s] Failed to merge net output nodes for subgraph",
+                    "[Invoke][MergeNetOutputNode][%s] Failed to merge net output nodes for subgraph",
                     sub_graph.GetName().c_str());
   GELOGD("[%s] Done merging subgraph inputs and outputs successfully", sub_graph.GetName().c_str());


@@ -683,7 +689,7 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraphPtr &root_graph,
     GE_CHECK_NOTNULL(sub_sub_graph);
     if (sub_sub_graph->GetGraphUnknownFlag()) {
       GE_CHK_STATUS_RET(UnfoldSubgraph(root_graph, parent_graph, *sub_sub_graph),
-                        "[%s] Failed to merge subgraph",
+                        "[Invoke][UnfoldSubgraph][%s] Failed to merge subgraph",
                         sub_sub_graph->GetName().c_str());
       continue;
     }
@@ -757,7 +763,8 @@ Status HybridModelBuilder::LoadGraph() {
   GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu",
          root_graph->GetDirectNodesSize(),
          root_graph->GetAllNodesSize());
-  GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(root_graph, merged_graph), "Failed to unfold subgraphs.");
+  GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(root_graph, merged_graph),
+                          "[Invoke][UnfoldSubgraphs]Failed to unfold subgraphs, model_name_:%s.", GetGraphName());
   root_graph = std::move(merged_graph);
   GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu",
          root_graph->GetDirectNodesSize(),
@@ -779,9 +786,11 @@ Status HybridModelBuilder::LoadGraph() {
     op_desc->SetId(index++);
   }
   GE_DUMP(root_graph, "hybrid_merged_graph");
-  GE_CHK_STATUS_RET(LoadDynamicSubgraph(*root_graph, true), "Failed to load root graph.");
+  GE_CHK_STATUS_RET(LoadDynamicSubgraph(*root_graph, true),
+                    "[Invoke][LoadDynamicSubgraph]Failed to load root graph, model_name_:%s.", GetGraphName());
   GELOGD("Done loading root graph successfully.");
-  GE_CHK_STATUS_RET(hybrid_model_.root_graph_item_->GroupNodes(), "Failed to group nodes for root graph");
+  GE_CHK_STATUS_RET(hybrid_model_.root_graph_item_->GroupNodes(),
+                    "[Invoke][GroupNodes]Failed to group nodes for root graph, model_name_:%s.", GetGraphName());

   for (auto &sub_graph : root_graph->GetAllSubgraphs()) {
     GE_CHECK_NOTNULL(sub_graph);
@@ -797,26 +806,28 @@ Status HybridModelBuilder::LoadGraph() {

     if (sub_graph->GetGraphUnknownFlag()) {
       GE_CHK_STATUS_RET(LoadDynamicSubgraph(*sub_graph, false),
-                        "Failed to load subgraph: [%s]",
+                        "[Invoke][LoadDynamicSubgraph]Failed to load subgraph: [%s]",
                         sub_graph->GetName().c_str());
     } else {
       GE_CHK_STATUS_RET(IdentifyVariableOutputs(*parent_node_item),
-                        "[%s] Failed to identify ref outputs.",
+                        "[Invoke][IdentifyVariableOutputs][%s] Failed to identify ref outputs.",
                         parent_node_item->NodeName().c_str());
       GE_CHK_STATUS_RET(IdentifySameInputs(*parent_node_item),
-                        "[%s] Failed to identify same outputs.",
+                        "[Invoke][IdentifySameInputs][%s] Failed to identify same outputs.",
                         parent_node_item->NodeName().c_str());

       // if parent is function control op. need add a virtual partitioned call
       if (parent_node_item->IsControlOp()) {
         GE_CHK_STATUS_RET(LoadKnownShapedSubgraph(*sub_graph, parent_node_item),
-                          "Failed to load function control op subgraph [%s]",
+                          "[Invoke][LoadKnownShapedSubgraph]Failed to load function control op subgraph [%s]",
                           sub_graph->GetName().c_str());
       }
     }
   }

-  GE_CHK_STATUS_RET(ParseDependentByParallelGroup(), "Failed to establish dependencies for hccl ops");
+  GE_CHK_STATUS_RET(ParseDependentByParallelGroup(),
+                    "[Invoke][ParseDependentByParallelGroup]Failed to establish dependencies for hccl ops, model_name_:%s.",
+                    GetGraphName());
   GELOGI("Done loading all subgraphs successfully.");
   return SUCCESS;
 }
@@ -834,7 +845,7 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_
   auto tensor_desc = var_node->GetOpDesc()->MutableOutputDesc(0);
   uint8_t *var_logic = nullptr;
   GE_CHK_STATUS_RET(var_manager_->GetVarAddr(var_name, *tensor_desc, &var_logic),
-                    "Failed to get var addr. var_name = %s, session_id = %ld",
+                    "[Invoke][GetVarAddr]Failed to get var addr. var_name = %s, session_id = %ld",
                     var_name.c_str(),
                     hybrid_model_.GetSessionId());


@@ -846,9 +857,11 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_
   uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, memory_type);
   if (dev_mem == nullptr) {
     GELOGE(INTERNAL_ERROR,
-           "Failed to copy var %s from device, cant not get "
-           "var addr from logic addr %p",
-           var_node->GetName().c_str(), var_logic);
+           "[Invoke][GetVarMemoryAddr]Failed to copy var %s from device, cant not get var addr from logic addr %p",
+           var_node->GetName().c_str(), var_logic);
+    REPORT_CALL_ERROR("E19999",
+        "GetVarMemoryAddr failed when %s, Failed to copy var %s from device, cant not get var addr from logic addr %p",
+        __FUNCTION__, var_node->GetName().c_str(), var_logic);
     return INTERNAL_ERROR;
   }


@@ -876,7 +889,7 @@ Status HybridModelBuilder::HandleDtString(const GeTensor &tensor, void *var_addr
     auto &mutable_tensor = const_cast<GeTensor &>(tensor);
     uint64_t *buff = reinterpret_cast<uint64_t *>(mutable_tensor.MutableData().data());
     GE_CHK_BOOL_RET_STATUS(ge::CheckInt64Uint32MulOverflow(elem_num, kBytes * kStringHeadElems) == SUCCESS, FAILED,
-                           "Shape size is invalid");
+                           "[Invoke][CheckInt64Uint32MulOverflow] failed because Shape size is invalid.");
     auto offset = static_cast<uint64_t>(elem_num * kBytes * kStringHeadElems);
     auto hbm_raw_data_base_addr =
         static_cast<uint64_t>(reinterpret_cast<uintptr_t>(var_addr) + offset);
@@ -928,7 +941,7 @@ Status HybridModelBuilder::InitConstantOps() {
     auto op_desc = var_node->GetOpDesc();
     auto v_weights = ModelUtils::GetWeights(op_desc);
     if (v_weights.empty()) {
-      GELOGE(INTERNAL_ERROR, "[%s] Constant no not have value", var_node->GetName().c_str());
+      GELOGE(INTERNAL_ERROR, "[Check][Size][%s] Constant no not have value", var_node->GetName().c_str());
       return INTERNAL_ERROR;
     }
     auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());
@@ -942,7 +955,7 @@ Status HybridModelBuilder::InitConstantOps() {
       GELOGD("Init tensor with host constant %s size = %zu", var_name.c_str(), aligned_tensor.MutableData().GetSize());
       if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(aligned_tensor.GetAlignedPtr(),
                                                                        aligned_tensor.GetData().size()) == nullptr) {
-        GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
+        GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed, model_name_:%s.", GetGraphName());
         return MEMALLOC_FAILED;
       }
       var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(),
@@ -993,17 +1006,18 @@ Status HybridModelBuilder::InitVariableTensors() {
     int64_t tensor_size = 0;
     if (TensorUtils::CalcTensorMemSize(output_tensor.GetShape(), output_tensor.GetFormat(), output_tensor.GetDataType(),
                                        tensor_size) != SUCCESS) {
-      GELOGE(INTERNAL_ERROR, "Calculate variable size failed, node name:%s", it.first.c_str());
+      GELOGE(INTERNAL_ERROR, "[Calculate][TensorMemSize] failed, node name:%s", it.first.c_str());
       return INTERNAL_ERROR;
     }
     SharedMemInfo mem_info(it.first, tensor_size);
     if (HostMemManager::Instance().MallocSharedMemory(mem_info) != SUCCESS) {
-      GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str());
+      GELOGE(GE_GRAPH_MALLOC_FAILED, "[Malloc][SharedMemory] failed, Host variable [%s].", it.first.c_str());
       return GE_GRAPH_MALLOC_FAILED;
     }
     if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr,
                                                                      tensor_size) == nullptr) {
-      GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
+      GELOGE(MEMALLOC_FAILED,
+             "[Malloc][HostMem] for an existed GeTensor failed, Host variable [%s].", it.first.c_str());
       return MEMALLOC_FAILED;
     }
     GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size);
@@ -1054,7 +1068,9 @@ Status HybridModelBuilder::InitWeights() {
       auto op_desc = node->GetOpDesc();
       auto v_weights = ModelUtils::GetWeights(op_desc);
       if (v_weights.empty()) {
-        GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str());
+        GELOGE(INTERNAL_ERROR, "[Invoke][GetWeights][%s] Constant has no value", node->GetName().c_str());
+        REPORT_CALL_ERROR("E19999", "[%s] Constant has no value when %s.",
+                          node->GetName().c_str(), __FUNCTION__);
         return INTERNAL_ERROR;
       }
       auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());
@@ -1062,11 +1078,11 @@ Status HybridModelBuilder::InitWeights() {
       const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc();
       int64_t tensor_size = 0;
       GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size),
-                              "[%s] Failed to get tensor size",
+                              "[Invoke][GetSize][%s] Failed to get tensor size",
                               node->GetName().c_str());
       int64_t data_offset = 0;
       GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset),
-                              "[%s] Failed to get data offset",
+                              "[Invoke][GetDataOffset][%s] Failed to get data offset",
                               node->GetName().c_str());
       GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld",
              GetGraphName(),
@@ -1093,7 +1109,8 @@ Status HybridModelBuilder::LoadTask(NodeItem &node_item) {
                                                node_ptr,
                                                node_item.kernel_task);
   if (load_ret != UNSUPPORTED && load_ret != SUCCESS) {
-    GELOGE(load_ret, "[%s] Failed to load task", node_ptr->GetName().c_str());
+    GELOGE(load_ret, "[Invoke][LoadTask][%s] Failed to load task", node_ptr->GetName().c_str());
+    REPORT_CALL_ERROR("E19999", "[%s] Failed to load task when %s", node_ptr->GetName().c_str(), __FUNCTION__);
     return load_ret;
   }


@@ -1102,7 +1119,7 @@ Status HybridModelBuilder::LoadTask(NodeItem &node_item) {
 }

 Status HybridModelBuilder::LoadTasks() {
-  GE_CHK_STATUS_RET(CheckAicpuOpList(), "Check Aicpu op failed.");
+  GE_CHK_STATUS_RET(CheckAicpuOpList(), "[Check][AicpuOpList] failed.");
   std::map<int, std::map<std::string, NodeItem *>> ordered_partitioned_calls;
   for (auto &it : hybrid_model_.node_items_) {
     auto &node_item = it.second;
@@ -1179,7 +1196,8 @@ Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const


auto iter = node_map.find(op_index); auto iter = node_map.find(op_index);
if (iter == node_map.end()) { if (iter == node_map.end()) {
GELOGE(INTERNAL_ERROR, "Failed to get node by op_index = %u", op_index);
GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by op_index = %u", op_index);
REPORT_INNER_ERROR("E19999", "Failed to get node by op_index = %u when %s.", op_index, __FUNCTION__);
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }


@@ -1249,7 +1267,8 @@ Status HybridModelBuilder::IndexTaskDefs() {


auto iter = node_map.find(op_index); auto iter = node_map.find(op_index);
if (iter == node_map.end()) { if (iter == node_map.end()) {
GELOGE(INTERNAL_ERROR, "Failed to get node by index = %u", op_index);
GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by index = %u.", op_index);
REPORT_INNER_ERROR("E19999", "Failed to get node by index = %u when %s.", op_index, __FUNCTION__);
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }


@@ -1314,14 +1333,14 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
GELOGD("To get peer node of %s::%s", sub_graph->GetName().c_str(), data_node->GetName().c_str()); GELOGD("To get peer node of %s::%s", sub_graph->GetName().c_str(), data_node->GetName().c_str());
auto wrapped_node = data_node->GetOwnerComputeGraph()->GetParentNode(); auto wrapped_node = data_node->GetOwnerComputeGraph()->GetParentNode();
if (wrapped_node == nullptr) { if (wrapped_node == nullptr) {
GELOGE(INTERNAL_ERROR, "[%s] Node is in root graph.", data_node->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[Invoke][GetParentNode][%s] Node is in root graph.", data_node->GetName().c_str());
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }
auto data_op_desc = data_node->GetOpDesc(); auto data_op_desc = data_node->GetOpDesc();
uint32_t parent_index = 0; uint32_t parent_index = 0;
if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(INTERNAL_ERROR, GELOGE(INTERNAL_ERROR,
"[%s] Failed to get attr [%s]",
"[Invoke][GetInt][%s] Failed to get attr [%s]",
data_op_desc->GetName().c_str(), data_op_desc->GetName().c_str(),
ATTR_NAME_PARENT_NODE_INDEX.c_str()); ATTR_NAME_PARENT_NODE_INDEX.c_str());
return INTERNAL_ERROR; return INTERNAL_ERROR;
@@ -1331,7 +1350,8 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
GE_CHECK_NOTNULL(wrapped_node_in_anchor); GE_CHECK_NOTNULL(wrapped_node_in_anchor);
auto src_out_anchor = wrapped_node_in_anchor->GetPeerOutAnchor(); auto src_out_anchor = wrapped_node_in_anchor->GetPeerOutAnchor();
if (src_out_anchor == nullptr || src_out_anchor->GetOwnerNode() == nullptr) { if (src_out_anchor == nullptr || src_out_anchor->GetOwnerNode() == nullptr) {
GELOGE(INTERNAL_ERROR, "[%s] Parent node do not have peer anchor.", data_node->GetName().c_str());
GELOGE(INTERNAL_ERROR,
"[Check][ParentNode][%s] Parent node do not have peer anchor.", data_node->GetName().c_str());
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }


@@ -1356,7 +1376,7 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
auto src_net_output_node = src_graph->FindFirstNodeMatchType(NETOUTPUT); auto src_net_output_node = src_graph->FindFirstNodeMatchType(NETOUTPUT);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(src_net_output_node == nullptr, GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(src_net_output_node == nullptr,
return INTERNAL_ERROR, return INTERNAL_ERROR,
"Failed to find NetOutput in subgraph: %s",
"[Invoke][FindFirstNodeMatchType]Failed to find NetOutput in subgraph: %s",
src_graph->GetName().c_str()); src_graph->GetName().c_str());
auto net_output_desc = src_net_output_node->GetOpDesc(); auto net_output_desc = src_net_output_node->GetOpDesc();
GE_CHECK_NOTNULL(net_output_desc); GE_CHECK_NOTNULL(net_output_desc);
@@ -1393,17 +1413,18 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
} }
} }


GELOGE(FAILED,
"Failed to find peer node for %s::%s",
sub_graph->GetName().c_str(),
data_node->GetName().c_str());
GELOGE(FAILED, "[Get][PeerNode]Failed to find peer node for %s::%s",
sub_graph->GetName().c_str(), data_node->GetName().c_str());
REPORT_INNER_ERROR("E19999", "Failed to find peer node for %s::%s when %s.",
sub_graph->GetName().c_str(), data_node->GetName().c_str(), __FUNCTION__);
return FAILED; return FAILED;
} }
Status HybridModelBuilder::InitRuntimeParams() { Status HybridModelBuilder::InitRuntimeParams() {
int64_t value = 0; int64_t value = 0;
bool ret = false; bool ret = false;
if (ge_root_model_->GetSubgraphInstanceNameToModel().empty()) { if (ge_root_model_->GetSubgraphInstanceNameToModel().empty()) {
GELOGE(INTERNAL_ERROR, "Root model has no sub model");
GELOGE(INTERNAL_ERROR, "[Get][SubModel]Root model has no sub model, model:%s.", GetGraphName());
REPORT_INNER_ERROR("E19999", "Root model has no sub model when %s, model:%s.", __FUNCTION__, GetGraphName());
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }


@@ -1546,8 +1567,10 @@ Status HybridModelBuilder::GetParentNodeOutputIndex(const OpDesc &op_desc, int i
auto input_desc = op_desc.MutableInputDesc(index); auto input_desc = op_desc.MutableInputDesc(index);
GE_CHECK_NOTNULL(input_desc); GE_CHECK_NOTNULL(input_desc);
if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, out_index)) { if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, out_index)) {
GELOGE(INTERNAL_ERROR, "NetOutput input tensor %d, attr %s not found.",
index, ATTR_NAME_PARENT_NODE_INDEX.c_str());
GELOGE(INTERNAL_ERROR, "[Invoke][GetInt]NetOutput input tensor %d, attr %s not found.",
index, ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "NetOutput input tensor %d, attr %s not found when %s.",
index, ATTR_NAME_PARENT_NODE_INDEX.c_str(), __FUNCTION__);
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }
return SUCCESS; return SUCCESS;
@@ -1563,7 +1586,7 @@ Status HybridModelBuilder::InitModelMem() {


if (total_var_size > 0 && hybrid_model_.var_mem_base_ == nullptr) { if (total_var_size > 0 && hybrid_model_.var_mem_base_ == nullptr) {
GE_CHK_STATUS_RET(var_manager_->MallocVarMemory(total_var_size), GE_CHK_STATUS_RET(var_manager_->MallocVarMemory(total_var_size),
"Malloc Var Memory Fail.");
"[Malloc][VarMemory] failed, size:%zu.", total_var_size);
hybrid_model_.var_mem_base_ = var_manager_->GetVarMemoryBase(RT_MEMORY_HBM); hybrid_model_.var_mem_base_ = var_manager_->GetVarMemoryBase(RT_MEMORY_HBM);
} }


@@ -1580,7 +1603,8 @@ Status HybridModelBuilder::TransAllVarData() {
rtContext_t ctx = nullptr; rtContext_t ctx = nullptr;
rtError_t rt_ret = rtCtxGetCurrent(&ctx); rtError_t rt_ret = rtCtxGetCurrent(&ctx);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Failed to get current context, error_code is: 0x%X.", rt_ret);
GELOGE(RT_FAILED, "[Invoke][rtCtxGetCurrent]Failed to get current context, error_code is: 0x%X.", rt_ret);
REPORT_CALL_ERROR("E19999", "rtCtxGetCurrent failed when %s, error_code: 0x%X.", __FUNCTION__, rt_ret);
return RT_FAILED; return RT_FAILED;
} }


@@ -1594,7 +1618,7 @@ Status HybridModelBuilder::TransAllVarData() {
runtime_param_.session_id, runtime_param_.session_id,
ctx, ctx,
runtime_param_.graph_id), runtime_param_.graph_id),
"TransAllVarData failed.");
"[Invoke][TransAllVarData] failed.");


GELOGI("TransAllVarData success."); GELOGI("TransAllVarData success.");
return SUCCESS; return SUCCESS;
@@ -1604,7 +1628,7 @@ Status HybridModelBuilder::CopyVarData() {
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(ge_root_model_->GetRootGraph(), GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(ge_root_model_->GetRootGraph(),
runtime_param_.session_id, runtime_param_.session_id,
hybrid_model_.device_id_), hybrid_model_.device_id_),
"CopyVarData failed.");
"[Invoke][CopyVarData] failed.");
GELOGI("CopyVarData success."); GELOGI("CopyVarData success.");
return SUCCESS; return SUCCESS;
} }
@@ -1628,7 +1652,7 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem
int32_t data_index = 0; int32_t data_index = 0;
if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) { if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) {
GELOGE(FAILED, GELOGE(FAILED,
"[%s] Failed to get attr [%s]",
"[Invoke][GetInt][%s] Failed to get attr [%s]",
node->GetName().c_str(), node->GetName().c_str(),
ATTR_NAME_PARENT_NODE_INDEX.c_str()); ATTR_NAME_PARENT_NODE_INDEX.c_str());
return FAILED; return FAILED;
@@ -1645,7 +1669,7 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem
} }


GE_CHK_GRAPH_STATUS_RET(wrapper_op_desc->AddOutputDesc(*output_desc), GE_CHK_GRAPH_STATUS_RET(wrapper_op_desc->AddOutputDesc(*output_desc),
"[%s] Failed to add output desc. output index = %d",
"[Invoke][AddOutputDesc][%s] Failed to add output desc. output index = %d",
graph.GetName().c_str(), graph.GetName().c_str(),
output_index); output_index);


@@ -2003,7 +2027,7 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item,
} else { } else {
if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) { if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) {
GELOGE(FAILED, GELOGE(FAILED,
"[%s] Failed to get attr [%s]",
"[Invoke][GetInt][%s] Failed to get attr [%s]",
node->GetName().c_str(), node->GetName().c_str(),
ATTR_NAME_PARENT_NODE_INDEX.c_str()); ATTR_NAME_PARENT_NODE_INDEX.c_str());
return FAILED; return FAILED;
@@ -2040,7 +2064,7 @@ Status HybridModelBuilder::CheckAicpuOpList() {
aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end()); aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end()); aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list),
"Launch check aicpu op type failed.");
"[Launch][KernelCheckAicpuOp] failed.");
return SUCCESS; return SUCCESS;
} }
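
For reference, every hunk above applies the same two-step pattern: the GELOGE message gains a bracketed [Action][Object] tag, and a matching REPORT_INNER_ERROR (for internal consistency checks) or REPORT_CALL_ERROR (for failed runtime or graph-API calls) is emitted under error code E19999 so the failure also lands in the error report. Below is a minimal sketch of that pattern, assuming the usual GE logging and error-reporting macros are in scope; the function CheckRootModelSketch and the step SomeStep are hypothetical illustrations, not code added by this commit.

// Minimal sketch only; CheckRootModelSketch and SomeStep are hypothetical names.
Status CheckRootModelSketch(const GeRootModelPtr &ge_root_model) {
  if (ge_root_model->GetSubgraphInstanceNameToModel().empty()) {
    // Internal consistency check: log a tagged message, then report an inner error.
    GELOGE(INTERNAL_ERROR, "[Get][SubModel]Root model has no sub model.");
    REPORT_INNER_ERROR("E19999", "Root model has no sub model when %s.", __FUNCTION__);
    return INTERNAL_ERROR;
  }

  rtContext_t ctx = nullptr;
  rtError_t rt_ret = rtCtxGetCurrent(&ctx);
  if (rt_ret != RT_ERROR_NONE) {
    // Failed call into the runtime: same tagging, but reported as a call error.
    GELOGE(RT_FAILED, "[Invoke][rtCtxGetCurrent]Failed to get current context, error_code is: 0x%X.", rt_ret);
    REPORT_CALL_ERROR("E19999", "rtCtxGetCurrent failed when %s, error_code: 0x%X.", __FUNCTION__, rt_ret);
    return RT_FAILED;
  }

  // Status-checking macros take the tagged message directly as their log text.
  GE_CHK_STATUS_RET(SomeStep(), "[Invoke][SomeStep] failed.");
  return SUCCESS;
}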



