Patch summary: rewrites the GELOGE messages in ge/common/dump to the
"[Action][Object]" prefix form and adds REPORT_INNER_ERROR("E19999", ...)
calls at selected failure sites in dump_manager.cc, dump_op.cc and
opdebug_register.cc.

The int64_t values output_size / input_size are formatted with %ld (the same
specifier the neighbouring GELOGD context lines use for them), not %d.

NOTE(review): leading indentation and the placement of blank context lines
were reconstructed from a whitespace-collapsed copy of this patch; confirm
them against the target tree (or apply with whitespace tolerance).

diff --git a/ge/common/dump/dump_manager.cc b/ge/common/dump/dump_manager.cc
index a659d9c6..462e5032 100644
--- a/ge/common/dump/dump_manager.cc
+++ b/ge/common/dump/dump_manager.cc
@@ -56,7 +56,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
     dump_properties.SetDumpOpSwitch(dump_op_switch);
     if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) {
       dump_properties_map_.emplace(kInferSessionId, dump_properties);
-      GELOGE(PARAM_INVALID, "Dump list is invalid,dump_op_switch is %s", dump_op_switch.c_str());
+      GELOGE(PARAM_INVALID, "[Check][DumpList]Failed, dump_op_switch is %s.", dump_op_switch.c_str());
+      REPORT_INNER_ERROR("E19999", "Check dump list failed, dump_op_switch is %s.", dump_op_switch.c_str());
       return PARAM_INVALID;
     }
 
@@ -82,7 +83,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
 
     dump_path = dump_config.dump_path;
     if (dump_path.empty()) {
-      GELOGE(PARAM_INVALID, "Dump path is empty");
+      GELOGE(PARAM_INVALID, "[Check][DumpPath]Failed, it is empty.");
+      REPORT_INNER_ERROR("E19999", "Check dump path failed, it is empty.");
       return PARAM_INVALID;
     }
 
diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc
index 0becbdc8..1bdbe513 100755
--- a/ge/common/dump/dump_op.cc
+++ b/ge/common/dump/dump_op.cc
@@ -99,7 +99,8 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
     }
     int64_t output_size = 0;
     if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
-      GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size filed");
+      GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][Param]Get output size failed, output_size:%ld.", output_size);
+      REPORT_INNER_ERROR("E19999", "Get output size failed, output_size:%ld.", output_size);
       return ACL_ERROR_GE_INTERNAL_ERROR;
     }
     GELOGD("Get output size in lanch dump op is %ld", output_size);
@@ -126,7 +127,8 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
     }
     int64_t input_size = 0;
     if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
-      GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size filed");
+      GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][Param]Get input size failed, input_size:%ld.", input_size);
+      REPORT_INNER_ERROR("E19999", "Get input size failed, input_size:%ld.", input_size);
       return ACL_ERROR_GE_INTERNAL_ERROR;
     }
     GELOGD("Get input size in lanch dump op is %ld", input_size);
@@ -151,30 +153,31 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
   size_t proto_size = op_mapping_info.ByteSizeLong();
   bool ret = op_mapping_info.SerializeToString(&proto_msg);
   if (!ret || proto_size == 0) {
-    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Protobuf serialize failed, proto_size is %zu", proto_size);
+    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Serialize][Protobuf]Failed, proto_size:%zu.", proto_size);
+    REPORT_INNER_ERROR("E19999", "Serialize protobuf failed, proto_size:%zu.", proto_size);
     return ACL_ERROR_GE_INTERNAL_ERROR;
   }
 
   rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
+    GELOGE(rt_ret, "[Malloc][ProtoDevMem]Failed, ret:0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
 
   rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
+    GELOGE(rt_ret, "[Copy][ProtoDevMem]Failed, ret:0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
 
   rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
+    GELOGE(rt_ret, "[Malloc][ProtoSizeDevMem]Failed, ret:0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
+    GELOGE(rt_ret, "[Copy][ProtoSizeDevMem]Failed, ret:0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
 
@@ -193,7 +196,7 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
                              nullptr,  // no need smDesc
                              stream_);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret);
+    GELOGE(rt_ret, "[Call][rtCpuKernelLaunch]Failed, rt_ret:0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   GELOGI("Kernel launch dump op success");
@@ -205,12 +208,12 @@ Status DumpOp::LaunchDumpOp() {
   int32_t device_id = 0;
   rtError_t rt_ret = rtGetDevice(&device_id);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
+    GELOGE(rt_ret, "[Call][rtGetDevice]Failed, ret:0x%X, device_id:%d.", rt_ret, device_id);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   if (device_id < 0) {
     GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
-           "Check device_id failed, device_id = %d, which should be not less than 0.",
+           "[Check][DeviceId]Failed, device_id:%d, which should be not less than 0.",
            device_id);
     return ACL_ERROR_GE_INTERNAL_ERROR;
   }
@@ -240,7 +243,7 @@ Status DumpOp::LaunchDumpOp() {
   if (dump_properties_.GetDumpMode() == kDumpOutput) {
     auto ret = DumpOutput(task);
     if (ret != SUCCESS) {
-      GELOGE(ret, "Dump output failed");
+      GELOGE(ret, "[Dump][Output]Failed, error_code:%u.", ret);
       return ret;
     }
     op_mapping_info.mutable_task()->Add(std::move(task));
@@ -248,7 +251,7 @@ Status DumpOp::LaunchDumpOp() {
   if (dump_properties_.GetDumpMode() == kDumpInput) {
     auto ret = DumpInput(task);
     if (ret != SUCCESS) {
-      GELOGE(ret, "Dump input failed");
+      GELOGE(ret, "[Dump][Input]Failed, error_code:%u.", ret);
       return ret;
     }
     op_mapping_info.mutable_task()->Add(std::move(task));
@@ -256,19 +259,19 @@ Status DumpOp::LaunchDumpOp() {
   if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) {
     auto ret = DumpOutput(task);
     if (ret != SUCCESS) {
-      GELOGE(ret, "Dump output failed when in dumping all");
+      GELOGE(ret, "[Dump][Output]Failed when in dumping all, error_code:%u.", ret);
       return ret;
     }
     ret = DumpInput(task);
     if (ret != SUCCESS) {
-      GELOGE(ret, "Dump input failed when in dumping all");
+      GELOGE(ret, "[Dump][Input]Failed when in dumping all, error_code:%u.", ret);
       return ret;
     }
     op_mapping_info.mutable_task()->Add(std::move(task));
   }
   auto ret = ExecutorDumpOp(op_mapping_info);
   if (ret != SUCCESS) {
-    GELOGE(ret, "Executor dump op failed");
+    GELOGE(ret, "[Dump][Op]Failed, error_code:%u.", ret);
     return ret;
   }
   return SUCCESS;
diff --git a/ge/common/dump/opdebug_register.cc b/ge/common/dump/opdebug_register.cc
index 340b89e5..d4011a10 100644
--- a/ge/common/dump/opdebug_register.cc
+++ b/ge/common/dump/opdebug_register.cc
@@ -27,14 +27,20 @@ Status OpdebugRegister::RegisterDebugForModel(rtModel_t model_handle, uint32_t o
   GELOGD("Start to register debug for model in overflow");
   auto ret = MallocMemForOpdebug();
   if (ret != SUCCESS) {
-    GELOGE(ret, "Malloc memory for opdebug in model overflow failed ,ret:0x%X", ret);
+    GELOGE(ret, "[Malloc][MemoryForOpdebug]Failed in model overflow, ret:0x%X, op_debug_mode:%u.",
+           ret, op_debug_mode);
+    REPORT_INNER_ERROR("E19999", "Malloc memory for opdebug failed in model overflow, ret:0x%X, op_debug_mode:%u.",
+                       ret, op_debug_mode);
     return ret;
   }
   uint32_t debug_stream_id = 0;
   uint32_t debug_task_id = 0;
   auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Register][rtDebug]Failed in model overflow, ret: 0x%X, op_debug_mode:%u.",
+           rt_ret, op_debug_mode);
+    REPORT_INNER_ERROR("E19999", "Register rtDebug failed in model overflow, ret:0x%X, op_debug_mode:%u.",
+                       rt_ret, op_debug_mode);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id);
@@ -74,7 +80,9 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
   GELOGD("Start to register debug for stream in stream overflow");
   auto ret = MallocMemForOpdebug();
   if (ret != SUCCESS) {
-    GELOGE(ret, "Malloc memory for opdebug in stream overflow ,ret:0x%X", ret);
+    GELOGE(ret, "[Malloc][MemoryForOpdebug]Failed in stream overflow, ret:0x%X, op_debug_mode:%u.",
+           ret, op_debug_mode);
+    REPORT_INNER_ERROR("E19999", "Malloc memory for opdebug failed in stream overflow, ret:0x%X, op_debug_mode:%u.", ret, op_debug_mode);
     return ret;
   }
 
@@ -83,7 +91,10 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
 #ifdef ONLY_COMPILE_OPEN_SRC
   auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Register][rtDebug]Failed in stream overflow, ret:0x%X, op_debug_mode:%u.",
+           rt_ret, op_debug_mode);
+    REPORT_INNER_ERROR("E19999", "Register rtDebug failed in stream overflow, ret:0x%X, op_debug_mode:%u.",
+                       rt_ret, op_debug_mode);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
 #endif
@@ -125,7 +136,7 @@ void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
 Status OpdebugRegister::MallocMemForOpdebug() {
   rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Malloc][OpDebugMem]Failed, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
 
@@ -133,16 +144,16 @@ Status OpdebugRegister::MallocMemForOpdebug() {
   // For data dump, aicpu needs the pointer to pointer that save the real debug address.
   rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Malloc][P2PDebugMem]Failed, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
   if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Copy][P2PDebugMem]Failed, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
 
   return SUCCESS;
 }
 
-}  // namespace ge
\ No newline at end of file
+}  // namespace ge