Browse Source

Common log optimize

tags/v1.3.0
liyihan2@huawei.com 4 years ago
parent
commit
4ef1970f6d
3 changed files with 41 additions and 25 deletions
  1. +4
    -2
      ge/common/dump/dump_manager.cc
  2. +18
    -15
      ge/common/dump/dump_op.cc
  3. +19
    -8
      ge/common/dump/opdebug_register.cc

+ 4
- 2
ge/common/dump/dump_manager.cc View File

@@ -56,7 +56,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
dump_properties.SetDumpOpSwitch(dump_op_switch); dump_properties.SetDumpOpSwitch(dump_op_switch);
if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) {
dump_properties_map_.emplace(kInferSessionId, dump_properties); dump_properties_map_.emplace(kInferSessionId, dump_properties);
GELOGE(PARAM_INVALID, "Dump list is invalid,dump_op_switch is %s", dump_op_switch.c_str());
GELOGE(PARAM_INVALID, "[Check][DumpList]Failed, dump_op_switch is %s.", dump_op_switch.c_str());
REPORT_INNER_ERROR("E19999", "Check dump list failed, dump_op_switch is %s.", dump_op_switch.c_str());
return PARAM_INVALID; return PARAM_INVALID;
} }


@@ -82,7 +83,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf


dump_path = dump_config.dump_path; dump_path = dump_config.dump_path;
if (dump_path.empty()) { if (dump_path.empty()) {
GELOGE(PARAM_INVALID, "Dump path is empty");
GELOGE(PARAM_INVALID, "[Check][DumpPath]Failed, it is empty.");
REPORT_INNER_ERROR("E19999", "Check dump path failed, it is empty.");
return PARAM_INVALID; return PARAM_INVALID;
} }




+ 18
- 15
ge/common/dump/dump_op.cc View File

@@ -99,7 +99,8 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
} }
int64_t output_size = 0; int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size filed");
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][Param]Get output size failed, output_size:%d.", output_size);
REPORT_INNER_ERROR("E19999", "Get output size failed, output_size:%d.", output_size);
return ACL_ERROR_GE_INTERNAL_ERROR; return ACL_ERROR_GE_INTERNAL_ERROR;
} }
GELOGD("Get output size in lanch dump op is %ld", output_size); GELOGD("Get output size in lanch dump op is %ld", output_size);
@@ -126,7 +127,8 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
} }
int64_t input_size = 0; int64_t input_size = 0;
if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size filed");
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][Param]Get input size failed, input_size:%d.", input_size);
REPORT_INNER_ERROR("E19999", "Get input size failed, input_size:%d.", input_size);
return ACL_ERROR_GE_INTERNAL_ERROR; return ACL_ERROR_GE_INTERNAL_ERROR;
} }
GELOGD("Get input size in lanch dump op is %ld", input_size); GELOGD("Get input size in lanch dump op is %ld", input_size);
@@ -151,30 +153,31 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
size_t proto_size = op_mapping_info.ByteSizeLong(); size_t proto_size = op_mapping_info.ByteSizeLong();
bool ret = op_mapping_info.SerializeToString(&proto_msg); bool ret = op_mapping_info.SerializeToString(&proto_msg);
if (!ret || proto_size == 0) { if (!ret || proto_size == 0) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Protobuf serialize failed, proto_size is %zu", proto_size);
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Serialize][Protobuf]Failed, proto_size:%zu.", proto_size);
REPORT_INNER_ERROR("E19999", "Serialize protobuf failed, proto_size:%zu.", proto_size);
return ACL_ERROR_GE_INTERNAL_ERROR; return ACL_ERROR_GE_INTERNAL_ERROR;
} }


rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM); rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
GELOGE(rt_ret, "[Malloc][ProtoDevMem]Failed, ret:0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }


rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
GELOGE(rt_ret, "[Copy][ProtoDevMem]Failed, ret:0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }


rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM); rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
GELOGE(rt_ret, "[Malloc][ProtoSizeDevMem]Failed, ret:0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }
rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE); rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
GELOGE(rt_ret, "[Copy][ProtoSizeDevMem]Failed, ret:0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }


@@ -193,7 +196,7 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
nullptr, // no need smDesc nullptr, // no need smDesc
stream_); stream_);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret);
GELOGE(rt_ret, "[Call][rtCpuKernelLaunch]Failed, rt_ret:0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }
GELOGI("Kernel launch dump op success"); GELOGI("Kernel launch dump op success");
@@ -205,12 +208,12 @@ Status DumpOp::LaunchDumpOp() {
int32_t device_id = 0; int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id); rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
GELOGE(rt_ret, "[Call][rtGetDevice]Failed, ret:0x%X, device_id:%d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }
if (device_id < 0) { if (device_id < 0) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
"Check device_id failed, device_id = %d, which should be not less than 0.",
"[Check][DeviceId]Failed, device_id:%d, which should be not less than 0.",
device_id); device_id);
return ACL_ERROR_GE_INTERNAL_ERROR; return ACL_ERROR_GE_INTERNAL_ERROR;
} }
@@ -240,7 +243,7 @@ Status DumpOp::LaunchDumpOp() {
if (dump_properties_.GetDumpMode() == kDumpOutput) { if (dump_properties_.GetDumpMode() == kDumpOutput) {
auto ret = DumpOutput(task); auto ret = DumpOutput(task);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "Dump output failed");
GELOGE(ret, "[Dump][Output]Failed, error_code:%u.", ret);
return ret; return ret;
} }
op_mapping_info.mutable_task()->Add(std::move(task)); op_mapping_info.mutable_task()->Add(std::move(task));
@@ -248,7 +251,7 @@ Status DumpOp::LaunchDumpOp() {
if (dump_properties_.GetDumpMode() == kDumpInput) { if (dump_properties_.GetDumpMode() == kDumpInput) {
auto ret = DumpInput(task); auto ret = DumpInput(task);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "Dump input failed");
GELOGE(ret, "[Dump][Input]Failed, error_code:%u.", ret);
return ret; return ret;
} }
op_mapping_info.mutable_task()->Add(std::move(task)); op_mapping_info.mutable_task()->Add(std::move(task));
@@ -256,19 +259,19 @@ Status DumpOp::LaunchDumpOp() {
if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) { if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) {
auto ret = DumpOutput(task); auto ret = DumpOutput(task);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "Dump output failed when in dumping all");
GELOGE(ret, "[Dump][Output]Failed when in dumping all, error_code:%u.", ret);
return ret; return ret;
} }
ret = DumpInput(task); ret = DumpInput(task);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "Dump input failed when in dumping all");
GELOGE(ret, "[Dump][Input]Failed when in dumping all, error_code:%u.", ret);
return ret; return ret;
} }
op_mapping_info.mutable_task()->Add(std::move(task)); op_mapping_info.mutable_task()->Add(std::move(task));
} }
auto ret = ExecutorDumpOp(op_mapping_info); auto ret = ExecutorDumpOp(op_mapping_info);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "Executor dump op failed");
GELOGE(ret, "[Dump][Op]Failed, error_code:%u.", ret);
return ret; return ret;
} }
return SUCCESS; return SUCCESS;


+ 19
- 8
ge/common/dump/opdebug_register.cc View File

@@ -27,14 +27,20 @@ Status OpdebugRegister::RegisterDebugForModel(rtModel_t model_handle, uint32_t o
GELOGD("Start to register debug for model in overflow"); GELOGD("Start to register debug for model in overflow");
auto ret = MallocMemForOpdebug(); auto ret = MallocMemForOpdebug();
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "Malloc memory for opdebug in model overflow failed ,ret:0x%X", ret);
GELOGE(ret, "[Malloc][MemoryForOpdebug]Failed in model overflow, ret:0x%X, op_debug_mode:%u.",
ret, op_debug_mode);
REPORT_INNER_ERROR("E19999", "Malloc memory for opdebug failed in model overflow, ret:0x%X, op_debug_mode:%u.",
ret, op_debug_mode);
return ret; return ret;
} }
uint32_t debug_stream_id = 0; uint32_t debug_stream_id = 0;
uint32_t debug_task_id = 0; uint32_t debug_task_id = 0;
auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id); auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
GELOGE(RT_FAILED, "[Register][rtDebug]Failed in model overflow, ret: 0x%X, op_debug_mode:%u.",
rt_ret, op_debug_mode);
REPORT_INNER_ERROR("E19999", "Register rtDebug failed in model overflow, ret:0x%X, op_debug_mode:%u.",
rt_ret, op_debug_mode);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }
GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id); GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id);
@@ -74,7 +80,9 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
GELOGD("Start to register debug for stream in stream overflow"); GELOGD("Start to register debug for stream in stream overflow");
auto ret = MallocMemForOpdebug(); auto ret = MallocMemForOpdebug();
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "Malloc memory for opdebug in stream overflow ,ret:0x%X", ret);
GELOGE(ret, "[Malloc][MemoryForOpdebug]Failed in stream overflow, ret:0x%X, op_debug_mode:%u.",
ret, op_debug_mode);
REPORT_INNER_ERROR("E19999", "Malloc memory for opdebug failed in stream overflow, ret:0x%X, op_debug_mode:%u.", ret, op_debug_mode);
return ret; return ret;
} }


@@ -83,7 +91,10 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
#ifdef ONLY_COMPILE_OPEN_SRC #ifdef ONLY_COMPILE_OPEN_SRC
auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id); auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
GELOGE(RT_FAILED, "[Register][rtDebug]Failed in stream overflow, ret:0x%X, op_debug_mode:%u.",
rt_ret, op_debug_mode);
REPORT_INNER_ERROR("E19999", "Register rtDebug failed in stream overflow, ret:0x%X, op_debug_mode:%u.",
rt_ret, op_debug_mode);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }
#endif #endif
@@ -125,7 +136,7 @@ void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
Status OpdebugRegister::MallocMemForOpdebug() { Status OpdebugRegister::MallocMemForOpdebug() {
rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR); rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
GELOGE(RT_FAILED, "[Malloc][OpDebugMem]Failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }


@@ -133,16 +144,16 @@ Status OpdebugRegister::MallocMemForOpdebug() {
// For data dump, aicpu needs the pointer to pointer that save the real debug address. // For data dump, aicpu needs the pointer to pointer that save the real debug address.
rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM); rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
GELOGE(RT_FAILED, "[Malloc][P2PDebugMem]Failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }
rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
GELOGE(RT_FAILED, "[Copy][P2PDebugMem]Failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }


return SUCCESS; return SUCCESS;
} }


} // namespace ge
} // namespace ge

Loading…
Cancel
Save