@@ -23,6 +23,7 @@ const char *const kDumpOFF = "OFF"; | |||||
const char *const kDumpoff = "off"; | const char *const kDumpoff = "off"; | ||||
const char *const kDumpOn = "on"; | const char *const kDumpOn = "on"; | ||||
const uint64_t kInferSessionId = 0; | const uint64_t kInferSessionId = 0; | ||||
const uint32_t kAllOverflow = 3; | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetInstance() { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetInstance() { | ||||
@@ -30,78 +31,103 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetIn | |||||
return instance; | return instance; | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf(const DumpConfig &dump_config) { | |||||
DumpProperties dump_properties; | |||||
std::string dump_status; | |||||
std::string dump_path; | |||||
std::string dump_mode; | |||||
std::string dump_op_switch; | |||||
if (dump_config.dump_status.empty()) { | |||||
bool DumpManager::NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||||
if (dump_config.dump_status.empty() && dump_config.dump_debug.empty()) { | |||||
dump_properties_map_.emplace(kInferSessionId, dump_properties); | dump_properties_map_.emplace(kInferSessionId, dump_properties); | ||||
GELOGI("Dump does not open"); | GELOGI("Dump does not open"); | ||||
return SUCCESS; | |||||
return false; | |||||
} | } | ||||
dump_status = dump_config.dump_status; | |||||
GELOGI("Dump status is %s", dump_status.c_str()); | |||||
if (dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) { | |||||
GELOGI("Dump status is %s, dump debug is %s.", dump_config.dump_status.c_str(), dump_config.dump_debug.c_str()); | |||||
if ((dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) && | |||||
dump_config.dump_debug == kDumpoff) { | |||||
dump_properties.ClearDumpPropertyValue(); | dump_properties.ClearDumpPropertyValue(); | ||||
dump_properties_map_.emplace(kInferSessionId, dump_properties); | dump_properties_map_.emplace(kInferSessionId, dump_properties); | ||||
return SUCCESS; | |||||
return false; | |||||
} | |||||
if (dump_config.dump_status == kDumpOn && dump_config.dump_debug == kDumpOn) { | |||||
GELOGW("Not support coexistence of dump debug and dump status."); | |||||
return false; | |||||
} | } | ||||
dump_properties.SetDumpStatus(dump_status); | |||||
return true; | |||||
} | |||||
dump_op_switch = dump_config.dump_op_switch; | |||||
dump_properties.SetDumpOpSwitch(dump_op_switch); | |||||
if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { | |||||
dump_properties_map_.emplace(kInferSessionId, dump_properties); | |||||
GELOGE(PARAM_INVALID, "[Check][DumpList]Invalid, dump_op_switch is %s", | |||||
dump_op_switch.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "Dump list check invalid, dump_op_switch is %s", | |||||
dump_op_switch.c_str()); | |||||
return PARAM_INVALID; | |||||
void DumpManager::SetDumpDebugConf(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||||
if (dump_config.dump_debug == kDumpOn) { | |||||
GELOGI("Only do overflow detection, dump debug is %s.", dump_config.dump_debug.c_str()); | |||||
dump_properties.InitInferOpDebug(); | |||||
dump_properties.SetOpDebugMode(kAllOverflow); | |||||
} | } | ||||
} | |||||
if (!dump_config.dump_list.empty()) { | |||||
for (auto model_dump : dump_config.dump_list) { | |||||
std::string model_name = model_dump.model_name; | |||||
GELOGI("Dump model is %s", model_name.c_str()); | |||||
std::set<std::string> dump_layers; | |||||
for (auto layer : model_dump.layers) { | |||||
GELOGI("Dump layer is %s in model", layer.c_str()); | |||||
dump_layers.insert(layer); | |||||
} | |||||
dump_properties.AddPropertyValue(model_name, dump_layers); | |||||
void DumpManager::SetDumpList(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||||
for (const auto &model_dump : dump_config.dump_list) { | |||||
std::string model_name = model_dump.model_name; | |||||
GELOGI("Dump model is %s", model_name.c_str()); | |||||
std::set<std::string> dump_layers; | |||||
for (const auto &layer : model_dump.layers) { | |||||
GELOGI("Dump layer is %s in model", layer.c_str()); | |||||
dump_layers.insert(layer); | |||||
} | |||||
dump_properties.AddPropertyValue(model_name, dump_layers); | |||||
} | |||||
} | |||||
/// Applies the "normal" (data) dump settings when dump_status is "on".
///
/// @param dump_config      Configuration supplied by the caller.
/// @param dump_properties  Receives dump status, op switch, dump list and mode.
/// @return SUCCESS when dump_status is not "on" (nothing to do) or when the
///         settings were applied; PARAM_INVALID when dump_op_switch is "off"
///         and no dump list was given.
Status DumpManager::SetNormalDumpConf(const DumpConfig &dump_config, DumpProperties &dump_properties) {
  if (dump_config.dump_status != kDumpOn) {
    return SUCCESS;
  }
  GELOGI("Only do normal dump process, dump status is %s.", dump_config.dump_status.c_str());
  dump_properties.SetDumpStatus(dump_config.dump_status);

  const std::string &dump_op_switch = dump_config.dump_op_switch;
  dump_properties.SetDumpOpSwitch(dump_op_switch);

  const bool has_dump_list = !dump_config.dump_list.empty();
  // With single-op dump switched off and no model list there is nothing to dump.
  if ((dump_op_switch == kDumpoff) && !has_dump_list) {
    dump_properties_map_.emplace(kInferSessionId, dump_properties);
    GELOGE(PARAM_INVALID, "[Check][DumpList]Invalid, dump_op_switch is %s", dump_op_switch.c_str());
    REPORT_INNER_ERROR("E19999", "Dump list check invalid, dump_op_switch is %s", dump_op_switch.c_str());
    return PARAM_INVALID;
  }

  if (has_dump_list) {
    if (dump_op_switch == kDumpOn) {
      GELOGI("Start to dump model and single op, dump op switch is %s", dump_op_switch.c_str());
    } else {
      GELOGI("Only dump model, dump op switch is %s", dump_op_switch.c_str());
    }
    SetDumpList(dump_config, dump_properties);
  } else {
    GELOGI("Only dump single op, dump op switch is %s", dump_op_switch.c_str());
  }

  GELOGI("Dump mode is %s", dump_config.dump_mode.c_str());
  dump_properties.SetDumpMode(dump_config.dump_mode);
  return SUCCESS;
}
dump_path = dump_config.dump_path; | |||||
/// Validates the configured dump path and stores its final form.
///
/// The stored path is "<dump_path>/<CurrentTimeInStr()>/": a trailing '/' is
/// added to the configured path if missing, then the value returned by
/// CurrentTimeInStr() (presumably a timestamp — defined elsewhere) and another
/// '/' are appended.
///
/// @param dump_config      Source of dump_path.
/// @param dump_properties  Receives the final path through SetDumpPath.
/// @return PARAM_INVALID when dump_path is empty, SUCCESS otherwise.
Status DumpManager::SetDumpPath(const DumpConfig &dump_config, DumpProperties &dump_properties) {
  std::string final_path = dump_config.dump_path;
  if (final_path.empty()) {
    GELOGE(PARAM_INVALID, "[Check][DumpPath]It is empty");
    REPORT_INNER_ERROR("E19999", "Dump path check is empty");
    return PARAM_INVALID;
  }

  // Non-empty is guaranteed above, so back() is safe here.
  if (final_path.back() != '/') {
    final_path += "/";
  }
  final_path += CurrentTimeInStr();
  final_path += "/";

  GELOGI("Dump path is %s", final_path.c_str());
  dump_properties.SetDumpPath(final_path);
  return SUCCESS;
}
dump_mode = dump_config.dump_mode; | |||||
GELOGI("Dump mode is %s", dump_mode.c_str()); | |||||
dump_properties.SetDumpMode(dump_mode); | |||||
/// Builds the DumpProperties for the inference session from dump_config and
/// registers them under kInferSessionId.
///
/// Steps: NeedDoDump decides whether anything has to be configured; if so the
/// overflow-detection settings, the normal dump settings and the dump path are
/// applied in that order before the result is stored.
///
/// @param dump_config  Configuration supplied by the caller.
/// @return SUCCESS when no dump is needed or everything was applied. A failing
///         step is handled by GE_CHK_STATUS_RET (defined elsewhere; presumably
///         it logs the message and returns the failing status).
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf(const DumpConfig &dump_config) {
  DumpProperties properties;
  const bool need_dump = NeedDoDump(dump_config, properties);
  if (!need_dump) {
    GELOGD("No need do dump process.");
    return SUCCESS;
  }

  SetDumpDebugConf(dump_config, properties);
  GE_CHK_STATUS_RET(SetNormalDumpConf(dump_config, properties),
                    "[Init][DumpConf] failed when dump status is on.");
  GE_CHK_STATUS_RET(SetDumpPath(dump_config, properties),
                    "[Init][DumpPath] failed.");

  dump_properties_map_[kInferSessionId] = properties;
  return SUCCESS;
}
@@ -34,6 +34,11 @@ class DumpManager { | |||||
void RemoveDumpProperties(uint64_t session_id); | void RemoveDumpProperties(uint64_t session_id); | ||||
private: | private: | ||||
bool NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||||
void SetDumpDebugConf(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||||
Status SetDumpPath(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||||
Status SetNormalDumpConf(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||||
void SetDumpList(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||||
std::mutex mutex_; | std::mutex mutex_; | ||||
std::map<uint64_t, DumpProperties> dump_properties_map_; | std::map<uint64_t, DumpProperties> dump_properties_map_; | ||||
}; | }; | ||||
@@ -53,7 +53,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti | |||||
dump_path_.clear(); | dump_path_.clear(); | ||||
dump_step_.clear(); | dump_step_.clear(); | ||||
dump_mode_.clear(); | dump_mode_.clear(); | ||||
is_op_debug_ = false; | |||||
is_train_op_debug_ = false; | |||||
is_infer_op_debug_ = false; | |||||
op_debug_mode_ = 0; | op_debug_mode_ = 0; | ||||
std::string enable_dump; | std::string enable_dump; | ||||
@@ -124,7 +125,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::ClearDumpI | |||||
dump_mode_.clear(); | dump_mode_.clear(); | ||||
dump_op_switch_.clear(); | dump_op_switch_.clear(); | ||||
dump_status_.clear(); | dump_status_.clear(); | ||||
is_op_debug_ = false; | |||||
is_train_op_debug_ = false; | |||||
is_infer_op_debug_ = false; | |||||
op_debug_mode_ = 0; | op_debug_mode_ = 0; | ||||
} | } | ||||
@@ -203,6 +205,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti | |||||
return dump_status_; | return dump_status_; | ||||
} | } | ||||
/// Marks these properties as having inference op debug enabled
/// (sets is_infer_op_debug_).
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitInferOpDebug() {
  this->is_infer_op_debug_ = true;
}
/// Stores the op debug mode.
///
/// @param op_debug_mode  Value written to op_debug_mode_ unchanged; no
///                       validation is performed here.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetOpDebugMode(const uint32_t &op_debug_mode) {
  this->op_debug_mode_ = op_debug_mode;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( | ||||
const std::string &dump_op_switch) { | const std::string &dump_op_switch) { | ||||
dump_op_switch_ = dump_op_switch; | dump_op_switch_ = dump_op_switch; | ||||
@@ -237,7 +247,8 @@ void DumpProperties::CopyFrom(const DumpProperties &other) { | |||||
dump_op_switch_ = other.dump_op_switch_; | dump_op_switch_ = other.dump_op_switch_; | ||||
model_dump_properties_map_ = other.model_dump_properties_map_; | model_dump_properties_map_ = other.model_dump_properties_map_; | ||||
is_op_debug_ = other.is_op_debug_; | |||||
is_train_op_debug_ = other.is_train_op_debug_; | |||||
is_infer_op_debug_ = other.is_infer_op_debug_; | |||||
op_debug_mode_ = other.op_debug_mode_; | op_debug_mode_ = other.op_debug_mode_; | ||||
} | } | ||||
} | } | ||||
@@ -254,15 +265,15 @@ void DumpProperties::SetDumpDebugOptions() { | |||||
if (dump_debug_mode == OP_DEBUG_AICORE) { | if (dump_debug_mode == OP_DEBUG_AICORE) { | ||||
GELOGD("ge.exec.dumpDebugMode=aicore_overflow, op debug is open."); | GELOGD("ge.exec.dumpDebugMode=aicore_overflow, op debug is open."); | ||||
is_op_debug_ = true; | |||||
is_train_op_debug_ = true; | |||||
op_debug_mode_ = kAicoreOverflow; | op_debug_mode_ = kAicoreOverflow; | ||||
} else if (dump_debug_mode == OP_DEBUG_ATOMIC) { | } else if (dump_debug_mode == OP_DEBUG_ATOMIC) { | ||||
GELOGD("ge.exec.dumpDebugMode=atomic_overflow, op debug is open."); | GELOGD("ge.exec.dumpDebugMode=atomic_overflow, op debug is open."); | ||||
is_op_debug_ = true; | |||||
is_train_op_debug_ = true; | |||||
op_debug_mode_ = kAtomicOverflow; | op_debug_mode_ = kAtomicOverflow; | ||||
} else if (dump_debug_mode == OP_DEBUG_ALL) { | } else if (dump_debug_mode == OP_DEBUG_ALL) { | ||||
GELOGD("ge.exec.dumpDebugMode=all, op debug is open."); | GELOGD("ge.exec.dumpDebugMode=all, op debug is open."); | ||||
is_op_debug_ = true; | |||||
is_train_op_debug_ = true; | |||||
op_debug_mode_ = kAllOverflow; | op_debug_mode_ = kAllOverflow; | ||||
} else { | } else { | ||||
GELOGW("ge.exec.dumpDebugMode is invalid."); | GELOGW("ge.exec.dumpDebugMode is invalid."); | ||||
@@ -65,16 +65,26 @@ class DumpProperties { | |||||
const std::string &GetDumpStatus() const; | const std::string &GetDumpStatus() const; | ||||
void InitInferOpDebug(); | |||||
bool IsInferOpDebug() const { | |||||
return is_infer_op_debug_; | |||||
} | |||||
void SetDumpOpSwitch(const std::string &dump_op_switch); | void SetDumpOpSwitch(const std::string &dump_op_switch); | ||||
const std::string &GetDumpOpSwitch() const; | const std::string &GetDumpOpSwitch() const; | ||||
bool IsOpDebugOpen() const { return is_op_debug_; } | |||||
bool IsOpDebugOpen() const { | |||||
return is_train_op_debug_ || is_infer_op_debug_; | |||||
} | |||||
bool IsDumpOpen() const; | bool IsDumpOpen() const; | ||||
bool IsSingleOpNeedDump() const; | bool IsSingleOpNeedDump() const; | ||||
void SetOpDebugMode(const uint32_t &op_debug_mode); | |||||
uint32_t GetOpDebugMode() const { return op_debug_mode_; } | uint32_t GetOpDebugMode() const { return op_debug_mode_; } | ||||
const std::string &GetEnableDump() const {return enable_dump_;} | const std::string &GetEnableDump() const {return enable_dump_;} | ||||
@@ -96,7 +106,8 @@ class DumpProperties { | |||||
std::string dump_op_switch_; | std::string dump_op_switch_; | ||||
std::map<std::string, std::set<std::string>> model_dump_properties_map_; | std::map<std::string, std::set<std::string>> model_dump_properties_map_; | ||||
bool is_op_debug_ = false; | |||||
bool is_train_op_debug_ = false; | |||||
bool is_infer_op_debug_ = false; | |||||
uint32_t op_debug_mode_ = 0; | uint32_t op_debug_mode_ = 0; | ||||
}; | }; | ||||
} | } | ||||
@@ -663,7 +663,7 @@ Status DataDumper::LoadDumpInfo() { | |||||
SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); | SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); | ||||
if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { | if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { | ||||
auto ret = ExecuteLoadDumpInfo(op_mapping_info); | |||||
ret = ExecuteLoadDumpInfo(op_mapping_info); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Execute load dump info failed"); | GELOGE(ret, "Execute load dump info failed"); | ||||
return ret; | return ret; | ||||
@@ -544,9 +544,15 @@ Status HybridModelAsyncExecutor::DumpOpDebug() { | |||||
data_dumper_.SetModelId(model_->GetModelId()); | data_dumper_.SetModelId(model_->GetModelId()); | ||||
data_dumper_.SetDeviceId(model_->GetDeviceId()); | data_dumper_.SetDeviceId(model_->GetDeviceId()); | ||||
void *global_step = nullptr; | void *global_step = nullptr; | ||||
TensorValue *varible_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP); | |||||
if (varible_global_step != nullptr) { | |||||
global_step = const_cast<void *>(varible_global_step->GetData()); | |||||
if (dump_properties.IsInferOpDebug()) { | |||||
GELOGD("Init global step when infer with op debug."); | |||||
global_step = executor_->GetContext()->global_step; | |||||
} else { | |||||
TensorValue *varible_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP); | |||||
if (varible_global_step != nullptr) { | |||||
global_step = const_cast<void *>(varible_global_step->GetData()); | |||||
} | |||||
} | } | ||||
void *loop_per_iter = nullptr; | void *loop_per_iter = nullptr; | ||||
@@ -293,6 +293,7 @@ struct DumpConfig { | |||||
std::string dump_mode; | std::string dump_mode; | ||||
std::string dump_status; | std::string dump_status; | ||||
std::string dump_op_switch; | std::string dump_op_switch; | ||||
std::string dump_debug; | |||||
std::vector<ModelDumpConfig> dump_list; | std::vector<ModelDumpConfig> dump_list; | ||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -67,6 +67,35 @@ TEST_F(UTEST_dump_manager, is_dump_single_op_close_success) { | |||||
EXPECT_EQ(ret, ge::SUCCESS); | EXPECT_EQ(ret, ge::SUCCESS); | ||||
} | } | ||||
// Both dump_debug and dump_status are "on": the combination is rejected with a
// warning inside NeedDoDump, yet SetDumpConf still reports SUCCESS.
TEST_F(UTEST_dump_manager, dump_op_debug_on) {
  DumpConfig config;
  config.dump_status = "on";
  config.dump_debug = "on";
  EXPECT_EQ(DumpManager::GetInstance().SetDumpConf(config), ge::SUCCESS);
}
// Only dump_status is "on": dump_op_switch is empty (not "off"), so the dump
// list check passes, but the empty dump_path makes SetDumpConf fail.
TEST_F(UTEST_dump_manager, dump_status_without_dump_list) {
  DumpConfig config;
  config.dump_status = "on";
  EXPECT_EQ(DumpManager::GetInstance().SetDumpConf(config), ge::PARAM_INVALID);
}
// dump_status is "on" and one model with one layer is listed; dump_path is
// still empty, so SetDumpConf is expected to return PARAM_INVALID.
TEST_F(UTEST_dump_manager, dump_status_with_dump_list) {
  ModelDumpConfig model_dump;
  model_dump.model_name = "test";
  model_dump.layers.push_back("first");

  DumpConfig config;
  config.dump_status = "on";
  config.dump_list.push_back(model_dump);

  EXPECT_EQ(DumpManager::GetInstance().SetDumpConf(config), ge::PARAM_INVALID);
}
TEST_F(UTEST_dump_manager, add_dump_properties_success) { | TEST_F(UTEST_dump_manager, add_dump_properties_success) { | ||||
DumpProperties dump_properties; | DumpProperties dump_properties; | ||||
DumpManager::GetInstance().AddDumpProperties(0, dump_properties); | DumpManager::GetInstance().AddDumpProperties(0, dump_properties); | ||||