@@ -23,6 +23,7 @@ const char *const kDumpOFF = "OFF"; | |||
// Lower-case "off" value; compared against dump_status, dump_debug and dump_op_switch.
const char *const kDumpoff = "off"; | |||
// "on" value; compared against dump_status, dump_debug and dump_op_switch.
const char *const kDumpOn = "on"; | |||
// Key under which DumpManager stores the properties built by SetDumpConf.
const uint64_t kInferSessionId = 0; | |||
// Op debug mode handed to DumpProperties::SetOpDebugMode when dump_debug is "on".
const uint32_t kAllOverflow = 3; | |||
} // namespace | |||
namespace ge { | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetInstance() { | |||
@@ -30,78 +31,103 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetIn | |||
return instance; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf(const DumpConfig &dump_config) { | |||
DumpProperties dump_properties; | |||
std::string dump_status; | |||
std::string dump_path; | |||
std::string dump_mode; | |||
std::string dump_op_switch; | |||
if (dump_config.dump_status.empty()) { | |||
bool DumpManager::NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||
if (dump_config.dump_status.empty() && dump_config.dump_debug.empty()) { | |||
dump_properties_map_.emplace(kInferSessionId, dump_properties); | |||
GELOGI("Dump does not open"); | |||
return SUCCESS; | |||
return false; | |||
} | |||
dump_status = dump_config.dump_status; | |||
GELOGI("Dump status is %s", dump_status.c_str()); | |||
if (dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) { | |||
GELOGI("Dump status is %s, dump debug is %s.", dump_config.dump_status.c_str(), dump_config.dump_debug.c_str()); | |||
if ((dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) && | |||
dump_config.dump_debug == kDumpoff) { | |||
dump_properties.ClearDumpPropertyValue(); | |||
dump_properties_map_.emplace(kInferSessionId, dump_properties); | |||
return SUCCESS; | |||
return false; | |||
} | |||
if (dump_config.dump_status == kDumpOn && dump_config.dump_debug == kDumpOn) { | |||
GELOGW("Not support coexistence of dump debug and dump status."); | |||
return false; | |||
} | |||
dump_properties.SetDumpStatus(dump_status); | |||
return true; | |||
} | |||
dump_op_switch = dump_config.dump_op_switch; | |||
dump_properties.SetDumpOpSwitch(dump_op_switch); | |||
if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { | |||
dump_properties_map_.emplace(kInferSessionId, dump_properties); | |||
GELOGE(PARAM_INVALID, "[Check][DumpList]Invalid, dump_op_switch is %s", | |||
dump_op_switch.c_str()); | |||
REPORT_INNER_ERROR("E19999", "Dump list check invalid, dump_op_switch is %s", | |||
dump_op_switch.c_str()); | |||
return PARAM_INVALID; | |||
void DumpManager::SetDumpDebugConf(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||
if (dump_config.dump_debug == kDumpOn) { | |||
GELOGI("Only do overflow detection, dump debug is %s.", dump_config.dump_debug.c_str()); | |||
dump_properties.InitInferOpDebug(); | |||
dump_properties.SetOpDebugMode(kAllOverflow); | |||
} | |||
} | |||
if (!dump_config.dump_list.empty()) { | |||
for (auto model_dump : dump_config.dump_list) { | |||
std::string model_name = model_dump.model_name; | |||
GELOGI("Dump model is %s", model_name.c_str()); | |||
std::set<std::string> dump_layers; | |||
for (auto layer : model_dump.layers) { | |||
GELOGI("Dump layer is %s in model", layer.c_str()); | |||
dump_layers.insert(layer); | |||
} | |||
dump_properties.AddPropertyValue(model_name, dump_layers); | |||
void DumpManager::SetDumpList(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||
for (const auto &model_dump : dump_config.dump_list) { | |||
std::string model_name = model_dump.model_name; | |||
GELOGI("Dump model is %s", model_name.c_str()); | |||
std::set<std::string> dump_layers; | |||
for (const auto &layer : model_dump.layers) { | |||
GELOGI("Dump layer is %s in model", layer.c_str()); | |||
dump_layers.insert(layer); | |||
} | |||
dump_properties.AddPropertyValue(model_name, dump_layers); | |||
} | |||
} | |||
// Applies the "normal dump" part of the configuration.
//
// Nothing is changed unless dump_status is "on". When it is, the dump status, op switch,
// optional dump list and dump mode are copied into dump_properties.
//
// dump_config:     configuration supplied by the caller.
// dump_properties: properties under construction.
// Returns PARAM_INVALID when dump_op_switch is "off" and dump_list is empty
// (there would be nothing to dump), SUCCESS otherwise.
Status DumpManager::SetNormalDumpConf(const DumpConfig &dump_config, DumpProperties &dump_properties) {
  if (dump_config.dump_status != kDumpOn) {
    return SUCCESS;
  }
  GELOGI("Only do normal dump process, dump status is %s.", dump_config.dump_status.c_str());
  dump_properties.SetDumpStatus(dump_config.dump_status);

  const std::string &dump_op_switch = dump_config.dump_op_switch;
  dump_properties.SetDumpOpSwitch(dump_op_switch);
  if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) {
    // The partially filled properties are still recorded for the session before failing.
    dump_properties_map_.emplace(kInferSessionId, dump_properties);
    GELOGE(PARAM_INVALID, "[Check][DumpList]Invalid, dump_op_switch is %s", dump_op_switch.c_str());
    REPORT_INNER_ERROR("E19999", "Dump list check invalid, dump_op_switch is %s", dump_op_switch.c_str());
    return PARAM_INVALID;
  }

  if (!dump_config.dump_list.empty()) {
    if (dump_op_switch == kDumpOn) {
      GELOGI("Start to dump model and single op, dump op switch is %s", dump_op_switch.c_str());
    } else {
      GELOGI("Only dump model, dump op switch is %s", dump_op_switch.c_str());
    }
    SetDumpList(dump_config, dump_properties);
  } else {
    GELOGI("Only dump single op, dump op switch is %s", dump_op_switch.c_str());
  }

  // The dump mode is applied for every accepted "on" configuration, with or without a dump list.
  GELOGI("Dump mode is %s", dump_config.dump_mode.c_str());
  dump_properties.SetDumpMode(dump_config.dump_mode);
  return SUCCESS;
}
dump_path = dump_config.dump_path; | |||
// Validates the configured dump path and stores "<dump_path>/<CurrentTimeInStr()>/"
// in dump_properties.
//
// Returns PARAM_INVALID when dump_config.dump_path is empty, SUCCESS otherwise.
Status DumpManager::SetDumpPath(const DumpConfig &dump_config, DumpProperties &dump_properties) {
  if (dump_config.dump_path.empty()) {
    GELOGE(PARAM_INVALID, "[Check][DumpPath]It is empty");
    REPORT_INNER_ERROR("E19999", "Dump path check is empty");
    return PARAM_INVALID;
  }

  std::string full_path = dump_config.dump_path;
  // Make sure exactly one separator sits between the base path and the sub-directory.
  if (full_path.back() != '/') {
    full_path += "/";
  }
  full_path += CurrentTimeInStr();
  full_path += "/";

  GELOGI("Dump path is %s", full_path.c_str());
  dump_properties.SetDumpPath(full_path);
  return SUCCESS;
}
dump_mode = dump_config.dump_mode; | |||
GELOGI("Dump mode is %s", dump_mode.c_str()); | |||
dump_properties.SetDumpMode(dump_mode); | |||
// Builds the dump properties of the inference session from dump_config.
//
// Returns SUCCESS immediately when NeedDoDump reports that nothing has to be configured.
// Otherwise the debug settings, normal dump settings and dump path are applied in that
// order, and the result is stored under kInferSessionId. A failing step is handled by
// GE_CHK_STATUS_RET before the properties are stored.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf(const DumpConfig &dump_config) {
  DumpProperties properties;
  const bool dump_needed = NeedDoDump(dump_config, properties);
  if (!dump_needed) {
    GELOGD("No need do dump process.");
    return SUCCESS;
  }

  SetDumpDebugConf(dump_config, properties);
  GE_CHK_STATUS_RET(SetNormalDumpConf(dump_config, properties), "[Init][DumpConf] failed when dump status is on.");
  GE_CHK_STATUS_RET(SetDumpPath(dump_config, properties), "[Init][DumpPath] failed.");

  dump_properties_map_[kInferSessionId] = properties;
  return SUCCESS;
}
@@ -34,6 +34,11 @@ class DumpManager { | |||
void RemoveDumpProperties(uint64_t session_id); | |||
private: | |||
// Returns false when SetDumpConf has nothing to configure: dump not set, dump switched off,
// or dump_status and dump_debug both "on".
bool NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||
// Enables inference op debug with the all-overflow mode when dump_debug is "on".
void SetDumpDebugConf(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||
// Rejects an empty dump_path; otherwise stores the path extended by a CurrentTimeInStr() sub-directory.
Status SetDumpPath(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||
// Applies dump status, op switch, dump list and dump mode when dump_status is "on".
Status SetNormalDumpConf(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||
// Adds every model of dump_config.dump_list, with its layers, to dump_properties.
void SetDumpList(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||
std::mutex mutex_; | |||
std::map<uint64_t, DumpProperties> dump_properties_map_; | |||
}; | |||
@@ -53,7 +53,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti | |||
dump_path_.clear(); | |||
dump_step_.clear(); | |||
dump_mode_.clear(); | |||
is_op_debug_ = false; | |||
is_train_op_debug_ = false; | |||
is_infer_op_debug_ = false; | |||
op_debug_mode_ = 0; | |||
std::string enable_dump; | |||
@@ -124,7 +125,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::ClearDumpI | |||
dump_mode_.clear(); | |||
dump_op_switch_.clear(); | |||
dump_status_.clear(); | |||
is_op_debug_ = false; | |||
is_train_op_debug_ = false; | |||
is_infer_op_debug_ = false; | |||
op_debug_mode_ = 0; | |||
} | |||
@@ -203,6 +205,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti | |||
return dump_status_; | |||
} | |||
// Marks op debug as enabled for inference by setting is_infer_op_debug_.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitInferOpDebug() { | |||
is_infer_op_debug_ = true; | |||
} | |||
// Stores op_debug_mode in op_debug_mode_; the value is not validated here.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetOpDebugMode(const uint32_t &op_debug_mode) { | |||
op_debug_mode_ = op_debug_mode; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( | |||
const std::string &dump_op_switch) { | |||
dump_op_switch_ = dump_op_switch; | |||
@@ -237,7 +247,8 @@ void DumpProperties::CopyFrom(const DumpProperties &other) { | |||
dump_op_switch_ = other.dump_op_switch_; | |||
model_dump_properties_map_ = other.model_dump_properties_map_; | |||
is_op_debug_ = other.is_op_debug_; | |||
is_train_op_debug_ = other.is_train_op_debug_; | |||
is_infer_op_debug_ = other.is_infer_op_debug_; | |||
op_debug_mode_ = other.op_debug_mode_; | |||
} | |||
} | |||
@@ -254,15 +265,15 @@ void DumpProperties::SetDumpDebugOptions() { | |||
if (dump_debug_mode == OP_DEBUG_AICORE) { | |||
GELOGD("ge.exec.dumpDebugMode=aicore_overflow, op debug is open."); | |||
is_op_debug_ = true; | |||
is_train_op_debug_ = true; | |||
op_debug_mode_ = kAicoreOverflow; | |||
} else if (dump_debug_mode == OP_DEBUG_ATOMIC) { | |||
GELOGD("ge.exec.dumpDebugMode=atomic_overflow, op debug is open."); | |||
is_op_debug_ = true; | |||
is_train_op_debug_ = true; | |||
op_debug_mode_ = kAtomicOverflow; | |||
} else if (dump_debug_mode == OP_DEBUG_ALL) { | |||
GELOGD("ge.exec.dumpDebugMode=all, op debug is open."); | |||
is_op_debug_ = true; | |||
is_train_op_debug_ = true; | |||
op_debug_mode_ = kAllOverflow; | |||
} else { | |||
GELOGW("ge.exec.dumpDebugMode is invalid."); | |||
@@ -65,16 +65,26 @@ class DumpProperties { | |||
const std::string &GetDumpStatus() const; | |||
void InitInferOpDebug(); | |||
// True once InitInferOpDebug() has been called and the flag has not been reset since.
bool IsInferOpDebug() const { | |||
return is_infer_op_debug_; | |||
} | |||
void SetDumpOpSwitch(const std::string &dump_op_switch); | |||
const std::string &GetDumpOpSwitch() const; | |||
bool IsOpDebugOpen() const { return is_op_debug_; } | |||
// True when op debug is enabled through either flag, is_train_op_debug_ or is_infer_op_debug_.
bool IsOpDebugOpen() const { | |||
return is_train_op_debug_ || is_infer_op_debug_; | |||
} | |||
bool IsDumpOpen() const; | |||
bool IsSingleOpNeedDump() const; | |||
void SetOpDebugMode(const uint32_t &op_debug_mode); | |||
uint32_t GetOpDebugMode() const { return op_debug_mode_; } | |||
const std::string &GetEnableDump() const {return enable_dump_;} | |||
@@ -96,7 +106,8 @@ class DumpProperties { | |||
std::string dump_op_switch_; | |||
std::map<std::string, std::set<std::string>> model_dump_properties_map_; | |||
bool is_op_debug_ = false; | |||
bool is_train_op_debug_ = false; | |||
bool is_infer_op_debug_ = false; | |||
uint32_t op_debug_mode_ = 0; | |||
}; | |||
} | |||
@@ -663,7 +663,7 @@ Status DataDumper::LoadDumpInfo() { | |||
SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); | |||
if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { | |||
auto ret = ExecuteLoadDumpInfo(op_mapping_info); | |||
ret = ExecuteLoadDumpInfo(op_mapping_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Execute load dump info failed"); | |||
return ret; | |||
@@ -544,9 +544,15 @@ Status HybridModelAsyncExecutor::DumpOpDebug() { | |||
data_dumper_.SetModelId(model_->GetModelId()); | |||
data_dumper_.SetDeviceId(model_->GetDeviceId()); | |||
void *global_step = nullptr; | |||
TensorValue *varible_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP); | |||
if (varible_global_step != nullptr) { | |||
global_step = const_cast<void *>(varible_global_step->GetData()); | |||
if (dump_properties.IsInferOpDebug()) { | |||
GELOGD("Init global step when infer with op debug."); | |||
global_step = executor_->GetContext()->global_step; | |||
} else { | |||
TensorValue *varible_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP); | |||
if (varible_global_step != nullptr) { | |||
global_step = const_cast<void *>(varible_global_step->GetData()); | |||
} | |||
} | |||
void *loop_per_iter = nullptr; | |||
@@ -293,6 +293,7 @@ struct DumpConfig { | |||
std::string dump_mode; | |||
std::string dump_status; | |||
std::string dump_op_switch; | |||
std::string dump_debug; | |||
std::vector<ModelDumpConfig> dump_list; | |||
}; | |||
} // namespace ge | |||
@@ -67,6 +67,35 @@ TEST_F(UTEST_dump_manager, is_dump_single_op_close_success) { | |||
EXPECT_EQ(ret, ge::SUCCESS); | |||
} | |||
// dump_debug and dump_status are both on | |||
// With both switches "on" NeedDoDump rejects the combination, so SetDumpConf
// configures nothing and still reports SUCCESS.
TEST_F(UTEST_dump_manager, dump_op_debug_on) {
  DumpConfig config;
  config.dump_status = "on";
  config.dump_debug = "on";
  const auto status = DumpManager::GetInstance().SetDumpConf(config);
  EXPECT_EQ(status, ge::SUCCESS);
}
// just dump_status is on | |||
// Only dump_status is set; dump_path stays empty, so SetDumpConf is expected
// to fail with PARAM_INVALID.
TEST_F(UTEST_dump_manager, dump_status_without_dump_list) {
  DumpConfig config;
  config.dump_status = "on";
  const auto status = DumpManager::GetInstance().SetDumpConf(config);
  EXPECT_EQ(status, ge::PARAM_INVALID);
}
// dump_status is on with dump_list | |||
// dump_status "on" plus one model with one layer. dump_path is still empty,
// so SetDumpConf is expected to fail with PARAM_INVALID.
TEST_F(UTEST_dump_manager, dump_status_with_dump_list) {
  ModelDumpConfig model_entry;
  model_entry.model_name = "test";
  model_entry.layers.push_back("first");

  DumpConfig config;
  config.dump_status = "on";
  config.dump_list.push_back(model_entry);

  const auto status = DumpManager::GetInstance().SetDumpConf(config);
  EXPECT_EQ(status, ge::PARAM_INVALID);
}
TEST_F(UTEST_dump_manager, add_dump_properties_success) { | |||
DumpProperties dump_properties; | |||
DumpManager::GetInstance().AddDumpProperties(0, dump_properties); | |||