From: @zhou_lili Reviewed-by: @sheng-nan Signed-off-by:tags/v1.3.0
| @@ -23,6 +23,7 @@ const char *const kDumpOFF = "OFF"; | |||
| const char *const kDumpoff = "off"; | |||
| const char *const kDumpOn = "on"; | |||
| const uint64_t kInferSessionId = 0; | |||
| const uint32_t kAllOverflow = 3; | |||
| } // namespace | |||
| namespace ge { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetInstance() { | |||
| @@ -30,78 +31,103 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetIn | |||
| return instance; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf(const DumpConfig &dump_config) { | |||
| DumpProperties dump_properties; | |||
| std::string dump_status; | |||
| std::string dump_path; | |||
| std::string dump_mode; | |||
| std::string dump_op_switch; | |||
| if (dump_config.dump_status.empty()) { | |||
| bool DumpManager::NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||
| if (dump_config.dump_status.empty() && dump_config.dump_debug.empty()) { | |||
| dump_properties_map_.emplace(kInferSessionId, dump_properties); | |||
| GELOGI("Dump does not open"); | |||
| return SUCCESS; | |||
| return false; | |||
| } | |||
| dump_status = dump_config.dump_status; | |||
| GELOGI("Dump status is %s", dump_status.c_str()); | |||
| if (dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) { | |||
| GELOGI("Dump status is %s, dump debug is %s.", dump_config.dump_status.c_str(), dump_config.dump_debug.c_str()); | |||
| if ((dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) && | |||
| dump_config.dump_debug == kDumpoff) { | |||
| dump_properties.ClearDumpPropertyValue(); | |||
| dump_properties_map_.emplace(kInferSessionId, dump_properties); | |||
| return SUCCESS; | |||
| return false; | |||
| } | |||
| if (dump_config.dump_status == kDumpOn && dump_config.dump_debug == kDumpOn) { | |||
| GELOGW("Not support coexistence of dump debug and dump status."); | |||
| return false; | |||
| } | |||
| dump_properties.SetDumpStatus(dump_status); | |||
| return true; | |||
| } | |||
| dump_op_switch = dump_config.dump_op_switch; | |||
| dump_properties.SetDumpOpSwitch(dump_op_switch); | |||
| if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { | |||
| dump_properties_map_.emplace(kInferSessionId, dump_properties); | |||
| GELOGE(PARAM_INVALID, "[Check][DumpList]Invalid, dump_op_switch is %s", | |||
| dump_op_switch.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "Dump list check invalid, dump_op_switch is %s", | |||
| dump_op_switch.c_str()); | |||
| return PARAM_INVALID; | |||
| void DumpManager::SetDumpDebugConf(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||
| if (dump_config.dump_debug == kDumpOn) { | |||
| GELOGI("Only do overflow detection, dump debug is %s.", dump_config.dump_debug.c_str()); | |||
| dump_properties.InitInferOpDebug(); | |||
| dump_properties.SetOpDebugMode(kAllOverflow); | |||
| } | |||
| } | |||
| if (!dump_config.dump_list.empty()) { | |||
| for (auto model_dump : dump_config.dump_list) { | |||
| std::string model_name = model_dump.model_name; | |||
| GELOGI("Dump model is %s", model_name.c_str()); | |||
| std::set<std::string> dump_layers; | |||
| for (auto layer : model_dump.layers) { | |||
| GELOGI("Dump layer is %s in model", layer.c_str()); | |||
| dump_layers.insert(layer); | |||
| } | |||
| dump_properties.AddPropertyValue(model_name, dump_layers); | |||
| void DumpManager::SetDumpList(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||
| for (const auto &model_dump : dump_config.dump_list) { | |||
| std::string model_name = model_dump.model_name; | |||
| GELOGI("Dump model is %s", model_name.c_str()); | |||
| std::set<std::string> dump_layers; | |||
| for (const auto &layer : model_dump.layers) { | |||
| GELOGI("Dump layer is %s in model", layer.c_str()); | |||
| dump_layers.insert(layer); | |||
| } | |||
| dump_properties.AddPropertyValue(model_name, dump_layers); | |||
| } | |||
| } | |||
| Status DumpManager::SetNormalDumpConf(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||
| if (dump_config.dump_status == kDumpOn) { | |||
| GELOGI("Only do normal dump process, dump status is %s.", dump_config.dump_status.c_str()); | |||
| dump_properties.SetDumpStatus(dump_config.dump_status); | |||
| std::string dump_op_switch = dump_config.dump_op_switch; | |||
| dump_properties.SetDumpOpSwitch(dump_op_switch); | |||
| if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { | |||
| dump_properties_map_.emplace(kInferSessionId, dump_properties); | |||
| GELOGE(PARAM_INVALID, "[Check][DumpList]Invalid, dump_op_switch is %s", dump_op_switch.c_str()); | |||
| REPORT_INNER_ERROR("E19999", "Dump list check invalid, dump_op_switch is %s", dump_op_switch.c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| if (dump_op_switch == kDumpOn) { | |||
| GELOGI("Start to dump model and single op,dump op switch is %s", dump_op_switch.c_str()); | |||
| if (!dump_config.dump_list.empty()) { | |||
| if (dump_op_switch == kDumpOn) { | |||
| GELOGI("Start to dump model and single op, dump op switch is %s", dump_op_switch.c_str()); | |||
| } else { | |||
| GELOGI("Only dump model, dump op switch is %s", dump_op_switch.c_str()); | |||
| } | |||
| SetDumpList(dump_config, dump_properties); | |||
| } else { | |||
| GELOGI("Only dump model,dump op switch is %s", dump_op_switch.c_str()); | |||
| GELOGI("Only dump single op, dump op switch is %s", dump_op_switch.c_str()); | |||
| } | |||
| } else { | |||
| GELOGI("Only dump single op,dump op switch is %s", dump_op_switch.c_str()); | |||
| GELOGI("Dump mode is %s", dump_config.dump_mode.c_str()); | |||
| dump_properties.SetDumpMode(dump_config.dump_mode); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| dump_path = dump_config.dump_path; | |||
| Status DumpManager::SetDumpPath(const DumpConfig &dump_config, DumpProperties &dump_properties) { | |||
| std::string dump_path = dump_config.dump_path; | |||
| if (dump_path.empty()) { | |||
| GELOGE(PARAM_INVALID, "[Check][DumpPath]It is empty"); | |||
| REPORT_INNER_ERROR("E19999", "Dump path check is empty"); | |||
| return PARAM_INVALID; | |||
| } | |||
| if (dump_path[dump_path.size() - 1] != '/') { | |||
| dump_path = dump_path + "/"; | |||
| } | |||
| dump_path = dump_path + CurrentTimeInStr() + "/"; | |||
| GELOGI("Dump path is %s", dump_path.c_str()); | |||
| dump_properties.SetDumpPath(dump_path); | |||
| return SUCCESS; | |||
| } | |||
| dump_mode = dump_config.dump_mode; | |||
| GELOGI("Dump mode is %s", dump_mode.c_str()); | |||
| dump_properties.SetDumpMode(dump_mode); | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf(const DumpConfig &dump_config) { | |||
| DumpProperties dump_properties; | |||
| if (!NeedDoDump(dump_config, dump_properties)) { | |||
| GELOGD("No need do dump process."); | |||
| return SUCCESS; | |||
| } | |||
| SetDumpDebugConf(dump_config, dump_properties); | |||
| GE_CHK_STATUS_RET(SetNormalDumpConf(dump_config, dump_properties), "[Init][DumpConf] failed when dump status is on."); | |||
| GE_CHK_STATUS_RET(SetDumpPath(dump_config, dump_properties), "[Init][DumpPath] failed."); | |||
| dump_properties_map_[kInferSessionId] = dump_properties; | |||
| return SUCCESS; | |||
| } | |||
| @@ -34,6 +34,11 @@ class DumpManager { | |||
| void RemoveDumpProperties(uint64_t session_id); | |||
| private: | |||
| bool NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||
| void SetDumpDebugConf(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||
| Status SetDumpPath(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||
| Status SetNormalDumpConf(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||
| void SetDumpList(const DumpConfig &dump_config, DumpProperties &dump_properties); | |||
| std::mutex mutex_; | |||
| std::map<uint64_t, DumpProperties> dump_properties_map_; | |||
| }; | |||
| @@ -53,7 +53,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti | |||
| dump_path_.clear(); | |||
| dump_step_.clear(); | |||
| dump_mode_.clear(); | |||
| is_op_debug_ = false; | |||
| is_train_op_debug_ = false; | |||
| is_infer_op_debug_ = false; | |||
| op_debug_mode_ = 0; | |||
| std::string enable_dump; | |||
| @@ -124,7 +125,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::ClearDumpI | |||
| dump_mode_.clear(); | |||
| dump_op_switch_.clear(); | |||
| dump_status_.clear(); | |||
| is_op_debug_ = false; | |||
| is_train_op_debug_ = false; | |||
| is_infer_op_debug_ = false; | |||
| op_debug_mode_ = 0; | |||
| } | |||
| @@ -203,6 +205,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti | |||
| return dump_status_; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitInferOpDebug() { | |||
| is_infer_op_debug_ = true; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetOpDebugMode(const uint32_t &op_debug_mode) { | |||
| op_debug_mode_ = op_debug_mode; | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( | |||
| const std::string &dump_op_switch) { | |||
| dump_op_switch_ = dump_op_switch; | |||
| @@ -237,7 +247,8 @@ void DumpProperties::CopyFrom(const DumpProperties &other) { | |||
| dump_op_switch_ = other.dump_op_switch_; | |||
| model_dump_properties_map_ = other.model_dump_properties_map_; | |||
| is_op_debug_ = other.is_op_debug_; | |||
| is_train_op_debug_ = other.is_train_op_debug_; | |||
| is_infer_op_debug_ = other.is_infer_op_debug_; | |||
| op_debug_mode_ = other.op_debug_mode_; | |||
| } | |||
| } | |||
| @@ -254,15 +265,15 @@ void DumpProperties::SetDumpDebugOptions() { | |||
| if (dump_debug_mode == OP_DEBUG_AICORE) { | |||
| GELOGD("ge.exec.dumpDebugMode=aicore_overflow, op debug is open."); | |||
| is_op_debug_ = true; | |||
| is_train_op_debug_ = true; | |||
| op_debug_mode_ = kAicoreOverflow; | |||
| } else if (dump_debug_mode == OP_DEBUG_ATOMIC) { | |||
| GELOGD("ge.exec.dumpDebugMode=atomic_overflow, op debug is open."); | |||
| is_op_debug_ = true; | |||
| is_train_op_debug_ = true; | |||
| op_debug_mode_ = kAtomicOverflow; | |||
| } else if (dump_debug_mode == OP_DEBUG_ALL) { | |||
| GELOGD("ge.exec.dumpDebugMode=all, op debug is open."); | |||
| is_op_debug_ = true; | |||
| is_train_op_debug_ = true; | |||
| op_debug_mode_ = kAllOverflow; | |||
| } else { | |||
| GELOGW("ge.exec.dumpDebugMode is invalid."); | |||
| @@ -65,16 +65,26 @@ class DumpProperties { | |||
| const std::string &GetDumpStatus() const; | |||
| void InitInferOpDebug(); | |||
| bool IsInferOpDebug() const { | |||
| return is_infer_op_debug_; | |||
| } | |||
| void SetDumpOpSwitch(const std::string &dump_op_switch); | |||
| const std::string &GetDumpOpSwitch() const; | |||
| bool IsOpDebugOpen() const { return is_op_debug_; } | |||
| bool IsOpDebugOpen() const { | |||
| return is_train_op_debug_ || is_infer_op_debug_; | |||
| } | |||
| bool IsDumpOpen() const; | |||
| bool IsSingleOpNeedDump() const; | |||
| void SetOpDebugMode(const uint32_t &op_debug_mode); | |||
| uint32_t GetOpDebugMode() const { return op_debug_mode_; } | |||
| const std::string &GetEnableDump() const {return enable_dump_;} | |||
| @@ -96,7 +106,8 @@ class DumpProperties { | |||
| std::string dump_op_switch_; | |||
| std::map<std::string, std::set<std::string>> model_dump_properties_map_; | |||
| bool is_op_debug_ = false; | |||
| bool is_train_op_debug_ = false; | |||
| bool is_infer_op_debug_ = false; | |||
| uint32_t op_debug_mode_ = 0; | |||
| }; | |||
| } | |||
| @@ -663,7 +663,7 @@ Status DataDumper::LoadDumpInfo() { | |||
| SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); | |||
| if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { | |||
| auto ret = ExecuteLoadDumpInfo(op_mapping_info); | |||
| ret = ExecuteLoadDumpInfo(op_mapping_info); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Execute load dump info failed"); | |||
| return ret; | |||
| @@ -544,9 +544,15 @@ Status HybridModelAsyncExecutor::DumpOpDebug() { | |||
| data_dumper_.SetModelId(model_->GetModelId()); | |||
| data_dumper_.SetDeviceId(model_->GetDeviceId()); | |||
| void *global_step = nullptr; | |||
| TensorValue *varible_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP); | |||
| if (varible_global_step != nullptr) { | |||
| global_step = const_cast<void *>(varible_global_step->GetData()); | |||
| if (dump_properties.IsInferOpDebug()) { | |||
| GELOGD("Init global step when infer with op debug."); | |||
| global_step = executor_->GetContext()->global_step; | |||
| } else { | |||
| TensorValue *varible_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP); | |||
| if (varible_global_step != nullptr) { | |||
| global_step = const_cast<void *>(varible_global_step->GetData()); | |||
| } | |||
| } | |||
| void *loop_per_iter = nullptr; | |||
| @@ -293,6 +293,7 @@ struct DumpConfig { | |||
| std::string dump_mode; | |||
| std::string dump_status; | |||
| std::string dump_op_switch; | |||
| std::string dump_debug; | |||
| std::vector<ModelDumpConfig> dump_list; | |||
| }; | |||
| } // namespace ge | |||
| @@ -67,6 +67,35 @@ TEST_F(UTEST_dump_manager, is_dump_single_op_close_success) { | |||
| EXPECT_EQ(ret, ge::SUCCESS); | |||
| } | |||
| // dump_debug and debug_status are on | |||
| TEST_F(UTEST_dump_manager, dump_op_debug_on) { | |||
| DumpConfig dump_config; | |||
| dump_config.dump_debug = "on"; | |||
| dump_config.dump_status = "on"; | |||
| auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); | |||
| EXPECT_EQ(ret, ge::SUCCESS); | |||
| } | |||
| // just dump_status is on | |||
| TEST_F(UTEST_dump_manager, dump_status_without_dump_list) { | |||
| DumpConfig dump_config; | |||
| dump_config.dump_status = "on"; | |||
| auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); | |||
| EXPECT_EQ(ret, ge::PARAM_INVALID); | |||
| } | |||
| // dump_status is on with dump_list | |||
| TEST_F(UTEST_dump_manager, dump_status_with_dump_list) { | |||
| DumpConfig dump_config; | |||
| dump_config.dump_status = "on"; | |||
| ModelDumpConfig dump_list; | |||
| dump_list.model_name = "test"; | |||
| dump_list.layers.push_back("first"); | |||
| dump_config.dump_list.push_back(dump_list); | |||
| auto ret = DumpManager::GetInstance().SetDumpConf(dump_config); | |||
| EXPECT_EQ(ret, ge::PARAM_INVALID); | |||
| } | |||
| TEST_F(UTEST_dump_manager, add_dump_properties_success) { | |||
| DumpProperties dump_properties; | |||
| DumpManager::GetInstance().AddDumpProperties(0, dump_properties); | |||