modified: ge/graph/load/graph_loader.cc modified: ge/graph/load/model_manager/davinci_model.cc modified: ge/hybrid/executor/hybrid_model_async_executor.cc modified: ge/init/gelib.cc deleted: ge/omm/csa_interact.cc deleted: ge/omm/csa_interact.h modified: tests/ut/ge/graph/execute/graph_execute_unittest.cc modified: ge/CMakeLists.txt modified: ge/executor/CMakeLists.txt modified: ge/executor/module.mk modified: ge/ge_inference.mk modified: ge/ge_runner.mk modified: ge/graph/execute/graph_execute.cc modified: ge/graph/load/graph_loader.cc modified: ge/graph/load/model_manager/davinci_model.cc modified: ge/hybrid/executor/hybrid_model_async_executor.cc modified: ge/init/gelib.cc deleted: ge/omm/csa_interact.cc deleted: ge/omm/csa_interact.h modified: tests/ut/ge/CMakeLists.txt modified: tests/ut/ge/graph/execute/graph_execute_unittest.cc modified: ge/CMakeLists.txt modified: ge/executor/CMakeLists.txt modified: ge/executor/module.mk modified: ge/ge_inference.mk modified: ge/ge_runner.mk modified: ge/graph/execute/graph_execute.cc modified: ge/graph/load/graph_loader.cc modified: ge/graph/load/model_manager/davinci_model.cc modified: ge/hybrid/executor/hybrid_model_async_executor.cc modified: ge/init/gelib.cc deleted: ge/omm/csa_interact.cc deleted: ge/omm/csa_interact.h modified: tests/ut/ge/CMakeLists.txt modified: tests/ut/ge/graph/execute/graph_execute_unittest.cc tags/v1.3.0
| @@ -341,7 +341,6 @@ set(TRAIN_SRC_LIST | |||||
| "init/gelib.cc" | "init/gelib.cc" | ||||
| "model/ge_model.cc" | "model/ge_model.cc" | ||||
| "model/ge_root_model.cc" | "model/ge_root_model.cc" | ||||
| "omm/csa_interact.cc" | |||||
| "opskernel_manager/ops_kernel_manager.cc" | "opskernel_manager/ops_kernel_manager.cc" | ||||
| "opskernel_manager/ops_kernel_builder_manager.cc" | "opskernel_manager/ops_kernel_builder_manager.cc" | ||||
| "session/inner_session.cc" | "session/inner_session.cc" | ||||
| @@ -416,7 +415,6 @@ set(TRAIN_SRC_LIST | |||||
| set(INFER_SRC_LIST | set(INFER_SRC_LIST | ||||
| "graph/manager/trans_var_data_utils.cc" | "graph/manager/trans_var_data_utils.cc" | ||||
| "omm/csa_interact.cc" | |||||
| "common/fp16_t.cc" | "common/fp16_t.cc" | ||||
| "common/formats/utils/formats_trans_utils.cc" | "common/formats/utils/formats_trans_utils.cc" | ||||
| "common/formats/format_transfers/datatype_transfer.cc" | "common/formats/format_transfers/datatype_transfer.cc" | ||||
| @@ -86,7 +86,6 @@ set(SRC_LIST | |||||
| "../common/profiling/ge_profiling.cc" | "../common/profiling/ge_profiling.cc" | ||||
| "../graph/load/graph_loader.cc" | "../graph/load/graph_loader.cc" | ||||
| "../graph/execute/graph_execute.cc" | "../graph/execute/graph_execute.cc" | ||||
| "../omm/csa_interact.cc" | |||||
| "../graph/manager/graph_manager_utils.cc" | "../graph/manager/graph_manager_utils.cc" | ||||
| "../graph/manager/graph_var_manager.cc" | "../graph/manager/graph_var_manager.cc" | ||||
| "../graph/manager/graph_mem_allocator.cc" | "../graph/manager/graph_mem_allocator.cc" | ||||
| @@ -11,7 +11,6 @@ local_ge_executor_src_files := \ | |||||
| ../common/profiling/ge_profiling.cc \ | ../common/profiling/ge_profiling.cc \ | ||||
| ../graph/load/graph_loader.cc \ | ../graph/load/graph_loader.cc \ | ||||
| ../graph/execute/graph_execute.cc \ | ../graph/execute/graph_execute.cc \ | ||||
| ../omm/csa_interact.cc \ | |||||
| ../graph/manager/graph_manager_utils.cc \ | ../graph/manager/graph_manager_utils.cc \ | ||||
| ../graph/manager/graph_var_manager.cc \ | ../graph/manager/graph_var_manager.cc \ | ||||
| ../graph/manager/rdma_pool_allocator.cc \ | ../graph/manager/rdma_pool_allocator.cc \ | ||||
| @@ -4,7 +4,6 @@ COMMON_LOCAL_SRC_FILES := \ | |||||
| proto/fusion_model.proto \ | proto/fusion_model.proto \ | ||||
| proto/optimizer_priority.proto \ | proto/optimizer_priority.proto \ | ||||
| graph/manager/trans_var_data_utils.cc \ | graph/manager/trans_var_data_utils.cc \ | ||||
| omm/csa_interact.cc \ | |||||
| common/fp16_t.cc \ | common/fp16_t.cc \ | ||||
| common/formats/utils/formats_trans_utils.cc \ | common/formats/utils/formats_trans_utils.cc \ | ||||
| common/formats/format_transfers/datatype_transfer.cc \ | common/formats/format_transfers/datatype_transfer.cc \ | ||||
| @@ -256,7 +256,6 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| init/gelib.cc \ | init/gelib.cc \ | ||||
| model/ge_model.cc \ | model/ge_model.cc \ | ||||
| model/ge_root_model.cc \ | model/ge_root_model.cc \ | ||||
| omm/csa_interact.cc \ | |||||
| opskernel_manager/ops_kernel_manager.cc \ | opskernel_manager/ops_kernel_manager.cc \ | ||||
| opskernel_manager/ops_kernel_builder_manager.cc \ | opskernel_manager/ops_kernel_builder_manager.cc \ | ||||
| session/inner_session.cc \ | session/inner_session.cc \ | ||||
| @@ -21,7 +21,6 @@ | |||||
| #include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
| #include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
| #include "omm/csa_interact.h" | |||||
| namespace ge { | namespace ge { | ||||
| using Uint32Pair = pair<uint32_t, uint32_t>; | using Uint32Pair = pair<uint32_t, uint32_t>; | ||||
| @@ -490,12 +489,10 @@ Status GraphExecutor::AsyncExecuteModel(const GeRootModelPtr &ge_root_model, con | |||||
| } catch (std::bad_alloc &) { | } catch (std::bad_alloc &) { | ||||
| REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); | REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); | ||||
| GELOGE(MEMALLOC_FAILED, "RunAsync failed, bad memory allocation occur !"); | GELOGE(MEMALLOC_FAILED, "RunAsync failed, bad memory allocation occur !"); | ||||
| CsaInteract::GetInstance().WriteErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| } catch (...) { | } catch (...) { | ||||
| REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); | REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); | ||||
| GELOGE(FAILED, "RunAsync failed, some exceptions occur !"); | GELOGE(FAILED, "RunAsync failed, some exceptions occur !"); | ||||
| CsaInteract::GetInstance().WriteErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -509,18 +506,15 @@ Status GraphExecutor::DataInput(const InputData &input_data, OutputData &output_ | |||||
| Status ret = model_manager->DataInput(input_data, output_data); | Status ret = model_manager->DataInput(input_data, output_data); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "DataInput: DataInput failed."); | GELOGE(ret, "DataInput: DataInput failed."); | ||||
| CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } catch (std::bad_alloc &) { | } catch (std::bad_alloc &) { | ||||
| REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); | REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); | ||||
| GELOGE(MEMALLOC_FAILED, "DataInput failed, bad memory allocation occur !"); | GELOGE(MEMALLOC_FAILED, "DataInput failed, bad memory allocation occur !"); | ||||
| CsaInteract::GetInstance().WriteErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| } catch (...) { | } catch (...) { | ||||
| REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); | REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); | ||||
| GELOGE(FAILED, "DataInput failed, some exceptions occur !"); | GELOGE(FAILED, "DataInput failed, some exceptions occur !"); | ||||
| CsaInteract::GetInstance().WriteErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -535,18 +529,15 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<Inp | |||||
| Status ret = model_manager->GetInputOutputDescInfo(model_id, input_desc, output_desc); | Status ret = model_manager->GetInputOutputDescInfo(model_id, input_desc, output_desc); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetInputOutputDescInfo failed."); | GELOGE(ret, "GetInputOutputDescInfo failed."); | ||||
| CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } catch (std::bad_alloc &) { | } catch (std::bad_alloc &) { | ||||
| REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); | REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); | ||||
| GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfo failed, bad memory allocation occur !"); | GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfo failed, bad memory allocation occur !"); | ||||
| CsaInteract::GetInstance().WriteErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| } catch (...) { | } catch (...) { | ||||
| REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); | REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); | ||||
| GELOGE(FAILED, "GetInputOutputDescInfo failed, some exceptions occur !"); | GELOGE(FAILED, "GetInputOutputDescInfo failed, some exceptions occur !"); | ||||
| CsaInteract::GetInstance().WriteErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -564,18 +555,15 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<Inp | |||||
| new_model_desc); | new_model_desc); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "GetInputOutputDescInfo failed."); | GELOGE(ret, "GetInputOutputDescInfo failed."); | ||||
| CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } catch (std::bad_alloc &) { | } catch (std::bad_alloc &) { | ||||
| REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); | REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed"); | ||||
| GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfo failed, bad memory allocation occur !"); | GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfo failed, bad memory allocation occur !"); | ||||
| CsaInteract::GetInstance().WriteErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| } catch (...) { | } catch (...) { | ||||
| REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); | REPORT_INNER_ERROR("E19999", "Some exceptions occur failed"); | ||||
| GELOGE(FAILED, "GetInputOutputDescInfo failed, some exceptions occur !"); | GELOGE(FAILED, "GetInputOutputDescInfo failed, some exceptions occur !"); | ||||
| CsaInteract::GetInstance().WriteErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -24,7 +24,6 @@ | |||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| #include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
| #include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
| #include "omm/csa_interact.h" | |||||
| namespace ge { | namespace ge { | ||||
| Status GraphLoader::UnloadModel(uint32_t model_id) { | Status GraphLoader::UnloadModel(uint32_t model_id) { | ||||
| @@ -40,7 +39,6 @@ Status GraphLoader::UnloadModel(uint32_t model_id) { | |||||
| ret = model_manager->Unload(model_id); | ret = model_manager->Unload(model_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "UnloadModel: Unload failed. model id:%u", model_id); | GELOGE(ret, "UnloadModel: Unload failed. model id:%u", model_id); | ||||
| CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_UNLOAD); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("UnLoad model success, model id:%u.", model_id); | GELOGI("UnLoad model success, model id:%u.", model_id); | ||||
| @@ -55,7 +53,6 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge | |||||
| REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, ret:0x%X", | REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, ret:0x%X", | ||||
| GetContext().DeviceId(), rt_ret); | GetContext().DeviceId(), rt_ret); | ||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
| CsaInteract::GetInstance().WriteErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_LOAD); | |||||
| return RT_FAILED; | return RT_FAILED; | ||||
| } | } | ||||
| if (ge_root_model_ptr == nullptr) { | if (ge_root_model_ptr == nullptr) { | ||||
| @@ -69,8 +66,6 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge | |||||
| Status ret = model_manager->LoadModelOnline(model_id, ge_root_model_ptr, listener); | Status ret = model_manager->LoadModelOnline(model_id, ge_root_model_ptr, listener); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "LoadModel: Load failed. ret = %u", ret); | GELOGE(ret, "LoadModel: Load failed. ret = %u", ret); | ||||
| CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_LOAD); | |||||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | rt_ret = rtDeviceReset(GetContext().DeviceId()); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X", | REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X", | ||||
| @@ -94,7 +89,6 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge | |||||
| } | } | ||||
| GELOGE(ret, "LoadModel: Start failed."); | GELOGE(ret, "LoadModel: Start failed."); | ||||
| CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | rt_ret = rtDeviceReset(GetContext().DeviceId()); | ||||
| @@ -247,7 +241,6 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) { | |||||
| REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, ret:0x%X", | REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, ret:0x%X", | ||||
| GetContext().DeviceId(), rt_ret); | GetContext().DeviceId(), rt_ret); | ||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
| CsaInteract::GetInstance().WriteErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_LOAD); | |||||
| return RT_FAILED; | return RT_FAILED; | ||||
| } | } | ||||
| size_t total_mem = 0; | size_t total_mem = 0; | ||||
| @@ -50,7 +50,6 @@ | |||||
| #include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
| #include "init/gelib.h" | #include "init/gelib.h" | ||||
| #include "mmpa/mmpa_api.h" | #include "mmpa/mmpa_api.h" | ||||
| #include "omm/csa_interact.h" | |||||
| #include "runtime/base.h" | #include "runtime/base.h" | ||||
| #include "runtime/dev.h" | #include "runtime/dev.h" | ||||
| #include "runtime/event.h" | #include "runtime/event.h" | ||||
| @@ -2718,7 +2717,6 @@ Status DavinciModel::ReturnNoOutput(uint32_t data_id) { | |||||
| void *DavinciModel::Run(DavinciModel *model) { | void *DavinciModel::Run(DavinciModel *model) { | ||||
| GE_CHK_BOOL_EXEC(model != nullptr, | GE_CHK_BOOL_EXEC(model != nullptr, | ||||
| CsaInteract::GetInstance().WriteErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| return nullptr, "model_pointer is null!") | return nullptr, "model_pointer is null!") | ||||
| bool seq_end_flag = false; | bool seq_end_flag = false; | ||||
| uint32_t model_id = model->Id(); | uint32_t model_id = model->Id(); | ||||
| @@ -2742,7 +2740,6 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| bool rslt_flg = true; | bool rslt_flg = true; | ||||
| if (model->GetDataInputer() == nullptr) { | if (model->GetDataInputer() == nullptr) { | ||||
| GELOGW("Data inputer is nullptr."); | GELOGW("Data inputer is nullptr."); | ||||
| CsaInteract::GetInstance().StoreInternalErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| break; | break; | ||||
| } | } | ||||
| @@ -2763,7 +2760,6 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| ret = model->SyncVarData(); | ret = model->SyncVarData(); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | ||||
| CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| continue, "Copy input data to model failed."); // [No need to check value] | continue, "Copy input data to model failed."); // [No need to check value] | ||||
| GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData")); | GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData")); | ||||
| @@ -2773,7 +2769,6 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| ret = model->CopyInputData(current_data, false); | ret = model->CopyInputData(current_data, false); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | ||||
| CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| continue, "Copy input data to model failed."); // [No need to check value] | continue, "Copy input data to model failed."); // [No need to check value] | ||||
| if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) { | if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) { | ||||
| model->cur_dynamic_dims_.clear(); | model->cur_dynamic_dims_.clear(); | ||||
| @@ -2794,7 +2789,6 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); | rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); | ||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; | ||||
| (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | ||||
| CsaInteract::GetInstance().WriteErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); | |||||
| continue); | continue); | ||||
| GELOGI("rtModelExecute end"); | GELOGI("rtModelExecute end"); | ||||
| GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtModelExecute, "GraphExcute::rtModelExecute")); | GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtModelExecute, "GraphExcute::rtModelExecute")); | ||||
| @@ -2812,7 +2806,6 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); | rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); | ||||
| (void)model->ReturnResult(current_data.index, false, seq_end_flag, | (void)model->ReturnResult(current_data.index, false, seq_end_flag, | ||||
| data_wrapper->GetOutput()); // [No need to check value] | data_wrapper->GetOutput()); // [No need to check value] | ||||
| CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); | |||||
| continue); | continue); | ||||
| } | } | ||||
| @@ -2841,7 +2834,6 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| GELOGI("run iterator count is %lu, model_id:%u", model->iterator_count_, model->model_id_); | GELOGI("run iterator count is %lu, model_id:%u", model->iterator_count_, model->model_id_); | ||||
| } | } | ||||
| CsaInteract::GetInstance().WriteInternalErrorCode(); | |||||
| GELOGI("Model run end, model id:%u", model->model_id_); | GELOGI("Model run end, model id:%u", model->model_id_); | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| @@ -19,7 +19,6 @@ | |||||
| #include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
| #include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| #include "omm/csa_interact.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| @@ -163,7 +162,6 @@ Status HybridModelAsyncExecutor::RunInternal() { | |||||
| ret = PreRun(current_data, args); | ret = PreRun(current_data, args); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); | ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); | ||||
| CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||||
| continue, "[Invoke][PreRun] failed, model_id:%u.", model_id_); // [No need to check value] | continue, "[Invoke][PreRun] failed, model_id:%u.", model_id_); // [No need to check value] | ||||
| if (pipe_executor_ != nullptr) { | if (pipe_executor_ != nullptr) { | ||||
| @@ -181,7 +179,6 @@ Status HybridModelAsyncExecutor::RunInternal() { | |||||
| } | } | ||||
| ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); | ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -191,7 +188,6 @@ Status HybridModelAsyncExecutor::RunInternal() { | |||||
| GELOGI("run iterator count is %lu, model_id:%u", iterator_count_, model_id_); | GELOGI("run iterator count is %lu, model_id:%u", iterator_count_, model_id_); | ||||
| } | } | ||||
| CsaInteract::GetInstance().WriteInternalErrorCode(); | |||||
| GELOGI("Model run end, model id:%u", model_id_); | GELOGI("Model run end, model id:%u", model_id_); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -42,7 +42,6 @@ | |||||
| #include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
| #include "graph/manager/host_mem_manager.h" | #include "graph/manager/host_mem_manager.h" | ||||
| #include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
| #include "omm/csa_interact.h" | |||||
| #include "runtime/kernel.h" | #include "runtime/kernel.h" | ||||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | #include "opskernel_manager/ops_kernel_builder_manager.h" | ||||
| #include "external/runtime/rt_error_codes.h" | #include "external/runtime/rt_error_codes.h" | ||||
| @@ -376,10 +375,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(HostMemManager::Instance().Initialize()); | GE_CHK_STATUS_RET(HostMemManager::Instance().Initialize()); | ||||
| // Update CSA file | |||||
| CsaInteract::GetInstance().Init(options.device_id, GetContext().TraceId()); | |||||
| Status ret = CsaInteract::GetInstance().WriteJobState(JOBSTATE_RUNNING, JOBSUBSTATE_ENV_INIT); | |||||
| GE_LOGE_IF(ret != SUCCESS, "[Write][JobState] failed, ret:%u ", ret); | |||||
| // set device id | // set device id | ||||
| GELOGI("set logical device id:%u", options.device_id); | GELOGI("set logical device id:%u", options.device_id); | ||||
| @@ -408,10 +403,6 @@ Status GELib::SystemShutdownWithOptions(const Options &options) { | |||||
| GE_CHK_RT(rtDeviceReset(options.device_id)); | GE_CHK_RT(rtDeviceReset(options.device_id)); | ||||
| // Update CSA file | |||||
| Status ret = CsaInteract::GetInstance().WriteJobState(JOBSTATE_SUCCEED); | |||||
| GE_LOGE_IF(ret != SUCCESS, "[Write][JobState] failed, ret:%u ", ret); | |||||
| is_system_inited = false; | is_system_inited = false; | ||||
| is_shutdown = true; | is_shutdown = true; | ||||
| GELOGI("%s finalize GELib success.", mode.c_str()); | GELOGI("%s finalize GELib success.", mode.c_str()); | ||||
| @@ -1,265 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "omm/csa_interact.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/debug/log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "graph/ge_context.h" | |||||
| #include "graph/manager/graph_var_manager.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "mmpa/mmpa_api.h" | |||||
| #include "nlohmann/json.hpp" | |||||
| namespace ge { | |||||
| namespace { | |||||
| const char FMK_STATUS_FILE_DIR_ENV[] = "FMK_STATUS_FILE_DIR"; | |||||
| const char JOBSTATE_FILE_NAME[] = "jobstateupdate_framework"; | |||||
| const char HCOM_DETECT_FILE_NAME[] = "hcom_detection_result"; | |||||
| const char FILE_SEPARATE[] = "/"; | |||||
| } // namespace | |||||
| /// | |||||
| /// @brief Obtain CsaInteract instance | |||||
| /// @return CsaInteract instance | |||||
| /// | |||||
| CsaInteract &CsaInteract::GetInstance() { | |||||
| static CsaInteract instance; | |||||
| return instance; | |||||
| } | |||||
| /// | |||||
| /// @brief CsaInteract instance initialization | |||||
| /// @param [in] dev_index device index | |||||
| /// @param [in] job_id job id | |||||
| /// @return void | |||||
| /// | |||||
| void CsaInteract::Init(int32_t dev_index, int64_t job_id) { | |||||
| if (!is_init_) { | |||||
| dev_index_ = dev_index; | |||||
| job_id_ = job_id; | |||||
| char file_dir_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
| INT32 res = mmGetEnv(FMK_STATUS_FILE_DIR_ENV, file_dir_env, MMPA_MAX_PATH); | |||||
| string csa_path_prefix; | |||||
| if (res == EN_OK) { | |||||
| csa_path_prefix = file_dir_env; | |||||
| } | |||||
| if (!csa_path_prefix.empty()) { | |||||
| job_state_file_ = csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + JOBSTATE_FILE_NAME; | |||||
| hcom_detect_file_ = csa_path_prefix + std::to_string(dev_index_) + FILE_SEPARATE + HCOM_DETECT_FILE_NAME; | |||||
| } | |||||
| is_init_ = true; | |||||
| } | |||||
| } | |||||
| /// | |||||
| /// @brief Update job state file | |||||
| /// @param [in] job_state job state | |||||
| /// @param [in] job_sub_state detailed job state | |||||
| /// @param [in] module_ret_errcode sub module training failure error code | |||||
| /// @param [in] error_module error module identified by FMK | |||||
| /// @return Status | |||||
| /// | |||||
| Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, uint32_t module_ret_errcode, | |||||
| ErrorModule error_module) { | |||||
| if (!is_init_) { | |||||
| GELOGE(INTERNAL_ERROR, "[Init][CsaInteract] obj has not init, can't WriteJobState"); | |||||
| REPORT_INNER_ERROR("E19999", "WriteJobState failed before init. "); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if ((curr_state_ == JOBSTATE_FAILED) || (curr_state_ == JOBSTATE_KILLED)) { | |||||
| return SUCCESS; | |||||
| } | |||||
| if (job_state_file_.empty()) { | |||||
| return SUCCESS; | |||||
| } | |||||
| std::string content; | |||||
| try { | |||||
| nlohmann::json content_json; | |||||
| content_json["job_id"] = job_id_; | |||||
| content_json["jobstate"] = job_state; | |||||
| // Only the running or running failure state has a job sub state | |||||
| if ((job_state == JOBSTATE_RUNNING) || (job_state == JOBSTATE_FAILED)) { | |||||
| content_json["job_sub_state"] = job_sub_state; | |||||
| } | |||||
| content_json["time"] = CurrentTimeInStr(); | |||||
| // Write error code only if run failed | |||||
| if (job_state == JOBSTATE_FAILED) { | |||||
| content_json["errorcode"] = module_ret_errcode; | |||||
| content_json["errmodule"] = error_module; | |||||
| } | |||||
| content = content_json.dump(); | |||||
| } catch (const nlohmann::json::exception &e) { | |||||
| GELOGE(INTERNAL_ERROR, "[Create][JsonObject] exception:%s job_state:%u job_sub_state:%u.", | |||||
| e.what(), job_state, job_sub_state); | |||||
| REPORT_INNER_ERROR("E19999", "Create json object failed. exception:%s job_state:%u job_sub_state:%u.", | |||||
| e.what(), job_state, job_sub_state); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (WriteFile(job_state_file_, content) != SUCCESS) { | |||||
| // WriteFile has already logged the error, so it is not logged again here | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| curr_state_ = job_state; | |||||
| return SUCCESS; | |||||
| } | |||||
| /// | |||||
| /// @brief Update error code in the job state file | |||||
| /// @param [in] module_ret_errcode sub module training failure error code | |||||
| /// @param [in] error_module error module identified by FMK | |||||
| /// @param [in] job_sub_state detailed job state | |||||
| /// @return void | |||||
| /// | |||||
| void CsaInteract::WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state) { | |||||
| // The error log subfunction has been printed and will not print again | |||||
| Status ret = WriteJobState(JOBSTATE_FAILED, job_sub_state, module_ret_errcode, error_module); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGW("write error code fail. ret_code: %u, status: %u", module_ret_errcode, job_sub_state); | |||||
| } | |||||
| } | |||||
| /// | |||||
| /// @brief Record errors that occurred during the training | |||||
| /// @param [in] module_ret_errcode sub module training failure error code | |||||
| /// @param [in] error_module error module identified by FMK | |||||
| /// @param [in] job_sub_state detailed job state | |||||
| /// @return void | |||||
| /// | |||||
| void CsaInteract::StoreInternalErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, | |||||
| JobSubState job_sub_state) { | |||||
| is_have_internal_error_ = true; | |||||
| csa_error_code_.module_ret_errcode = module_ret_errcode; | |||||
| csa_error_code_.error_module = error_module; | |||||
| csa_error_code_.job_sub_state = job_sub_state; | |||||
| } | |||||
| /// | |||||
| /// @brief Update training error code in the job state file | |||||
| /// @return void | |||||
| /// | |||||
| void CsaInteract::WriteInternalErrorCode() { | |||||
| if (is_have_internal_error_) { | |||||
| WriteErrorCode(csa_error_code_.module_ret_errcode, csa_error_code_.error_module, csa_error_code_.job_sub_state); | |||||
| } | |||||
| } | |||||
| /// | |||||
| /// @brief Update network connectivity detect file | |||||
| /// @param [in] content network connectivity content | |||||
| /// @return Status | |||||
| /// | |||||
| Status CsaInteract::WriteHcomDetection(const std::string &content) { | |||||
| if (!is_init_) { | |||||
| GELOGE(INTERNAL_ERROR, "[Init][CsaInteract] obj has not init, can't WriteJobState"); | |||||
| REPORT_INNER_ERROR("E19999", "WriteHcomDetection failed before init."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (hcom_detect_file_.empty()) { | |||||
| return SUCCESS; | |||||
| } | |||||
| return WriteFile(hcom_detect_file_, content); | |||||
| } | |||||
| /// | |||||
| /// @ingroup WriteFile | |||||
| /// @brief Write the content into the file. If the file does not exist, create the file | |||||
| /// @param [in] file_name: File name to be written | |||||
| /// @param [in] content: Contents to be written | |||||
| /// @return Status | |||||
| /// | |||||
| Status CsaInteract::WriteFile(const std::string &file_name, const std::string &content) { | |||||
| // if file path is not exist, then make path | |||||
| INT32 flags = M_WRONLY | O_TRUNC | M_CREAT; | |||||
| int32_t fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD); | |||||
| if (fd == EN_ERROR) { | |||||
| if (MakePath(file_name) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "[Create][File Path] errno is %d", errno); | |||||
| REPORT_CALL_ERROR("E19999", "MakePath failed. errno is %d", errno); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD); | |||||
| if (fd == EN_ERROR) { | |||||
| GELOGE(INTERNAL_ERROR, "[Open][File] errno is %d file_name: %s", errno, file_name.c_str()); | |||||
| REPORT_CALL_ERROR("E19999", "mmOpen2 failed. errno is %d file_name: %s", errno, file_name.c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| } | |||||
| mmSsize_t ret = mmWrite(fd, reinterpret_cast<void *>(const_cast<char *>(content.c_str())), content.length()); | |||||
| if (ret == EN_ERROR) { | |||||
| GELOGE(INTERNAL_ERROR, "[Write][File] errno is %d", errno); | |||||
| REPORT_CALL_ERROR("E19999", "mmWrite failed. errno is %d", errno); | |||||
| ret = mmClose(fd); | |||||
| if (ret == EN_ERROR) { | |||||
| GELOGE(INTERNAL_ERROR, "[Close][File] error is %d", errno); | |||||
| REPORT_CALL_ERROR("E19999", "mmClose failed. error is %d", errno); | |||||
| } | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| ret = mmClose(fd); | |||||
| if (ret == EN_ERROR) { | |||||
| GELOGE(INTERNAL_ERROR, "[Close][File] error is %d", errno); | |||||
| REPORT_CALL_ERROR("E19999", "mmClose failed. error is %d", errno); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| /// | |||||
| /// @ingroup MakePath | |||||
| /// @brief Verify whether the file path exists, if not, recursively create the folder | |||||
| /// @param [in] file_name: File name to be verified | |||||
| /// @return Status | |||||
| /// | |||||
| Status CsaInteract::MakePath(const std::string &file_name) { | |||||
| std::size_t found = file_name.find_last_of("/"); | |||||
| if (found == std::string::npos) { | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| std::string file_path = file_name.substr(0, found + 1); | |||||
| if (mmAccess(file_path.c_str()) == EN_OK) { | |||||
| return SUCCESS; | |||||
| } | |||||
| found = file_path.find_first_of("/"); | |||||
| while (found != std::string::npos) { | |||||
| std::string pre_path = file_path.substr(0, found + 1); | |||||
| if (mmAccess(pre_path.c_str()) != EN_OK) { | |||||
| if (mmMkdir(pre_path.c_str(), M_IRWXU) != EN_OK) { | |||||
| GELOGE(INTERNAL_ERROR, "[Create][FileDir] fail, errno is %d, pre_path:%s", errno, pre_path.c_str()); | |||||
| REPORT_CALL_ERROR("E19999", "mmMkdir failed. errno is %d pre_path:%s", errno, pre_path.c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| } | |||||
| found = file_path.find_first_of("/", found + 1); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -1,183 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_OMM_CSA_INTERACT_H_ | |||||
| #define GE_OMM_CSA_INTERACT_H_ | |||||
| #include <string> | |||||
| #include "framework/common/ge_inner_error_codes.h" | |||||
| namespace ge { | |||||
/// Coarse state of a job. The values are consecutive and start at 1.
/// NOTE: the spelling of JOBSTATE_UNKOWN is kept because the name is part of the interface.
enum JobState {
  JOBSTATE_WAITING = 1,
  JOBSTATE_RUNNING = 2,
  JOBSTATE_KILLING = 3,
  JOBSTATE_SUCCEED = 4,
  JOBSTATE_FAILED = 5,
  JOBSTATE_KILLED = 6,
  JOBSTATE_UNKOWN = 7
};
/// Detailed state of a job. The values are consecutive and start at 201.
/// NOTE: the spelling of JOBSUBSTATE_RESOUCE_ALLOC is kept because the name is part of the interface.
enum JobSubState {
  JOBSUBSTATE_ENV_INIT = 201,
  JOBSUBSTATE_ENV_FIN = 202,
  JOBSUBSTATE_RESOUCE_ALLOC = 203,
  JOBSUBSTATE_MODEL_COMPILE = 204,
  JOBSUBSTATE_GRAPH_PREPARE = 205,
  JOBSUBSTATE_GRAPH_SPLIT = 206,
  JOBSUBSTATE_GRAPH_OPTIMIZE = 207,
  JOBSUBSTATE_GRAPH_BUILD = 208,
  JOBSUBSTATE_GRAPH_LOAD = 209,
  JOBSUBSTATE_GRAPH_EXEC = 210,
  JOBSUBSTATE_GRAPH_UNLOAD = 211,
  JOBSUBSTATE_OTHER = 212
};
/// Identifier of the module an error is attributed to.
/// The values are written in hexadecimal and are not contiguous (0x12 is 18 decimal).
enum ErrorModule {
  ERROR_MODULE_DRIVER  = 0x01,
  ERROR_MODULE_RUNTIME = 0x04,
  ERROR_MODULE_CCE     = 0x06,
  ERROR_MODULE_FMK     = 0x08,
  ERROR_MODULE_HCCL    = 0x12
};
| struct CsaErrorCode { | |||||
| CsaErrorCode() | |||||
| : module_ret_errcode(0), | |||||
| error_module(ERROR_MODULE_FMK), | |||||
| job_sub_state(JOBSUBSTATE_OTHER) {} | |||||
| ~CsaErrorCode() {} | |||||
| uint32_t module_ret_errcode; | |||||
| ErrorModule error_module; | |||||
| JobSubState job_sub_state; | |||||
| }; | |||||
| class CsaInteract { | |||||
| public: | |||||
| /// | |||||
| /// @brief Obtain CsaInteract instance | |||||
| /// @return CsaInteract instance | |||||
| /// | |||||
| static CsaInteract& GetInstance(); | |||||
| /// | |||||
| /// @brief CsaInteract instance initialization | |||||
| /// @param [in] dev_index device index | |||||
| /// @param [in] job_id job id | |||||
| /// @return void | |||||
| /// | |||||
| void Init(int32_t dev_index, int64_t job_id); | |||||
| /// | |||||
| /// @brief Update job state file | |||||
| /// @param [in] job_state job state | |||||
| /// @param [in] job_sub_state detailed job state | |||||
| /// @param [in] module_ret_errcode sub module training failure error code | |||||
| /// @param [in] error_module error module identified by FMK | |||||
| /// @return Status | |||||
| /// | |||||
| Status WriteJobState(JobState job_state, | |||||
| JobSubState job_sub_state = JOBSUBSTATE_OTHER, | |||||
| uint32_t module_ret_errcode = SUCCESS, | |||||
| ErrorModule error_module = ERROR_MODULE_FMK); | |||||
| /// | |||||
| /// @brief Update error code in the job state file | |||||
| /// @param [in] module_ret_errcode sub module training failure error code | |||||
| /// @param [in] error_module error module identified by FMK | |||||
| /// @param [in] job_sub_state detailed job state | |||||
| /// @return void | |||||
| /// | |||||
| void WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, | |||||
| JobSubState job_sub_state); | |||||
| /// | |||||
| /// @brief Record errors that occurred durning the training | |||||
| /// @param [in] module_ret_errcode sub module training failure error code | |||||
| /// @param [in] error_module error module identified by FMK | |||||
| /// @param [in] job_sub_state detailed job state | |||||
| /// @return void | |||||
| /// | |||||
| void StoreInternalErrorCode(uint32_t module_ret_errcode, | |||||
| ErrorModule error_module, | |||||
| JobSubState job_sub_state); | |||||
| /// | |||||
| /// @brief Update training error code in the job state file | |||||
| /// @return void | |||||
| /// | |||||
| void WriteInternalErrorCode(); | |||||
| /// | |||||
| /// @brief Update network connectivity detect file | |||||
| /// @param [in] content network connectivity content | |||||
| /// @return Status | |||||
| /// | |||||
| Status WriteHcomDetection(const std::string& content); | |||||
| private: | |||||
| CsaInteract() | |||||
| : dev_index_(0), | |||||
| job_id_(0), | |||||
| is_init_(false), | |||||
| curr_state_(JOBSTATE_UNKOWN), | |||||
| is_have_internal_error_(false) {} | |||||
| ~CsaInteract() {} | |||||
| CsaInteract(const CsaInteract&) = delete; | |||||
| CsaInteract(CsaInteract&&) = delete; | |||||
| CsaInteract& operator=(const CsaInteract&) = delete; | |||||
| CsaInteract& operator=(CsaInteract&&) = delete; | |||||
| /// | |||||
| /// @ingroup WriteFile | |||||
| /// @brief Write the content into the file. If the file does not exist, create the file | |||||
| /// @param [in] file_name: File name to be written | |||||
| /// @param [in] content: Contents to be written | |||||
| /// @return Status | |||||
| /// | |||||
| Status WriteFile(const std::string& file_name, const std::string& content); | |||||
| /// | |||||
| /// @ingroup MakePath | |||||
| /// @brief Verify whether the file path exists, if not, recursively create the folder | |||||
| /// @param [in] file_name: File name to be verified | |||||
| /// @return Status | |||||
| /// | |||||
| Status MakePath(const std::string& file_name); | |||||
| // device index | |||||
| int32_t dev_index_; | |||||
| // job id | |||||
| int64_t job_id_; | |||||
| // is initialization complete | |||||
| bool is_init_; | |||||
| // current job state | |||||
| JobState curr_state_; | |||||
| // job state file | |||||
| std::string job_state_file_; | |||||
| // network connectivity detect file | |||||
| std::string hcom_detect_file_; | |||||
| // identification of internal errors that occurred during the training | |||||
| bool is_have_internal_error_; | |||||
| // error code information | |||||
| CsaErrorCode csa_error_code_; | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_OMM_CSA_INTERACT_H_ | |||||
| @@ -287,7 +287,6 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_task.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/zero_copy_task.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/cpu_queue_schedule.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/aipp_utils.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/aipp_utils.cc" | ||||
| "${GE_CODE_DIR}/ge/omm/csa_interact.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/tbe_handle_store.cc" | ||||
| "${GE_CODE_DIR}/ge/common/kernel_store.cc" | "${GE_CODE_DIR}/ge/common/kernel_store.cc" | ||||
| "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" | "${GE_CODE_DIR}/ge/common/tbe_kernel_store.cc" | ||||
| @@ -391,7 +390,6 @@ set(GRAPH_PARTITION_COMMON_SRC_FILES | |||||
| set(GRAPH_LOAD_COMMON_SRC_FILES | set(GRAPH_LOAD_COMMON_SRC_FILES | ||||
| "${GE_CODE_DIR}/ge/graph/load/graph_loader.cc" | "${GE_CODE_DIR}/ge/graph/load/graph_loader.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/graph_manager_utils.cc" | "${GE_CODE_DIR}/ge/graph/manager/graph_manager_utils.cc" | ||||
| "${GE_CODE_DIR}/ge/omm/csa_interact.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/manager/graph_mem_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/graph_mem_allocator.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/graph_var_manager.cc" | "${GE_CODE_DIR}/ge/graph/manager/graph_var_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" | "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" | ||||
| @@ -22,7 +22,6 @@ | |||||
| #include "graph/execute/graph_execute.h" | #include "graph/execute/graph_execute.h" | ||||
| #include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
| #include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
| #include "omm/csa_interact.h" | |||||
| #undef private | #undef private | ||||
| #undef public | #undef public | ||||