Browse Source

adapt to new session id in multithread infer

tags/v1.3.0
wuweikang 3 years ago
parent
commit
b307b80f2b
5 changed files with 126 additions and 14 deletions
  1. +3
    -0
      ge/executor/CMakeLists.txt
  2. +70
    -12
      ge/graph/load/model_manager/model_manager.cc
  3. +11
    -0
      ge/graph/load/model_manager/model_manager.h
  4. +0
    -2
      ge/init/gelib.cc
  5. +42
    -0
      tests/ut/ge/graph/load/model_manager_unittest.cc

+ 3
- 0
ge/executor/CMakeLists.txt View File

@@ -19,6 +19,7 @@ set(SRC_LIST
"../common/dump/exception_dumper.cc"
"../common/dump/dump_manager.cc"
"../common/dump/dump_op.cc"
"../common/dump/dump_server.cc"
"../common/dump/opdebug_register.cc"
"../common/profiling/ge_profiling.cc"
"../graph/load/graph_loader.cc"
@@ -201,6 +202,7 @@ target_include_directories(ge_executor SYSTEM PRIVATE
${GE_CODE_DIR}/../inc/cce
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_executor PRIVATE
@@ -247,6 +249,7 @@ target_include_directories(ge_executor_shared PRIVATE
${GE_CODE_DIR}/../inc/cce
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_options(ge_executor_shared PRIVATE


+ 70
- 12
ge/graph/load/model_manager/model_manager.cc View File

@@ -27,6 +27,7 @@
#include "graph/load/model_manager/davinci_model.h"
#include "model/ge_root_model.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "toolchain/adx_datadump_server.h"

namespace ge {
thread_local uint32_t device_count = 0;
@@ -48,6 +49,7 @@ const int kTimeSpecNano = 1000000000;
const int kTimeSpecMiro = 1000000;
const int kOpNameMaxSize = 100;
const uint64_t kInferSessionId = 0;
const int32_t kDumpStatus = 0;
#pragma pack(push, 1)
struct CustAicpuSoBuf {
uint64_t kernelSoBuf;
@@ -321,6 +323,58 @@ bool ModelManager::IsNeedHybridLoad(ge::GeRootModel &ge_root_model) {
(void)AttrUtils::GetBool(root_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dsp_partitioned_graph);
return is_shape_unknown || is_dsp_partitioned_graph || GetContext().GetHostExecFlag();
}

bool ModelManager::IsDumpSeverInited(uint64_t session_id) {
auto it = session_id_to_dump_server_init_flag_.find(session_id);
return it != session_id_to_dump_server_init_flag_.end() && it->second;
}

Status ModelManager::AddDumpProperties(uint64_t session_id, const DumpProperties &dump_properties) {
if (!IsDumpSeverInited(session_id)) {
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
GE_IF_BOOL_EXEC(AdxDataDumpServerInit() != kDumpStatus,
GELOGE(PARAM_INVALID, "[Init][AdxDataDumpServer] failed, session_id:%lu.", session_id);
return PARAM_INVALID)
GELOGI("Init adx data dump server success");
session_id_to_dump_server_init_flag_[session_id] = true;
}
}
DumpManager::GetInstance().AddDumpProperties(session_id, dump_properties);
return SUCCESS;
}

Status ModelManager::InitDumPropertiesWithNewSessionId(uint64_t session_id) {
DumpProperties dump_properties;
dump_properties.InitByOptions();
GE_CHK_STATUS_RET(AddDumpProperties(session_id, dump_properties), "[Add][DumpProperties] failed.");
return SUCCESS;
}

Status ModelManager::UpdateSessionId(uint32_t model_id, GeModelPtr ge_model,
std::shared_ptr<DavinciModel> &davinci_model, uint64_t &session_id) {
uint64_t new_session_id;
Status ret = GenSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed.");
ret = davinci_model->UpdateSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed.");
ge_model->InsertSessionMap(model_id, new_session_id);
GELOGD("Update new session id: %lu.", new_session_id);
session_id = new_session_id;
return SUCCESS;
}

bool ModelManager::HasVarNode(ComputeGraphPtr &compute_graph) const {
for (ge::NodePtr &node : compute_graph->GetAllNodes()) {
if (node == nullptr) {
continue;
}
if (node->GetType() == VARIABLE) {
return true;
}
}
return false;
}

///
/// @ingroup domi_ome
/// @brief load model online
@@ -347,10 +401,6 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
davinci_model->SetId(model_id);
davinci_model->SetDeviceId(GetContext().DeviceId());

const DumpProperties &dump_properties = DumpManager::GetInstance().GetDumpProperties(GetContext().SessionId());
davinci_model->SetDumpProperties(dump_properties);
dump_properties_ = dump_properties;

auto root_graph = ge_root_model->GetRootGraph();
GE_CHECK_NOTNULL(root_graph);
string root_model_name = root_graph->GetName();
@@ -364,15 +414,23 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
/// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail.
/// These session_ids come from the same model, so the values of session_id are the same.
/// Update session_id for infer in load model to avoid the same session_id.
if (!ge_root_model->GetTrainFlag()) {
uint64_t new_session_id;
ret = GenSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed.");
ret = davinci_model->UpdateSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed.");
ge_model->InsertSessionMap(model_id, new_session_id);
GELOGD("Update new session id: %lu.", new_session_id);
uint64_t session_id = GetContext().SessionId();
// Inference graph with variable node is not support for multi-threads scenario
if (!ge_root_model->GetTrainFlag() && !HasVarNode(root_graph)) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(UpdateSessionId(model_id, ge_model, davinci_model, session_id) != SUCCESS,
return ret,
"UpdateSessionId failed.");
GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId()));
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(InitDumPropertiesWithNewSessionId(session_id) != SUCCESS,
GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId())));
return ret,
"Init DumProperties with new session_id failed.");
}

const DumpProperties &dump_properties = DumpManager::GetInstance().GetDumpProperties(session_id);
davinci_model->SetDumpProperties(dump_properties);
dump_properties_ = dump_properties;

GE_TIMESTAMP_START(Init);
GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Init()), GELOGW("DavinciInit failed."); break;);
GE_TIMESTAMP_END(Init, "GraphLoader::ModelInit");


+ 11
- 0
ge/graph/load/model_manager/model_manager.h View File

@@ -345,6 +345,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

void GenModelId(uint32_t *id);

Status InitDumPropertiesWithNewSessionId(uint64_t session_id);

bool IsDumpSeverInited(uint64_t session_id);

Status AddDumpProperties(uint64_t session_id, const DumpProperties &dump_properties);

Status UpdateSessionId(uint32_t model_id, GeModelPtr ge_model,
std::shared_ptr<DavinciModel> &davinci_model, uint64_t &session_id);

bool HasVarNode(ComputeGraphPtr &compute_graph) const;

std::map<uint32_t, std::shared_ptr<DavinciModel>> model_map_;
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_;
@@ -361,6 +371,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

static DumpProperties dump_properties_;
bool dump_exception_flag_ = false;
std::map<uint64_t, bool> session_id_to_dump_server_init_flag_;
};
} // namespace ge



+ 0
- 2
ge/init/gelib.cc View File

@@ -60,8 +60,6 @@ static std::shared_ptr<GELib> instancePtr_ = nullptr;

// Initial each module of GE, if one failed, release all
Status GELib::Initialize(const map<string, string> &options) {


GELOGI("initial start");
GEEVENT("[GEPERFTRACE] GE Init Start");
// Multiple initializations are not allowed


+ 42
- 0
tests/ut/ge/graph/load/model_manager_unittest.cc View File

@@ -25,6 +25,7 @@
#include "common/op/ge_op_utils.h"
#include "graph/load/graph_loader.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/ops_stub.h"

using namespace std;
using namespace testing;
@@ -56,6 +57,23 @@ class UtestModelManagerModelManager : public testing::Test {

void TearDown() {}

void CreateGraph(Graph &graph) {
TensorDesc desc(ge::Shape({1, 3, 224, 224}));
uint32_t size = desc.GetShape().GetShapeSize();
desc.SetSize(size);
auto data = op::Data("Data").set_attr_index(0);
data.update_input_desc_data(desc);
data.update_output_desc_out(desc);

auto flatten = op::Flatten("Flatten").set_input_x(data, data.name_out_out());

std::vector<Operator> inputs{data};
std::vector<Operator> outputs{flatten};
std::vector<Operator> targets{flatten};
// Graph graph("test_graph");
graph.SetInputs(inputs).SetOutputs(outputs).SetTargets(targets);
}

void GenUnencryptModelData(ModelData &data) {
const int model_len = 10;
data.model_len = sizeof(ModelFileHeader) + model_len;
@@ -420,4 +438,28 @@ TEST_F(UtestModelManagerModelManager, test_data_input_tensor) {
auto ret = mm.DataInputTensor(model_id,inputs);
EXPECT_EQ(PARAM_INVALID, ret); // HybridDavinciModel::impl_ is null.
}

TEST_F(UtestModelManagerModelManager, test_init_dump_properties_with_new_session_id) {
ModelManager model_manager;
uint64_t session_id = 1;
model_manager.InitDumPropertiesWithNewSessionId(session_id);
}

TEST_F(UtestModelManagerModelManager, test_update_session_id) {
ModelManager model_manager;
uint32_t model_id = 0;
uint64_t session_id = 0;
GeModelPtr ge_model = MakeShared<GeModel>();
std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr);
model_manager.UpdateSessionId(model_id, ge_model, davinci_model, session_id);
}

TEST_F(UtestModelManagerModelManager, test_has_var_node) {
ModelManager model_manager;
uint64_t session_id = 1;
Graph graph("test");
CreateGraph(graph);
auto compute_graph = ge::GraphUtils::GetComputeGraph(graph);
model_manager.HasVarNode(compute_graph);
}
} // namespace ge

Loading…
Cancel
Save