You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

ge_api.cc 34 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "external/ge/ge_api.h"
  17. #include <iostream>
  18. #include <malloc.h>
  19. #include "framework/common/debug/log.h"
  20. #include "framework/common/debug/ge_log.h"
  21. #include "common/ge/datatype_util.h"
  22. #include "proto/ge_api.pb.h"
  23. #include "graph/model_serialize.h"
  24. #include "graph/detail/model_serialize_imp.h"
  25. #include "graph/utils/tensor_adapter.h"
  26. #include "init/gelib.h"
  27. #include "session/session_manager.h"
  28. #include "graph/opsproto_manager.h"
  29. #include "graph/utils/type_utils.h"
  30. #include "graph/manager/util/rt_context_util.h"
  31. #include "common/ge_call_wrapper.h"
  32. #include "register/op_registry.h"
  33. #include "common/ge/tbe_plugin_manager.h"
  34. #include "common/util/error_manager/error_manager.h"
  35. #include "toolchain/plog.h"
  36. #include "ir_build/option_utils.h"
  37. #include "framework/common/ge_types.h"
  38. #include "external/ge/ge_api_types.h"
  39. #include "graph/ge_context.h"
  40. #include "common/profiling/profiling_init.h"
  41. #include "common/profiling/profiling_properties.h"
  42. using domi::OpRegistry;
  43. using std::map;
  44. using std::string;
  45. using std::vector;
  46. namespace {
  // Upper bound on the job id string length accepted by CheckOptionsValid.
  constexpr int32_t kMaxStrLen = 128;
  // Radix handed to std::strtol / std::strtoll when parsing numeric option values.
  constexpr int kDecimal = 10;
  // Default device id picked by InitOptions when the graph run mode is at least ge::TRAIN.
  constexpr int kDefaultDeviceIdForTrain = 0;
  // Default device id picked by InitOptions for every other run mode.
  constexpr int kDefaultDeviceIdForInfer = -1;
  51. void InitOptions(const map<string, string> &option_map, ge::Options &options) {
  52. GELOGD("InitOptions start");
  53. options.session_id = 0;
  54. auto is_train_mode = false;
  55. auto iter = option_map.find(ge::OPTION_GRAPH_RUN_MODE);
  56. if (iter != option_map.end()) {
  57. if (ge::GraphRunMode(std::strtol(iter->second.c_str(), nullptr, kDecimal)) >= ge::TRAIN) {
  58. is_train_mode = true;
  59. }
  60. }
  61. iter = option_map.find(ge::OPTION_EXEC_SESSION_ID);
  62. if (iter != option_map.end()) {
  63. options.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal);
  64. }
  65. options.device_id = is_train_mode ? kDefaultDeviceIdForTrain : kDefaultDeviceIdForInfer;
  66. iter = option_map.find(ge::OPTION_EXEC_DEVICE_ID);
  67. if (iter != option_map.end()) {
  68. options.device_id = static_cast<int32_t>(std::strtol(iter->second.c_str(), nullptr, kDecimal));
  69. }
  70. iter = option_map.find(ge::OPTION_EXEC_JOB_ID);
  71. if (iter != option_map.end()) {
  72. options.job_id = iter->second.c_str();
  73. }
  74. options.isUseHcom = false;
  75. iter = option_map.find(ge::OPTION_EXEC_IS_USEHCOM);
  76. if (iter != option_map.end()) {
  77. std::istringstream(iter->second) >> options.isUseHcom;
  78. }
  79. options.isUseHvd = false;
  80. iter = option_map.find(ge::OPTION_EXEC_IS_USEHVD);
  81. if (iter != option_map.end()) {
  82. std::istringstream(iter->second) >> options.isUseHvd;
  83. }
  84. options.deployMode = false;
  85. iter = option_map.find(ge::OPTION_EXEC_DEPLOY_MODE);
  86. if (iter != option_map.end()) {
  87. std::istringstream(iter->second) >> options.deployMode;
  88. }
  89. iter = option_map.find(ge::OPTION_EXEC_POD_NAME);
  90. if (iter != option_map.end()) {
  91. options.podName = iter->second.c_str();
  92. }
  93. iter = option_map.find(ge::OPTION_EXEC_PROFILING_MODE);
  94. if (iter != option_map.end()) {
  95. options.profiling_mode = iter->second.c_str();
  96. }
  97. iter = option_map.find(ge::OPTION_EXEC_PROFILING_OPTIONS);
  98. if (iter != option_map.end()) {
  99. options.profiling_options = iter->second.c_str();
  100. }
  101. iter = option_map.find(ge::OPTION_EXEC_RANK_ID);
  102. if (iter != option_map.end()) {
  103. options.rankId = std::strtoll(iter->second.c_str(), nullptr, kDecimal);
  104. }
  105. iter = option_map.find(ge::OPTION_EXEC_RANK_TABLE_FILE);
  106. if (iter != option_map.end()) {
  107. options.rankTableFile = iter->second.c_str();
  108. }
  109. options.enable_atomic = true;
  110. iter = option_map.find(ge::OPTION_EXEC_ATOMIC_FLAG);
  111. GE_IF_BOOL_EXEC(iter != option_map.end(),
  112. options.enable_atomic = std::strtol(iter->second.c_str(), nullptr, kDecimal));
  113. GELOGD("ge InnerInitialize, the enable_atomic_flag in options_ is %d", options.enable_atomic);
  114. }
  115. void InitProfiling(ge::Options &options) {
  116. GELOGD("InitProfiling start");
  117. ge::GetContext().Init();
  118. // Profiling init
  119. if (ge::ProfilingInit::Instance().Init(options) != ge::SUCCESS) {
  120. GELOGW("Profiling init failed.");
  121. }
  122. }
  123. void ShutDownProfiling() {
  124. GELOGD("Profiling shut down");
  125. if (ge::ProfilingProperties::Instance().ProfilingOn()) {
  126. ge::ProfilingInit::Instance().ShutDownProfiling();
  127. }
  128. }
  129. } // namespace
  130. static bool g_ge_initialized = false;
  131. static std::mutex g_ge_release_mutex; // GEFinalize and ~Session use
  132. static std::shared_ptr<ge::SessionManager> g_session_manager;
  133. namespace ge {
  134. void GetOpsProtoPath(std::string &opsproto_path) {
  135. GELOGI("Enter get ops proto path schedule");
  136. const char *path_env = std::getenv("ASCEND_OPP_PATH");
  137. if (path_env != nullptr) {
  138. std::string path = path_env;
  139. opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
  140. GELOGI("Get opsproto so path from env: %s", path.c_str());
  141. return;
  142. }
  143. std::string path_base = PluginManager::GetPath();
  144. GELOGI("path_base is %s", path_base.c_str());
  145. path_base = path_base.substr(0, path_base.rfind('/'));
  146. path_base = path_base.substr(0, path_base.rfind('/') + 1);
  147. opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
  148. }
  149. Status CheckOptionsValid(const std::map<string, string> &options) {
  150. // check job_id is valid
  151. auto job_id_iter = options.find(OPTION_EXEC_JOB_ID);
  152. if (job_id_iter != options.end()) {
  153. if (job_id_iter->second.length() > kMaxStrLen) {
  154. GELOGE(PARAM_INVALID, "[Check][JobId]Failed, the job_id [%s] string length: %zu > max string length: %d",
  155. job_id_iter->second.c_str(), job_id_iter->second.length(), kMaxStrLen);
  156. REPORT_INPUT_ERROR("E10051", std::vector<std::string>({"id", "length"}),
  157. std::vector<std::string>({job_id_iter->second,
  158. std::to_string(kMaxStrLen)}));
  159. return FAILED;
  160. }
  161. }
  162. // check modify_mixlist is valid
  163. if (ge::CheckModifyMixlistParamValid(options) != ge::SUCCESS) {
  164. return FAILED;
  165. }
  166. return SUCCESS;
  167. }
  168. // Initialize GE, prepare for execution, call GELib::Initialize
  169. Status GEInitializeImpl(const std::map<string, string> &options) {
  170. ErrorManager::GetInstance().GenWorkStreamIdDefault();
  171. GELOGT(TRACE_INIT, "GEInitialize start");
  172. std::string path_base = ge::GELib::GetPath();
  173. auto ret = ErrorManager::GetInstance().Init(path_base);
  174. if (ret != SUCCESS) {
  175. GELOGE(GE_CLI_INIT_FAILED, "[Init][PathBase]Init failed when pass param path_base:%s", path_base.c_str());
  176. REPORT_CALL_ERROR("E19999", "Init failed when pass param path_base:%s", path_base.c_str());
  177. return ret;
  178. }
  179. // 0.check init status
  180. if (g_ge_initialized) {
  181. GELOGW("GEInitialize is called more than once");
  182. return SUCCESS;
  183. }
  184. ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOpsProtoInit);
  185. // Load OpsProto lib plugin
  186. std::string opsproto_path;
  187. GetOpsProtoPath(opsproto_path);
  188. OpsProtoManager *manager = OpsProtoManager::Instance();
  189. std::map<string, string> option_tmp;
  190. option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
  191. GE_TIMESTAMP_START(GEInitialize);
  192. bool is_proto_init = manager->Initialize(option_tmp);
  193. GE_TIMESTAMP_END(GEInitialize, "GEInitialize::ManagerInitialize");
  194. if (!is_proto_init) {
  195. GELOGE(GE_CLI_INIT_FAILED, "[Init][OpsProtoPath]Loading OpsProto lib plugin failed, OpsProtoPath:%s invalid.",
  196. opsproto_path.c_str());
  197. REPORT_CALL_ERROR("E19999", "Loading OpsProto lib plugin failed, OpsProtoPath:%s invalid", opsproto_path.c_str());
  198. return FAILED;
  199. }
  200. ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther);
  201. // check options is valid
  202. GE_TIMESTAMP_START(CheckOptionsValid);
  203. if (CheckOptionsValid(options) != SUCCESS) {
  204. return FAILED;
  205. }
  206. ge::Options str_options;
  207. InitOptions(options, str_options);
  208. InitProfiling(str_options);
  209. GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid");
  210. ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOpsProtoInit);
  211. GE_TIMESTAMP_START(InitPreparation);
  212. TBEPluginManager::Instance().InitPreparation(options);
  213. GE_TIMESTAMP_END(InitPreparation, "GEInitialize::InitPreparation");
  214. // call Initialize
  215. GELOGT(TRACE_RUNNING, "Initializing environment");
  216. ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther);
  217. GE_TIMESTAMP_START(GELibInitialize);
  218. ret = ge::GELib::Initialize(options);
  219. GE_TIMESTAMP_END(GELibInitialize, "GEInitialize::GELibInitialize");
  220. if (ret != SUCCESS) {
  221. GELOGE(GE_CLI_INIT_FAILED, "[Init][GELib]Failed, error code = %u", ret);
  222. return FAILED;
  223. }
  224. ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther);
  225. GELOGI("sessionManager initial.");
  226. GE_TIMESTAMP_START(SessionManagerInitialize);
  227. g_session_manager = MakeShared<ge::SessionManager>();
  228. if (g_session_manager == nullptr) {
  229. GELOGE(GE_CLI_INIT_FAILED, "[Init][Create]SessionManager failed");
  230. return FAILED;
  231. }
  232. ret = g_session_manager->Initialize(options);
  233. GE_TIMESTAMP_END(SessionManagerInitialize, "InnerInitialize::SessionManagerInitialize");
  234. if (ret != SUCCESS) {
  235. GELOGE(ret, "[Init][SessionManager] GE session manager initial failed.");
  236. REPORT_CALL_ERROR("E19999", "SessionManager initialize failed.");
  237. return ret;
  238. }
  239. // 7.check return status, return
  240. if (!g_ge_initialized) {
  241. // Initialize success, first time calling initialize
  242. g_ge_initialized = true;
  243. }
  244. GELOGT(TRACE_STOP, "GEInitialize finished");
  245. return ret;
  246. }
  247. // Initialize GE, prepare for execution, call GELib::Initialize
  248. Status GEInitialize(const std::map<string, string> &options) {
  249. ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther);
  250. if (DlogReportInitialize() != SUCCESS) {
  251. GELOGW("Dlog report device log initialize failed.");
  252. }
  253. return GEInitializeImpl(options);
  254. }
  255. Status GEInitialize(const std::map<AscendString, AscendString> &options) {
  256. ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther);
  257. std::map<std::string, std::string> str_options;
  258. for (auto &option : options) {
  259. if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) {
  260. GELOGE(FAILED, "[Check][Param]Options invalid, first or second option is nullptr.");
  261. REPORT_INNER_ERROR("E19999", "Check parameter's options invalid, the first or second option is nullptr.");
  262. return FAILED;
  263. }
  264. std::string key = option.first.GetString();
  265. std::string val = option.second.GetString();
  266. str_options[key] = val;
  267. }
  268. if (DlogReportInitialize() != SUCCESS) {
  269. GELOGW("Dlog report device log initialize failed.");
  270. }
  271. return GEInitializeImpl(str_options);
  272. }
  273. // GE finalize, releasing all resources
  274. Status GEFinalize() {
  275. std::lock_guard<std::mutex> lock(g_ge_release_mutex);
  276. // check init status
  277. if (!g_ge_initialized) {
  278. GELOGW("[FINAL][FINAL]GEFinalize is called before GEInitialize");
  279. return SUCCESS;
  280. }
  281. ShutDownProfiling();
  282. ErrorManager::GetInstance().SetStage(error_message::kFinalize, error_message::kFinalize);
  283. ErrorManager::GetInstance().GenWorkStreamIdDefault();
  284. GELOGT(TRACE_INIT, "GEFinalize start");
  285. // call Finalize
  286. Status ret = SUCCESS;
  287. Status middle_ret;
  288. GELOGT(TRACE_RUNNING, "Finalizing environment");
  289. std::shared_ptr<GELib> instancePtr = ge::GELib::GetInstance();
  290. if (instancePtr == nullptr || !instancePtr->InitFlag()) {
  291. GELOGW("GEFinalize Failed: GE not initialized.");
  292. ret = GE_CLI_GE_NOT_INITIALIZED;
  293. }
  294. if (ret != GE_CLI_GE_NOT_INITIALIZED) {
  295. middle_ret = instancePtr->Finalize();
  296. GELOGI("GEFinalize finalize gelib ret=%u", middle_ret);
  297. if (middle_ret != SUCCESS) {
  298. ret = middle_ret;
  299. }
  300. }
  301. GELOGI("SessionManager finalization.");
  302. if (g_session_manager != nullptr) {
  303. (void)g_session_manager->Finalize(); // always success.
  304. }
  305. middle_ret = TBEPluginManager::Instance().Finalize();
  306. if (middle_ret != SUCCESS) {
  307. ret = middle_ret;
  308. }
  309. if (g_ge_initialized && ret == SUCCESS) {
  310. // Unified destruct rt_context
  311. RtContextUtil::GetInstance().DestroyAllRtContexts();
  312. g_ge_initialized = false;
  313. }
  314. // to avoid memory fragment, use malloc_trim to back free stack to system
  315. malloc_trim(0);
  316. if (DlogReportFinalize() != SUCCESS) {
  317. GELOGW("Dlog report device log finalize failed.");
  318. }
  319. GELOGT(TRACE_STOP, "GEFinalize finished");
  320. return ret;
  321. }
  322. std::string GEGetErrorMsg() {
  323. return ErrorManager::GetInstance().GetErrorMessage();
  324. }
  325. std::string GEGetWarningMsg() {
  326. return ErrorManager::GetInstance().GetWarningMessage();
  327. }
  328. // Initialize session,which calls innerSession
  329. Session::Session(const std::map<string, string> &options) {
  330. ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther);
  331. GELOGT(TRACE_INIT, "Start to construct session.");
  332. ErrorManager::GetInstance().GenWorkStreamIdDefault();
  333. // check init status
  334. sessionId_ = 0;
  335. if (!g_ge_initialized) {
  336. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  337. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  338. return;
  339. }
  340. GELOGT(TRACE_RUNNING, "Creating session");
  341. uint64_t session_id = 0;
  342. Status ret = g_session_manager->CreateSession(options, session_id);
  343. GELOGT(TRACE_RUNNING, "Session id is %lu", session_id);
  344. // check return status, return, update session id if success
  345. if (ret == SUCCESS) {
  346. sessionId_ = session_id;
  347. } else {
  348. GELOGE(ret, "[Construct][Session]Failed, error code:%u.", ret);
  349. return;
  350. }
  351. GELOGT(TRACE_STOP, "Session Constructor finished");
  352. }
  353. Session::Session(const std::map<AscendString, AscendString> &options) {
  354. ErrorManager::GetInstance().SetStage(error_message::kInitialize, error_message::kOther);
  355. GELOGT(TRACE_INIT, "Session Constructor start");
  356. ErrorManager::GetInstance().GenWorkStreamIdDefault();
  357. // check init status
  358. sessionId_ = 0;
  359. if (!g_ge_initialized) {
  360. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  361. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  362. return;
  363. }
  364. // call Initialize
  365. GELOGT(TRACE_RUNNING, "Creating session");
  366. std::map<std::string, std::string> str_options;
  367. for (auto &option : options) {
  368. if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) {
  369. GELOGE(FAILED, "[Construct][Session]Failed, the first or second option is nullptr.");
  370. REPORT_INNER_ERROR("E19999", "Creating session's options invalid, the first or second option is nullptr.");
  371. return;
  372. }
  373. std::string key = option.first.GetString();
  374. std::string val = option.second.GetString();
  375. str_options[key] = val;
  376. }
  377. uint64_t session_id = 0;
  378. Status ret = g_session_manager->CreateSession(str_options, session_id);
  379. GELOGT(TRACE_RUNNING, "Session id is %lu", session_id);
  380. // check return status, return, update session id if success
  381. if (ret == SUCCESS) {
  382. sessionId_ = session_id;
  383. } else {
  384. GELOGE(ret, "[Construct][Session]Failed, error code:%u.", ret);
  385. REPORT_CALL_ERROR("E19999", "Construct session failed, error code:%u.", ret);
  386. return;
  387. }
  388. GELOGT(TRACE_STOP, "Session Constructor finished");
  389. }
  390. // session destructor
  391. Session::~Session() {
  392. ErrorManager::GetInstance().SetStage(error_message::kFinalize, error_message::kFinalize);
  393. GELOGT(TRACE_INIT, "Start to destruct session.");
  394. // 0.check init status
  395. if (!g_ge_initialized) {
  396. GELOGW("GE is not yet initialized or is finalized.");
  397. return;
  398. }
  399. Status ret = FAILED;
  400. std::lock_guard<std::mutex> lock(g_ge_release_mutex);
  401. try {
  402. uint64_t session_id = sessionId_;
  403. // call DestroySession
  404. GELOGT(TRACE_RUNNING, "Session id is %lu", session_id);
  405. GELOGT(TRACE_RUNNING, "Destroying session");
  406. ret = g_session_manager->DestroySession(session_id);
  407. } catch (google::protobuf::FatalException &e) {
  408. GELOGE(GE_CLI_SESS_DESTROY_FAILED, "[Destruct][Session]Failed because get fatalException.");
  409. REPORT_CALL_ERROR("E19999", "Destruct session failed, get fatal exception");
  410. }
  411. // check return status, return, update session id if success
  412. if (ret != SUCCESS) {
  413. GELOGE(ret, "[Destruct][Session]Failed, error code:%u.", ret);
  414. REPORT_CALL_ERROR("E19999", "Destruct session failed, error code:%u.", ret);
  415. }
  416. GELOGT(TRACE_STOP, "Session Destructor finished");
  417. }
  418. // Add Graph
  419. Status Session::AddGraph(uint32_t graph_id, const Graph &graph) {
  420. std::map<std::string, std::string> options;
  421. return AddGraph(graph_id, graph, options);
  422. }
  423. // Add Graph
  424. Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) {
  425. ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther);
  426. GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
  427. ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  428. if (!g_ge_initialized) {
  429. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  430. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  431. return FAILED;
  432. }
  433. GELOGD("Adding graph to session");
  434. Status ret = g_session_manager->AddGraph(sessionId_, graph_id, graph, options);
  435. if (ret != SUCCESS) {
  436. GELOGE(ret, "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id);
  437. return FAILED;
  438. }
  439. GELOGD("AddGraph finished in Session.");
  440. return ret;
  441. }
  442. //Add Graph
  443. Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<AscendString, AscendString> &options) {
  444. ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther);
  445. GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
  446. ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  447. if (!g_ge_initialized) {
  448. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  449. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  450. return FAILED;
  451. }
  452. GELOGD("Adding graph to session");
  453. std::map<std::string, std::string> str_options;
  454. for (auto &option : options) {
  455. if (option.first.GetString() == nullptr || option.second.GetString() == nullptr) {
  456. GELOGE(FAILED, "[Add][Graph]Failed, the first or second option is nullptr.");
  457. REPORT_INNER_ERROR("E19999", "Add Graph Failed, the first or second option is nullptr.");
  458. return FAILED;
  459. }
  460. std::string key = option.first.GetString();
  461. std::string val = option.second.GetString();
  462. str_options[key] = val;
  463. }
  464. Status ret = g_session_manager->AddGraph(sessionId_, graph_id, graph, str_options);
  465. if (ret != SUCCESS) {
  466. GELOGE(ret, "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id);
  467. return FAILED;
  468. }
  469. GELOGD("AddGraph finished in Session.");
  470. return ret;
  471. }
  472. Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) {
  473. std::map<AscendString, AscendString> options;
  474. return AddGraphWithCopy(graph_id, graph, options);
  475. }
  476. // Add Graph With Copy
  477. Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
  478. const std::map<AscendString, AscendString> &options) {
  479. ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther);
  480. GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
  481. ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  482. if (!g_ge_initialized) {
  483. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  484. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  485. return FAILED;
  486. }
  487. std::map<std::string, std::string> str_options;
  488. for (auto it = options.begin(); it != options.end(); ++it) {
  489. str_options.insert({it->first.GetString(), it->second.GetString()});
  490. }
  491. GELOGD("Adding graph to session");
  492. Status ret = g_session_manager->AddGraphWithCopy(sessionId_, graph_id, graph, str_options);
  493. if (ret != SUCCESS) {
  494. GELOGE(ret, "[Add][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id);
  495. return FAILED;
  496. }
  497. GELOGD("AddGraph finished in Session.");
  498. return ret;
  499. }
  500. // Remove Graph
  501. Status Session::RemoveGraph(uint32_t graph_id) {
  502. ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther);
  503. GELOGT(TRACE_INIT, "Session RemoveGraph start");
  504. ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  505. // call RemoveGraph
  506. if (!g_ge_initialized) {
  507. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  508. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  509. return FAILED;
  510. }
  511. GELOGT(TRACE_RUNNING, "Removing Graph from session");
  512. Status ret = g_session_manager->RemoveGraph(sessionId_, graph_id);
  513. // check return status, return
  514. if (ret != SUCCESS) {
  515. GELOGE(ret, "[Remove][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id);
  516. REPORT_CALL_ERROR("E19999", "Remove graph failed, error code:%u, session_id:%lu, graph_id:%u",
  517. ret, sessionId_, graph_id);
  518. return FAILED;
  519. }
  520. GELOGT(TRACE_STOP, "Session RemoveGraph finished");
  521. return ret;
  522. }
  523. // Print Output Result
  524. void PrintOutputResult(std::vector<Tensor> &outputs) {
  525. if (outputs.empty() || outputs[0].GetData() == nullptr) {
  526. GELOGW("outputs is empty or data is nullptr.");
  527. return;
  528. }
  529. size_t out_buf_size = outputs[0].GetSize();
  530. TensorDesc desc(outputs[0].GetTensorDesc());
  531. DataType data_type = desc.GetDataType();
  532. auto iter = CONST_OPDATA_TYPE_SIZE_MAP.find(data_type);
  533. if (iter == CONST_OPDATA_TYPE_SIZE_MAP.end()) {
  534. GELOGI("DataType %s has not defined size", TypeUtils::DataTypeToSerialString(data_type).c_str());
  535. return;
  536. }
  537. size_t length = CONST_OPDATA_TYPE_SIZE_MAP[data_type];
  538. for (size_t i = 0; i < 10 && i < (out_buf_size / length); ++i) { // take first 10 at most
  539. switch (data_type) {
  540. case DT_BOOL:
  541. case DT_INT8:
  542. case DT_UINT8:
  543. GELOGI("output data[%zu]=%d", i, *(reinterpret_cast<int8_t *>(outputs[0].GetData()) + i));
  544. break;
  545. case DT_INT16:
  546. case DT_UINT16:
  547. GELOGI("output data[%zu]=%d", i, *(reinterpret_cast<int16_t *>(outputs[0].GetData()) + i));
  548. break;
  549. case DT_INT32:
  550. case DT_UINT32:
  551. GELOGI("output data[%zu]=%d", i, *(reinterpret_cast<int32_t *>(outputs[0].GetData()) + i));
  552. break;
  553. case DT_INT64:
  554. case DT_UINT64:
  555. GELOGI("output data[%zu]=%ld", i, *(reinterpret_cast<int64_t *>(outputs[0].GetData()) + i));
  556. break;
  557. case DT_FLOAT:
  558. GELOGI("output data[%zu]=%f", i, *(reinterpret_cast<float *>(outputs[0].GetData()) + i));
  559. break;
  560. case DT_DOUBLE:
  561. GELOGI("output data[%zu]=%lf", i, *(reinterpret_cast<double *>(outputs[0].GetData()) + i));
  562. break;
  563. default:
  564. GELOGI("Output datatype %s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str());
  565. return;
  566. }
  567. }
  568. }
  569. // Run Graph
  570. Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs) {
  571. ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther);
  572. GELOGT(TRACE_INIT, "Session RunGraph start");
  573. ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  574. if (!g_ge_initialized) {
  575. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  576. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  577. return FAILED;
  578. }
  579. // call RunGraph
  580. GELOGT(TRACE_RUNNING, "Running Graph");
  581. Status ret = g_session_manager->RunGraph(sessionId_, graph_id, inputs, outputs);
  582. // check return status
  583. if (ret != SUCCESS) {
  584. GELOGE(ret, "[Run][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id);
  585. REPORT_CALL_ERROR("E19999", "Remove graph failed, error code:%u, session_id:%lu, graph_id:%u",
  586. ret, sessionId_, graph_id);
  587. return FAILED;
  588. }
  589. // print output
  590. if (outputs.size() > 0) {
  591. PrintOutputResult(outputs);
  592. }
  593. // return
  594. GELOGT(TRACE_STOP, "Session RunGraph finished");
  595. return ret;
  596. }
  597. // Run Graph with stream Asynchronously
  598. Status Session::RunGraphWithStreamAsync(uint32_t graph_id, void *stream, const std::vector<Tensor> &inputs,
  599. std::vector<Tensor> &outputs) {
  600. ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther);
  601. GELOGT(TRACE_INIT, "Start to run graph with stream async.");
  602. ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  603. if (!g_ge_initialized) {
  604. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  605. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  606. return FAILED;
  607. }
  608. GELOGT(TRACE_RUNNING, "Run Graph Run graph with stream asyn.");
  609. Status ret = g_session_manager->RunGraphWithStreamAsync(sessionId_, graph_id, stream, inputs, outputs);
  610. if (ret != SUCCESS) {
  611. GELOGE(ret, "[Run][Graph]Run graph with stream asyn Failed,"
  612. "error code = %u, session id = %lu, graph id = %u, stream = %p.", ret, sessionId_, graph_id, stream);
  613. REPORT_CALL_ERROR("E19999", "[Run][Graph]Run graph with stream asyn failed, error code = %u, session id = %lu,"
  614. "graph id = %u, stream = %p.", ret, sessionId_, graph_id, stream);
  615. return FAILED;
  616. }
  617. GELOGT(TRACE_STOP, "Session run graph with stream async finished");
  618. return SUCCESS;
  619. }
  620. // Register Call Back
  621. Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) {
  622. ErrorManager::GetInstance().GenWorkStreamIdDefault();
  623. if (!g_ge_initialized) {
  624. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  625. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  626. return FAILED;
  627. }
  628. return g_session_manager->RegisterCallBackFunc(sessionId_, key, callback);
  629. }
  630. Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFunc &callback) {
  631. ErrorManager::GetInstance().GenWorkStreamIdDefault();
  632. if (!g_ge_initialized) {
  633. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  634. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  635. return FAILED;
  636. }
  637. std::string str_key;
  638. if (key != nullptr) {
  639. str_key = key;
  640. }
  641. return g_session_manager->RegisterCallBackFunc(sessionId_, str_key, callback);
  642. }
  643. // Build Graph
  644. Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
  645. ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther);
  646. ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  647. if (!g_ge_initialized) {
  648. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  649. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  650. return FAILED;
  651. }
  652. GELOGT(TRACE_RUNNING, "Building Graph");
  653. Status ret = g_session_manager->BuildGraph(sessionId_, graph_id, inputs);
  654. if (ret != SUCCESS) {
  655. GELOGE(ret, "[Build][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id);
  656. REPORT_CALL_ERROR("E19999", "Build graph failed , error code:%u, session_id:%lu, graph_id:%u",
  657. ret, sessionId_, graph_id);
  658. return FAILED;
  659. }
  660. return SUCCESS;
  661. }
  662. // Build Graph
  663. Status Session::BuildGraph(uint32_t graph_id, const std::vector<ge::Tensor> &inputs) {
  664. ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kOther);
  665. ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  666. if (!g_ge_initialized) {
  667. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  668. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  669. return FAILED;
  670. }
  671. GELOGT(TRACE_RUNNING, "Building Graph");
  672. Status ret = g_session_manager->BuildGraph(sessionId_, graph_id, inputs);
  673. if (ret != SUCCESS) {
  674. GELOGE(ret, "[Build][Graph]Failed, error code:%u, session_id:%lu, graph_id:%u.", ret, sessionId_, graph_id);
  675. REPORT_CALL_ERROR("E19999", "Build graph failed , error code:%u, session_id:%lu, graph_id:%u",
  676. ret, sessionId_, graph_id);
  677. return FAILED;
  678. }
  679. return SUCCESS;
  680. }
  681. // Run Graph Asynchronously
  682. Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<ge::Tensor> &inputs,
  683. RunAsyncCallback callback) {
  684. ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute);
  685. ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  686. if (!g_ge_initialized) {
  687. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  688. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  689. return FAILED;
  690. }
  691. GELOGT(TRACE_RUNNING, "Run Graph Asynchronously");
  692. GELOGW(
  693. "The callback function will not be checked. Please ensure that the implementation of the function is trusted.");
  694. Status ret = g_session_manager->RunGraphAsync(sessionId_, graph_id, inputs, callback);
  695. if (ret != SUCCESS) {
  696. GELOGE(ret, "[Run][Graph]RunGraphAsync Failed, error code:%u, session_id:%lu, graph_id:%u.",
  697. ret, sessionId_, graph_id);
  698. REPORT_CALL_ERROR("E19999", "RunGraphAsync Failed, error code:%u, session_id:%lu, graph_id:%u",
  699. ret, sessionId_, graph_id);
  700. return FAILED;
  701. }
  702. return SUCCESS;
  703. }
  704. // Get Variables
  705. Status Session::GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values) {
  706. ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute);
  707. ErrorManager::GetInstance().GenWorkStreamIdDefault();
  708. if (!g_ge_initialized) {
  709. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  710. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  711. return FAILED;
  712. }
  713. GELOGT(TRACE_RUNNING, "Get Variables");
  714. Status ret = g_session_manager->GetVariables(sessionId_, var_names, var_values);
  715. if (ret != SUCCESS) {
  716. GELOGE(ret, "[Get][Variables]Failed, error code:%u, session_id:%lu.", ret, sessionId_);
  717. return FAILED;
  718. }
  719. return SUCCESS;
  720. }
  721. // Get Variables
  722. Status Session::GetVariables(const std::vector<AscendString> &var_names, std::vector<Tensor> &var_values) {
  723. ErrorManager::GetInstance().SetStage(error_message::kModelExecute, error_message::kModelExecute);
  724. ErrorManager::GetInstance().GenWorkStreamIdDefault();
  725. if (!g_ge_initialized) {
  726. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  727. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  728. return FAILED;
  729. }
  730. GELOGT(TRACE_RUNNING, "Get Variables");
  731. std::vector<ge::string> str_var_names;
  732. for (auto &var_name : var_names) {
  733. if (var_name.GetString() == nullptr) {
  734. GELOGE(FAILED, "[Get][Variable]Failed, variables' names are nullptr.");
  735. REPORT_INNER_ERROR("E19999", "GetVariables failed, variables' names are nullptr.");
  736. return FAILED;
  737. }
  738. str_var_names.emplace_back(var_name.GetString());
  739. }
  740. Status ret = g_session_manager->GetVariables(sessionId_, str_var_names, var_values);
  741. if (ret != SUCCESS) {
  742. GELOGE(ret, "[Get][Variables]Failed, error code:%u, session_id:%lu.", ret, sessionId_);
  743. REPORT_CALL_ERROR("E19999", "Get variables failed, error code:%u, session_id:%lu.", ret, sessionId_);
  744. return FAILED;
  745. }
  746. return SUCCESS;
  747. }
  748. bool Session::IsGraphNeedRebuild(uint32_t graph_id) {
  749. if (!g_ge_initialized) {
  750. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Construct][Session]Failed because lack GEInitialize call before.");
  751. REPORT_INNER_ERROR("E19999", "Creating session failed because lack GEInitialize call before.");
  752. return false;
  753. }
  754. return g_session_manager->IsGraphNeedRebuild(sessionId_, graph_id);
  755. }
  756. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,用户对此并无感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示: