You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

ge_api.cc 14 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. KK
  17. #include "ge/ge_api.h"
  18. #include <iostream>
  19. #include "common/debug/log.h"
  20. #include "framework/common/debug/ge_log.h"
  21. #include "common/ge/datatype_util.h"
  22. #include "proto/ge_api.pb.h"
  23. #include "graph/model_serialize.h"
  24. #include "graph/detail/model_serialize_imp.h"
  25. #include "graph/utils/tensor_adapter.h"
  26. #include "init/gelib.h"
  27. #include "session/session_manager.h"
  28. #include "graph/opsproto_manager.h"
  29. #include "graph/utils/type_utils.h"
  30. #include "register/op_registry.h"
  31. using domi::GetContext;
  32. using domi::OpRegistry;
  33. using std::map;
  34. using std::string;
  35. using std::vector;
  36. namespace ge {
  namespace {
  // Upper bound on the length of the job id option string (see CheckOptionsValid).
  constexpr int32_t kMaxStrLen = 128;
  // True between a successful GEInitialize and the next successful GEFinalize.
  bool kGeInitialized = false;
  // Held by GEFinalize and ~Session while they release resources.
  std::mutex kGeReleaseMutex;
  }  // namespace
  40. void GetOpsProtoPath(std::string &opsproto_path) {
  41. GELOGI("Enter get ops proto path schedule");
  42. const char *path_env = std::getenv("ASCEND_OPP_PATH");
  43. if (path_env != nullptr) {
  44. std::string path = path_env;
  45. opsproto_path = (path + "/op_proto/built-in/" + ":") + (path + "/op_proto/custom/");
  46. GELOGI("Get opsproto so path from env: %s", path.c_str());
  47. return;
  48. }
  49. std::string path_base = PluginManager::GetPath();
  50. GELOGI("path_base is %s", path_base.c_str());
  51. path_base = path_base.substr(0, path_base.rfind('/'));
  52. path_base = path_base.substr(0, path_base.rfind('/') + 1);
  53. opsproto_path = (path_base + "ops/op_proto/built-in/" + ":") + (path_base + "ops/op_proto/custom/");
  54. }
  55. Status CheckDumpAndReuseMemory(const std::map<string, string> &options) {
  56. const int kDecimal = 10;
  57. auto dump_op_env = std::getenv("DUMP_OP");
  58. int dump_op_flag = (dump_op_env != nullptr) ? std::strtol(dump_op_env, nullptr, kDecimal) : 0;
  59. auto disable_reuse_memory_iter = options.find("ge.exec.disableReuseMemory");
  60. if (disable_reuse_memory_iter != options.end()) {
  61. if (disable_reuse_memory_iter->second == "0") {
  62. GELOGD("ge.exec.disableReuseMemory=0, reuse memory is open");
  63. if (dump_op_flag) {
  64. GELOGW("Will dump incorrect op data with GE Option ge.exec.disableReuseMemory=0");
  65. }
  66. } else if (disable_reuse_memory_iter->second == "1") {
  67. GELOGD("ge.exec.disableReuseMemory=1, reuse memory is close");
  68. } else {
  69. GELOGE(PARAM_INVALID, "CheckDumpAndReuseMemory ge.exec.disableReuseMemory is valid");
  70. return FAILED;
  71. }
  72. } else {
  73. if (dump_op_flag) {
  74. GELOGW("Will dump incorrect op data with default reuse memory");
  75. }
  76. }
  77. return SUCCESS;
  78. }
  79. Status CheckOptionsValid(const std::map<string, string> &options) {
  80. // check job_id is valid
  81. auto job_id_iter = options.find(OPTION_EXEC_JOB_ID);
  82. if (job_id_iter != options.end()) {
  83. if (job_id_iter->second.length() > kMaxStrLen) {
  84. GELOGE(PARAM_INVALID, "CheckOptionsValid job_id failed, string len > %d", kMaxStrLen);
  85. return FAILED;
  86. }
  87. }
  88. // Check ge.exec.disableReuseMemory and env DUMP_OP
  89. if (CheckDumpAndReuseMemory(options) != SUCCESS) {
  90. return FAILED;
  91. }
  92. return SUCCESS;
  93. }
  94. void SaveDdkVersion(const std::map<string, string> &options) {
  95. auto ddk_option = options.find(DDK_VERSION_FLAG);
  96. if (ddk_option != options.end()) {
  97. auto ddk_version = ddk_option->second;
  98. if (!ddk_version.empty()) {
  99. GELOGI("Input ddk version : %s.", ddk_version.c_str());
  100. domi::GetContext().ddk_version = ddk_version;
  101. }
  102. } else {
  103. GELOGW("No ddkVersion!");
  104. return;
  105. }
  106. }
  107. // Initialize GE, prepare for execution, call GELib::Initialize
  108. Status GEInitialize(const std::map<string, string> &options) {
  109. GELOGT(TRACE_INIT, "GEInitialize start");
  110. // 0.check init status
  111. if (kGeInitialized) {
  112. GELOGW("GEInitialize is called more than once");
  113. return SUCCESS;
  114. }
  115. // Load OpsProto lib plugin
  116. std::string opsproto_path;
  117. GetOpsProtoPath(opsproto_path);
  118. OpsProtoManager *manager = OpsProtoManager::Instance();
  119. std::map<string, string> option_tmp;
  120. option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
  121. bool is_proto_init = manager->Initialize(option_tmp);
  122. if (!is_proto_init) {
  123. GELOGE(GE_CLI_INIT_FAILED, "geInitialize failed, ops proto path is invalid.");
  124. return FAILED;
  125. }
  126. // check options is valid
  127. if (CheckOptionsValid(options) != SUCCESS) {
  128. return FAILED;
  129. }
  130. SaveDdkVersion(options);
  131. // call Initialize
  132. GELOGT(TRACE_RUNNING, "Initializing environment");
  133. Status ret = ge::GELib::Initialize(options);
  134. if (ret != SUCCESS) {
  135. GELOGE(GE_CLI_INIT_FAILED, "geInitialize failed, error code = %u", ret);
  136. return FAILED;
  137. }
  138. // 7.check return status, return
  139. if (!kGeInitialized) {
  140. // Initialize success, first time calling initialize
  141. kGeInitialized = true;
  142. }
  143. GELOGT(TRACE_STOP, "GEInitialize finished");
  144. return ret;
  145. }
  146. // GE finalize, releasing all resources
  147. Status GEFinalize() {
  148. GELOGT(TRACE_INIT, "GEFinalize start");
  149. // check init status
  150. if (!kGeInitialized) {
  151. GELOGW("GEFinalize is called before GEInitialize");
  152. return SUCCESS;
  153. }
  154. std::lock_guard<std::mutex> lock(kGeReleaseMutex);
  155. // call Finalize
  156. GELOGT(TRACE_RUNNING, "Finalizing environment");
  157. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  158. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  159. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GEFinalize Failed: GE not initialized");
  160. return GE_CLI_GE_NOT_INITIALIZED;
  161. }
  162. Status ret = instance_ptr->Finalize();
  163. GELOGI("GEFinalize finalize gelib ret=%u", ret);
  164. if (ret != SUCCESS) {
  165. GELOGE(ret, "GEFinalize Failed");
  166. return FAILED;
  167. }
  168. if (kGeInitialized && ret == SUCCESS) {
  169. kGeInitialized = false;
  170. }
  171. GELOGT(TRACE_STOP, "GEFinalize finished");
  172. return ret;
  173. }
  174. // Initialize session,which calls innerSession
  175. Session::Session(const std::map<string, string> &options) {
  176. GELOGT(TRACE_INIT, "Session Constructor start");
  177. // check init status
  178. sessionId_ = 0;
  179. if (!kGeInitialized) {
  180. GELOGE(GE_CLI_GE_NOT_INITIALIZED);
  181. return;
  182. }
  183. // call Initialize
  184. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  185. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  186. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session Constructor failed");
  187. return;
  188. }
  189. GELOGT(TRACE_RUNNING, "Creating session");
  190. uint64_t session_id = 0;
  191. Status ret = instance_ptr->SessionManagerObj().CreateSession(options, session_id);
  192. GELOGT(TRACE_RUNNING, "Session id is %lu", session_id);
  193. // check return status, return, update session id if success
  194. if (ret == SUCCESS) {
  195. sessionId_ = session_id;
  196. } else {
  197. GELOGE(ret, "Session constructor failed, session Id not initialized");
  198. return;
  199. }
  200. GELOGT(TRACE_STOP, "Session Constructor finished");
  201. }
  202. // session destructor
  203. Session::~Session() {
  204. GELOGT(TRACE_INIT, "Session Destructor start");
  205. // 0.check init status
  206. if (!kGeInitialized) {
  207. GELOGW("GE is not yet initialized or is finalized.");
  208. return;
  209. }
  210. Status ret = FAILED;
  211. std::lock_guard<std::mutex> lock(kGeReleaseMutex);
  212. try {
  213. uint64_t session_id = sessionId_;
  214. // call DestroySession
  215. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  216. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  217. GELOGW("GE is not yet initialized or is finalized.");
  218. return;
  219. }
  220. GELOGT(TRACE_RUNNING, "Session id is %lu", session_id);
  221. GELOGT(TRACE_RUNNING, "Destroying session");
  222. ret = instance_ptr->SessionManagerObj().DestroySession(session_id);
  223. } catch (google::protobuf::FatalException &e) {
  224. GELOGE(GE_CLI_SESS_DESTROY_FAILED, "SessionDestructor throws FatalException");
  225. }
  226. // check return status, return, update session id if success
  227. if (ret != SUCCESS) {
  228. GELOGE(ret, "Session Destructor failed");
  229. }
  230. GELOGT(TRACE_STOP, "Session Destructor finished");
  231. }
  232. Status Session::AddGraph(uint32_t graph_id, const Graph &graph) {
  233. std::map<std::string, std::string> options;
  234. return AddGraph(graph_id, graph, options);
  235. }
  236. Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) {
  237. GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, sessinon_id: %lu.", graph_id, sessionId_);
  238. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  239. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  240. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Sesson.");
  241. return FAILED;
  242. }
  243. GELOGD("Adding graph to session");
  244. Status ret = instance_ptr->SessionManagerObj().AddGraph(sessionId_, graph_id, graph, options);
  245. if (ret != SUCCESS) {
  246. GELOGE(ret, "AddGraph failed in Session.");
  247. return FAILED;
  248. }
  249. GELOGD("AddGraph finished in Session.");
  250. return ret;
  251. }
  252. Status Session::RemoveGraph(uint32_t graph_id) {
  253. GELOGT(TRACE_INIT, "Session RemoveGraph start");
  254. // call RemoveGraph
  255. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  256. if (!instance_ptr || !instance_ptr->InitFlag()) {
  257. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session RemoveGraph failed");
  258. return FAILED;
  259. }
  260. GELOGT(TRACE_RUNNING, "Removing Graph from session");
  261. Status ret = instance_ptr->SessionManagerObj().RemoveGraph(sessionId_, graph_id);
  262. // check return status, return
  263. if (ret != SUCCESS) {
  264. GELOGE(ret, "session RemoveGraph failed");
  265. return FAILED;
  266. }
  267. GELOGT(TRACE_STOP, "Session RemoveGraph finished");
  268. return ret;
  269. }
  270. void PrintOutputResult(std::vector<Tensor> &outputs) {
  271. if (outputs.empty() || outputs[0].GetData() == nullptr) {
  272. GELOGW("outputs is empty or data is nullptr.");
  273. return;
  274. }
  275. size_t out_buf_size = outputs[0].GetSize();
  276. TensorDesc desc(outputs[0].GetTensorDesc());
  277. DataType data_type = desc.GetDataType();
  278. auto iter = CONST_OPDATA_TYPE_SIZE_MAP.find(data_type);
  279. if (iter == CONST_OPDATA_TYPE_SIZE_MAP.end()) {
  280. GELOGI("DataType %s has not defined size", TypeUtils::DataTypeToSerialString(data_type).c_str());
  281. return;
  282. }
  283. size_t length = CONST_OPDATA_TYPE_SIZE_MAP[data_type];
  284. for (size_t i = 0; i < 10 && i < (out_buf_size / length); ++i) { // take first 10 at most
  285. switch (data_type) {
  286. case DT_BOOL:
  287. case DT_INT8:
  288. case DT_UINT8:
  289. GELOGI("output data[%zu]=%d", i, *(reinterpret_cast<int8_t *>(outputs[0].GetData()) + i));
  290. break;
  291. case DT_INT16:
  292. case DT_UINT16:
  293. GELOGI("output data[%zu]=%d", i, *(reinterpret_cast<int16_t *>(outputs[0].GetData()) + i));
  294. break;
  295. case DT_INT32:
  296. case DT_UINT32:
  297. GELOGI("output data[%zu]=%d", i, *(reinterpret_cast<int32_t *>(outputs[0].GetData()) + i));
  298. break;
  299. case DT_INT64:
  300. case DT_UINT64:
  301. GELOGI("output data[%zu]=%ld", i, *(reinterpret_cast<int64_t *>(outputs[0].GetData()) + i));
  302. break;
  303. case DT_FLOAT:
  304. GELOGI("output data[%zu]=%f", i, *(reinterpret_cast<float *>(outputs[0].GetData()) + i));
  305. break;
  306. case DT_DOUBLE:
  307. GELOGI("output data[%zu]=%lf", i, *(reinterpret_cast<double *>(outputs[0].GetData()) + i));
  308. break;
  309. default:
  310. GELOGI("Output datatype %s is not support print.", TypeUtils::DataTypeToSerialString(data_type).c_str());
  311. return;
  312. }
  313. }
  314. }
  315. Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs) {
  316. GELOGT(TRACE_INIT, "Session RunGraph start");
  317. std::vector<Tensor> graph_inputs = inputs;
  318. // call RunGraph
  319. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  320. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  321. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session RunGraph failed");
  322. return FAILED;
  323. }
  324. GELOGT(TRACE_RUNNING, "Running Graph");
  325. Status ret = instance_ptr->SessionManagerObj().RunGraph(sessionId_, graph_id, graph_inputs, outputs);
  326. // check return status
  327. if (ret != SUCCESS) {
  328. GELOGE(ret, "Session RunGraph failed");
  329. return FAILED;
  330. }
  331. // print output
  332. if (outputs.size() > 0) {
  333. PrintOutputResult(outputs);
  334. }
  335. // return
  336. GELOGT(TRACE_STOP, "Session RunGraph finished");
  337. return ret;
  338. }
  339. Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) {
  340. GELOGW(
  341. "The callback function will not be checked. Please ensure that the implementation of the function is trusted.");
  342. return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback);
  343. }
  344. Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<TensorInfo> &inputs,
  345. std::vector<TensorInfo> &outputs, std::function<void(Status)> callback) {
  346. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  347. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  348. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
  349. return FAILED;
  350. }
  351. GELOGT(TRACE_RUNNING, "Run Graph Asynchronously");
  352. GELOGW(
  353. "The callback function will not be checked. Please ensure that the implementation of the function is trusted.");
  354. Status ret =
  355. ge::GELib::GetInstance()->SessionManagerObj().RunGraphAsync(sessionId_, graph_id, inputs, outputs, callback);
  356. if (ret != SUCCESS) {
  357. GELOGE(ret, "SessionManager RunGraphAsync failed");
  358. return FAILED;
  359. }
  360. return SUCCESS;
  361. }
  362. bool Session::IsGraphNeedRebuild(uint32_t graph_id) {
  363. return ge::GELib::GetInstance()->SessionManagerObj().IsGraphNeedRebuild(sessionId_, graph_id);
  364. }
  365. } // namespace ge

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用，用户对此并无感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。