You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dnnengine_manager.cc 32 kB

5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "engine_manager/dnnengine_manager.h"
  17. #include <cstdio>
  18. #include <map>
  19. #include "framework/common/debug/log.h"
  20. #include "common/ge/ge_util.h"
  21. #include "analyzer/analyzer.h"
  22. #include "graph/ge_context.h"
  23. #include "graph/utils/graph_utils.h"
  24. #include "graph/utils/node_utils.h"
  25. #include "init/gelib.h"
  namespace {
  // Keys looked up in the engine configuration JSON while parsing schedulers and engines.
  constexpr const char *kSchedulerUnits = "schedule_units";
  constexpr const char *kId = "id";
  constexpr const char *kName = "name";
  constexpr const char *kExAttrs = "ex_attrs";
  constexpr const char *kIndependent = "independent";
  constexpr const char *kSkipAssignStream = "skip_assign_stream";
  constexpr const char *kCalEngines = "cal_engines";
  constexpr const char *kAttach = "attach";

  // CORE_TYPE option value compared against in GetDNNEngineName, and the two
  // engine names one of which is excluded depending on that option.
  constexpr const char *kVectorCore = "VectorCore";
  constexpr const char *kVectorEngine = "VectorEngine";
  constexpr const char *kAIcoreEngine = "AIcoreEngine";

  // Engine / ops-kernel-library pair matched by GetHostCpuEngineName.
  constexpr const char *kHostCpuEngineName = "DNN_VM_HOST_CPU";
  constexpr const char *kHostCpuOpKernelLibName = "DNN_VM_HOST_CPU_OP_STORE";
  }  // namespace
  41. namespace ge {
  42. namespace {
  43. const std::set<std::string> kNotCpuOp = {DATA, CONSTANT, CONSTANTOP, VARIABLE, NETOUTPUT};
  44. const char *const kGetDNNEngineObjs = "GetDNNEngineObjs";
  45. const char *const kInvalidCompositeEngineName = "InvalidCompositeEngineName";
  46. constexpr uint32_t kMaxRecursiveDepth = 10;
  47. bool ExecOnHostCpu(const OpDescPtr &op_desc) {
  48. bool is_host_cpu_op = (kNotCpuOp.find(op_desc->GetType()) == kNotCpuOp.end());
  49. return ge::GetContext().GetHostExecFlag() && is_host_cpu_op;
  50. }
  51. } // namespace
  52. DNNEngineManager::DNNEngineManager() : init_flag_(false) {}
  53. DNNEngineManager::~DNNEngineManager() {
  54. engines_attrs_map_.clear();
  55. schedulers_.clear();
  56. }
  57. DNNEngineManager &DNNEngineManager::GetInstance() {
  58. static DNNEngineManager instance;
  59. return instance;
  60. }
  61. Status DNNEngineManager::Initialize(const std::map<std::string, std::string> &options) {
  62. // Multiple initializations are not supported
  63. if (init_flag_) {
  64. GELOGW("DNNEngineManager has been initialized.");
  65. return SUCCESS;
  66. }
  67. // Load engine so
  68. std::string plugin_so_path = "plugin/nnengine/";
  69. std::string path = PluginManager::GetPath();
  70. std::string engine_plugin_path = path + plugin_so_path;
  71. std::vector<std::string> so_func{kGetDNNEngineObjs};
  72. Status status = plugin_mgr_.Load(engine_plugin_path, so_func);
  73. if (status != SUCCESS) {
  74. GELOGE(status, "[Load][EngineSo]Failed, lib path %s", path.c_str());
  75. REPORT_CALL_ERROR("E19999", "Load engine so failed, lib path %s", engine_plugin_path.c_str());
  76. return status;
  77. }
  78. status = plugin_mgr_.InvokeAll<std::map<std::string, DNNEnginePtr> &>(kGetDNNEngineObjs, engines_map_);
  79. if (status != SUCCESS) {
  80. GELOGE(status, "[Get][DNNEngineObjs]Failed, so_api_func %s", kGetDNNEngineObjs);
  81. REPORT_CALL_ERROR("E19999", "Get DNNEngineObjs failed, so_api_func %s", kGetDNNEngineObjs);
  82. return status;
  83. }
  84. GELOGI("The number of DNNEngineObjs is %zu.", engines_map_.size());
  85. // Engines initialize
  86. for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) {
  87. if (iter->second == nullptr) {
  88. GELOGI("Engine: %s point to nullptr", (iter->first).c_str());
  89. continue;
  90. }
  91. GELOGI("DNNEngine name: %s.", (iter->first).c_str());
  92. status = iter->second->Initialize(options);
  93. if (status != SUCCESS) {
  94. GELOGE(status, "[Init][Engine]Failed, engine %s", (iter->first).c_str());
  95. REPORT_CALL_ERROR("E19999", "Initialize engine %s failed", (iter->first).c_str());
  96. return status;
  97. }
  98. // Check engines' attribute
  99. DNNEngineAttribute attrs;
  100. iter->second->GetAttributes(attrs);
  101. if (attrs.runtime_type == RuntimeType::DEVICE) {
  102. if ((attrs.mem_type.size()) != 1 || (attrs.mem_type[0] != GE_ENGINE_ATTR_MEM_TYPE_HBM)) {
  103. GELOGE(GE_ENG_MEMTYPE_ERROR, "[Check][Param]Engine %s in aicore, but the memory type is "
  104. "not HBM, mem_type_size %lu", (iter->first).c_str(), attrs.mem_type.size());
  105. REPORT_INNER_ERROR("E19999", "Engine %s in aicore, but the memory type is not HBM, mem_type_size %lu",
  106. (iter->first).c_str(), attrs.mem_type.size());
  107. return GE_ENG_MEMTYPE_ERROR;
  108. }
  109. }
  110. }
  111. status = ParserJsonFile();
  112. if (status != SUCCESS) {
  113. GELOGE(status, "[Parse][JsonFile]Failed");
  114. return status;
  115. }
  116. status = CheckJsonFile();
  117. if (status != SUCCESS) {
  118. GELOGE(status, "[Check][JsonFile]Failed");
  119. return status;
  120. }
  121. init_flag_ = true;
  122. return SUCCESS;
  123. }
  124. Status DNNEngineManager::Finalize() {
  125. // Finalize is not allowed, initialize first is necessary
  126. if (!init_flag_) {
  127. GELOGW("DNNEngineManager has been finalized.");
  128. return SUCCESS;
  129. }
  130. for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) {
  131. if (iter->second != nullptr) {
  132. GELOGI("DNNEngine name: %s.", (iter->first).c_str());
  133. Status status = iter->second->Finalize();
  134. if (status != SUCCESS) {
  135. GELOGE(status, "[Finalize][Engine]Failed, engine %s", (iter->first).c_str());
  136. REPORT_CALL_ERROR("E19999", "Finalize engine %s failed", (iter->first).c_str());
  137. return status;
  138. }
  139. }
  140. }
  141. init_flag_ = false;
  142. engines_map_.clear();
  143. atomic_2_composite_.clear();
  144. return SUCCESS;
  145. }
  146. std::shared_ptr<ge::DNNEngine> DNNEngineManager::GetEngine(const std::string &name) const {
  147. auto iter = engines_map_.find(name);
  148. if (iter != engines_map_.end()) {
  149. return iter->second;
  150. }
  151. GELOGW("Failed to get engine object by engine name. %s.", name.c_str());
  152. return nullptr;
  153. }
  154. bool DNNEngineManager::IsEngineRegistered(const std::string &name) {
  155. auto iter = engines_map_.find(name);
  156. if (iter != engines_map_.end()) {
  157. return true;
  158. }
  159. GELOGW("Engine: %s is not Registered", name.c_str());
  160. return false;
  161. }
  162. void DNNEngineManager::InitPerformanceStatistic() {
  163. std::lock_guard<std::mutex> lock(mutex_);
  164. checksupport_cost_.clear();
  165. }
  166. const map<string, uint64_t> &DNNEngineManager::GetCheckSupportCost() const {
  167. std::lock_guard<std::mutex> lock(mutex_);
  168. return checksupport_cost_;
  169. }
  170. std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) {
  171. std::lock_guard<std::mutex> lock(mutex_);
  172. GE_IF_BOOL_EXEC(node_ptr == nullptr, GELOGE(GE_CLI_GE_NOT_INITIALIZED, "DNNEngineManager: node_ptr is nullptr");
  173. return "");
  174. auto op_desc = node_ptr->GetOpDesc();
  175. GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(GE_CLI_GE_NOT_INITIALIZED, "DNNEngineManager: op_desc is nullptr");
  176. return "");
  177. // Use the OpsKernelManager to get the opInfos for this opCode
  178. std::vector<OpInfo> op_infos = OpsKernelManager::GetInstance().GetOpsKernelInfo(op_desc->GetType());
  179. if (op_infos.empty()) {
  180. GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str());
  181. return "";
  182. }
  183. GE_IF_BOOL_EXEC(ExecOnHostCpu(op_desc), return GetHostCpuEngineName(op_infos, op_desc));
  184. std::string ge_core_type;
  185. Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type);
  186. GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE"));
  187. std::string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine;
  188. GELOGD("engine type will exclude: %s", exclude_core_Type.c_str());
  189. std::map<std::string, std::string> unsupported_reasons;
  190. for (const auto &it : op_infos) {
  191. if (it.engine == exclude_core_Type) {
  192. continue;
  193. }
  194. const auto &kernel_name = it.opKernelLib;
  195. auto kernel_info_store = OpsKernelManager::GetInstance().GetOpsKernelInfoStore(kernel_name);
  196. if (kernel_info_store == nullptr) {
  197. GELOGW("DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s, op type is %s, "
  198. "op name is %s", kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str());
  199. return "";
  200. }
  201. std::string unsupported_reason;
  202. // It will be replaced by engine's check support
  203. uint64_t start_time = GetCurrentTimestamp();
  204. if (kernel_info_store->CheckSupported(node_ptr, unsupported_reason)) {
  205. checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time;
  206. op_desc->SetOpEngineName(it.engine);
  207. op_desc->SetOpKernelLibName(kernel_name);
  208. // set attrs for taking information when load txt to graph object
  209. if (it.flagAsync) {
  210. GELOGD("Set aicpu blocking op:%s attribute(is_blocking_op):true", op_desc->GetName().c_str());
  211. (void)AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true);
  212. }
  213. (void) AttrUtils::SetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, it.engine);
  214. (void) AttrUtils::SetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name);
  215. GELOGD("DNNEngineManager:Set kernel_lib %s, atomic engine %s, to node %s", kernel_name.c_str(), it.engine.c_str(),
  216. op_desc->GetName().c_str());
  217. return it.engine;
  218. } else {
  219. checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time;
  220. unsupported_reasons.emplace(kernel_name, unsupported_reason);
  221. GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s",
  222. kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str());
  223. if (!op_desc->HasAttr("_is_ge_op")) {
  224. ErrorManager::GetInstance().ATCReportErrMessage("W11001", {"opname"}, {op_desc->GetName()});
  225. }
  226. }
  227. }
  228. // concat unsupported reasons analyzed data selection
  229. string reason;
  230. for (const auto &it : unsupported_reasons) {
  231. reason += it.first + ":" + it.second + ";";
  232. ErrorManager::GetInstance().ATCReportErrMessage(
  233. "E13002", {"optype", "opskernel", "reason"}, {op_desc->GetType(), it.first, it.second});
  234. GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "[Check][OpSupported]Op type %s of ops kernel %s "
  235. "is unsupported, reason : %s",
  236. op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str());
  237. }
  238. auto root_graph = ge::GraphUtils::FindRootGraph(node_ptr->GetOwnerComputeGraph());
  239. analyzer::DataInfo analyze_info{root_graph->GetSessionID(), root_graph->GetGraphID(),
  240. analyzer::CHECKSUPPORT, node_ptr, reason};
  241. // do not change original process
  242. (void)Analyzer::GetInstance()->DoAnalyze(analyze_info);
  243. ErrorManager::GetInstance().ATCReportErrMessage(
  244. "E13003", {"opname", "optype"}, {op_desc->GetName(), op_desc->GetType()});
  245. GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "[Get][DNNEngineName]Can't find any supported ops kernel "
  246. "and engine of %s, type is %s",
  247. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  248. return "";
  249. }
  250. std::string DNNEngineManager::GetCompositeEngineName(const ge::NodePtr &node_ptr, uint32_t recursive_depth) {
  251. // op_desc of node should not be null
  252. const auto &op_desc = node_ptr->GetOpDesc();
  253. if (recursive_depth > kMaxRecursiveDepth) {
  254. REPORT_INNER_ERROR("E19999", "Get CompositeEngineName will be terminated because too many nesting levels(%u) of "
  255. "subgraphs, last node is %s", recursive_depth, op_desc->GetName().c_str());
  256. GELOGE(PARAM_INVALID,
  257. "[Check][Param] Get CompositeEngineName will be terminated because too many nesting levels(%u) of subgraphs, "
  258. "last node is %s", recursive_depth, op_desc->GetName().c_str());
  259. return "";
  260. }
  261. if (OpsKernelManager::GetInstance().GetCompositeEngines().empty() ||
  262. OpsKernelManager::GetInstance().GetCompositeEngineKernelLibNames().empty()) {
  263. return "";
  264. }
  265. // composite engine name exist
  266. std::string composite_engine_name;
  267. (void)AttrUtils::GetStr(op_desc, ATTR_NAME_COMPOSITE_ENGINE_NAME, composite_engine_name);
  268. std::string composite_engine_kernel_lib_name;
  269. (void)AttrUtils::GetStr(op_desc, ATTR_NAME_COMPOSITE_ENGINE_KERNEL_LIB_NAME, composite_engine_kernel_lib_name);
  270. if (!composite_engine_name.empty() && !composite_engine_kernel_lib_name.empty()) {
  271. return composite_engine_name;
  272. }
  273. // normal node without subgraph
  274. if (op_desc->GetSubgraphInstanceNames().empty()) {
  275. return GetCompositeEngine(node_ptr);
  276. }
  277. return GetCompositeEngine(node_ptr, recursive_depth);
  278. }
  279. std::string DNNEngineManager::GetCompositeEngine(const NodePtr &node) {
  280. // op_desc of node should not be null
  281. const auto &op_desc = node->GetOpDesc();
  282. auto atomic_engine_name = op_desc->GetOpEngineName().empty() ? GetDNNEngineName(node) : op_desc->GetOpEngineName();
  283. bool gelocal_follow_flag = false;
  284. if (IsStreamAssignSkip(atomic_engine_name)) {
  285. bool in_diff_flag = false;
  286. std::string in_composite_engine_name = kInvalidCompositeEngineName;
  287. for (const auto &in_node : node->GetInAllNodes()) {
  288. std::string tmp_composite_engine_name;
  289. (void)AttrUtils::GetStr(in_node->GetOpDesc(), ATTR_NAME_COMPOSITE_ENGINE_NAME, tmp_composite_engine_name);
  290. if (in_composite_engine_name == kInvalidCompositeEngineName) {
  291. in_composite_engine_name = tmp_composite_engine_name;
  292. } else if (in_composite_engine_name != tmp_composite_engine_name) {
  293. in_diff_flag = true;
  294. break;
  295. }
  296. }
  297. if (!in_diff_flag &&
  298. (in_composite_engine_name != kInvalidCompositeEngineName) &&
  299. !in_composite_engine_name.empty()) {
  300. gelocal_follow_flag = true;
  301. }
  302. }
  303. std::string composite_engine_name;
  304. if (!gelocal_follow_flag) {
  305. composite_engine_name = GetCompositeEngineName(atomic_engine_name);
  306. }
  307. const auto &composite_engine_kernel_lib_name = GetCompositeEngineKernelLibName(composite_engine_name);
  308. if (composite_engine_name.empty() || composite_engine_kernel_lib_name.empty()) {
  309. (void)op_desc->DelAttr(ATTR_NAME_COMPOSITE_ENGINE_NAME);
  310. (void)op_desc->DelAttr(ATTR_NAME_COMPOSITE_ENGINE_KERNEL_LIB_NAME);
  311. } else {
  312. GELOGI("Assign composite engine %s, kernel lib name %s for node %s.", composite_engine_name.c_str(),
  313. composite_engine_kernel_lib_name.c_str(), op_desc->GetName().c_str());
  314. (void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOSITE_ENGINE_NAME, composite_engine_name);
  315. (void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOSITE_ENGINE_KERNEL_LIB_NAME, composite_engine_kernel_lib_name);
  316. }
  317. return composite_engine_name;
  318. }
  319. std::string DNNEngineManager::GetCompositeEngine(const NodePtr &func_node, uint32_t recursive_depth) {
  320. // op_desc of node should not be null
  321. const auto &op_desc = func_node->GetOpDesc();
  322. bool graph_diff_composite_engine_flag = false;
  323. std::string graph_composite_engine_name = kInvalidCompositeEngineName;
  324. std::vector<ComputeGraphPtr> subgraphs;
  325. if (NodeUtils::GetDirectSubgraphs(func_node, subgraphs) != GRAPH_SUCCESS) {
  326. REPORT_CALL_ERROR("E19999", "Get subgraphs of node %s failed", op_desc->GetName().c_str());
  327. GELOGE(FAILED, "[Check][Param] Get subgraphs of node %s failed", op_desc->GetName().c_str());
  328. return "";
  329. }
  330. for (const auto &subgraph : subgraphs) {
  331. std::string cur_graph_composite_engine_name = GetCompositeEngine(subgraph, recursive_depth);
  332. if (graph_composite_engine_name == kInvalidCompositeEngineName) {
  333. graph_composite_engine_name = cur_graph_composite_engine_name;
  334. } else if (graph_composite_engine_name != cur_graph_composite_engine_name) {
  335. graph_diff_composite_engine_flag = true;
  336. break;
  337. }
  338. }
  339. std::string composite_engine_name;
  340. std::string composite_engine_kernel_lib_name = GetCompositeEngineKernelLibName(graph_composite_engine_name);
  341. if (!graph_diff_composite_engine_flag &&
  342. (graph_composite_engine_name != kInvalidCompositeEngineName) &&
  343. !graph_composite_engine_name.empty() &&
  344. !composite_engine_kernel_lib_name.empty()) {
  345. composite_engine_name = graph_composite_engine_name;
  346. GELOGI("Assign composite engine %s, kernel lib name %s for node %s.", composite_engine_name.c_str(),
  347. composite_engine_kernel_lib_name.c_str(), op_desc->GetName().c_str());
  348. (void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOSITE_ENGINE_NAME, composite_engine_name);
  349. (void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOSITE_ENGINE_KERNEL_LIB_NAME, composite_engine_kernel_lib_name);
  350. } else {
  351. (void)op_desc->DelAttr(ATTR_NAME_COMPOSITE_ENGINE_NAME);
  352. (void)op_desc->DelAttr(ATTR_NAME_COMPOSITE_ENGINE_KERNEL_LIB_NAME);
  353. }
  354. return composite_engine_name;
  355. }
  356. std::string DNNEngineManager::GetCompositeEngine(const ComputeGraphPtr &subgraph, uint32_t recursive_depth) {
  357. std::string graph_composite_engine_name;
  358. (void)AttrUtils::GetStr(subgraph, ATTR_NAME_COMPOSITE_ENGINE_NAME, graph_composite_engine_name);
  359. // if subgraph has been assigned
  360. if (!graph_composite_engine_name.empty()) {
  361. return graph_composite_engine_name;
  362. }
  363. bool node_diff_composite_engine_flag = false;
  364. std::string node_composite_engine_name = kInvalidCompositeEngineName;
  365. uint32_t assigned_node_num = 0;
  366. for (const auto &cur_node : subgraph->GetDirectNode()) {
  367. if (IsNoTask(cur_node)) {
  368. continue;
  369. }
  370. assigned_node_num++;
  371. std::string cur_node_composite_engine_name = GetCompositeEngineName(cur_node, recursive_depth + 1);
  372. if (node_composite_engine_name == kInvalidCompositeEngineName) {
  373. node_composite_engine_name = cur_node_composite_engine_name;
  374. } else if (node_composite_engine_name != cur_node_composite_engine_name) {
  375. node_diff_composite_engine_flag = true;
  376. break;
  377. }
  378. }
  379. if (assigned_node_num == 0) {
  380. GELOGD("all nodes in subgraph %s belongs to ge_local engine", subgraph->GetName().c_str());
  381. return "";
  382. }
  383. if (!node_diff_composite_engine_flag &&
  384. (node_composite_engine_name != kInvalidCompositeEngineName) &&
  385. !node_composite_engine_name.empty()) {
  386. GELOGI("Assign composite engine %s for subgraph %s.", node_composite_engine_name.c_str(), subgraph->GetName().c_str());
  387. (void)AttrUtils::SetStr(subgraph, ATTR_NAME_COMPOSITE_ENGINE_NAME, node_composite_engine_name);
  388. graph_composite_engine_name = node_composite_engine_name;
  389. }
  390. else {
  391. (void)subgraph->DelAttr(ATTR_NAME_COMPOSITE_ENGINE_NAME);
  392. }
  393. return graph_composite_engine_name;
  394. }
  395. std::string DNNEngineManager::GetCompositeEngineName(const string &atomic_engine_name) {
  396. if (atomic_2_composite_.empty()) {
  397. InitAtomicCompositeMapping();
  398. }
  399. const auto &iter = atomic_2_composite_.find(atomic_engine_name);
  400. if (iter == atomic_2_composite_.end()) {
  401. GELOGW("Composite engine which contains atomic engine %s is not registered", atomic_engine_name.c_str());
  402. return "";
  403. }
  404. return iter->second;
  405. }
  406. std::string DNNEngineManager::GetCompositeEngineKernelLibName(const string &composite_engine_name) const {
  407. const auto &composite_engine_2_kernel_lib_name = OpsKernelManager::GetInstance().GetCompositeEngineKernelLibNames();
  408. const auto &iter = composite_engine_2_kernel_lib_name.find(composite_engine_name);
  409. if (iter == composite_engine_2_kernel_lib_name.end()) {
  410. GELOGW("Kernel lib name of composite engine %s is not registered", composite_engine_name.c_str());
  411. return "";
  412. }
  413. return iter->second;
  414. }
  415. std::string DNNEngineManager::GetHostCpuEngineName(const std::vector<OpInfo> &op_infos,
  416. const OpDescPtr &op_desc) const {
  417. for (const auto &it : op_infos) {
  418. if ((it.engine == kHostCpuEngineName) && (it.opKernelLib == kHostCpuOpKernelLibName)) {
  419. op_desc->SetOpEngineName(kHostCpuEngineName);
  420. op_desc->SetOpKernelLibName(kHostCpuOpKernelLibName);
  421. GELOGI("DNNEngineManager: Set OpKernelLibName %s and OpEngineName %s to %s",
  422. kHostCpuOpKernelLibName, kHostCpuEngineName, op_desc->GetName().c_str());
  423. return kHostCpuEngineName;
  424. }
  425. }
  426. GELOGE(FAILED, "[Get][HostCpuEngineName]Failed, HostCpuEngine not support [%s, %s]",
  427. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  428. REPORT_INNER_ERROR("E19999", "Get HostCpuEngineName failed, HostCpuEngine not support [%s, %s]",
  429. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  430. return "";
  431. }
  432. const std::map<std::string, SchedulerConf> &DNNEngineManager::GetSchedulers() const { return schedulers_; }
  433. Status DNNEngineManager::ParserJsonFile() {
  434. GELOGI("Begin to parser json file");
  435. std::string json_file_path = "plugin/nnengine/ge_config/engine_conf.json";
  436. std::string path = PluginManager::GetPath();
  437. path.append(json_file_path);
  438. nlohmann::json scheduler_json_file;
  439. Status status = ReadJsonFile(path, &scheduler_json_file);
  440. if (status != SUCCESS) {
  441. GELOGE(FAILED, "[Read][JsonFile]Failed, file %s", path.c_str());
  442. REPORT_CALL_ERROR("E19999", "Read json file %s failed", path.c_str());
  443. return FAILED;
  444. }
  445. if (scheduler_json_file.is_null()) {
  446. // when engine_conf.json is not exist, just return success
  447. GELOGW("Json file is null");
  448. return SUCCESS;
  449. }
  450. try {
  451. nlohmann::json scheduler_utils_json = scheduler_json_file[kSchedulerUnits];
  452. if (scheduler_utils_json.is_null()) {
  453. GELOGE(FAILED, "[Check[Param]Find scheduler units failed, the message is null, file %s", path.c_str());
  454. REPORT_INNER_ERROR("E19999", "Find scheduler units failed, the message is null, file %s", path.c_str());
  455. return FAILED;
  456. }
  457. if (!scheduler_utils_json.is_array()) {
  458. GELOGE(FAILED, "[Check][Param]The message of kSchedulerUnits is not array and "
  459. "the file path is %s", path.c_str());
  460. REPORT_INNER_ERROR("E19999", "The message of kSchedulerUnits is not array and "
  461. "the file path is %s", path.c_str());
  462. return FAILED;
  463. }
  464. auto size = scheduler_json_file[kSchedulerUnits].size();
  465. for (size_t i = 0; i < size; i++) {
  466. SchedulerConf scheduler_conf;
  467. std::map<std::string, EngineConfPtr> engine_conf_map;
  468. nlohmann::json engines_json_map = scheduler_utils_json[i][kCalEngines];
  469. if (engines_json_map.is_null()) {
  470. GELOGE(FAILED, "[Check][Param]The message of cal_engines is null, file %s", path.c_str());
  471. REPORT_INNER_ERROR("E19999", "The message of cal_engines is null, file %s", path.c_str());
  472. return FAILED;
  473. }
  474. std::string scheduler_id_temp = scheduler_utils_json[i][kId];
  475. if (!scheduler_id_temp.empty()) {
  476. scheduler_conf.id = scheduler_id_temp;
  477. } else {
  478. GELOGE(FAILED, "[Check][Param]Scheduler ID is null, file %s", path.c_str());
  479. REPORT_INNER_ERROR("E19999", "Scheduler ID is null, file %s", path.c_str());
  480. return FAILED;
  481. }
  482. status = ParserEngineMessage(engines_json_map, scheduler_id_temp, engine_conf_map);
  483. if (status != SUCCESS) {
  484. GELOGE(FAILED, "[Parse][EngineMessage]Failed, scheduler_id_temp %s", scheduler_id_temp.c_str());
  485. REPORT_CALL_ERROR("E19999", "Parse engine message failed, scheduler_id_temp %s",
  486. scheduler_id_temp.c_str());
  487. return FAILED;
  488. }
  489. scheduler_conf.name = scheduler_utils_json[i][kName];
  490. scheduler_conf.ex_attrs = scheduler_utils_json[i][kExAttrs];
  491. scheduler_conf.cal_engines = engine_conf_map;
  492. auto it = schedulers_.find(scheduler_id_temp);
  493. if (it != schedulers_.end()) {
  494. GELOGE(FAILED, "[Check][Param]There are the same scheduler ts %s in the json file",
  495. scheduler_id_temp.c_str());
  496. REPORT_INNER_ERROR("E19999", "[Check][Param]There are the same scheduler ts %s "
  497. "in the json file", scheduler_id_temp.c_str());
  498. return FAILED;
  499. }
  500. schedulers_.emplace(scheduler_id_temp, scheduler_conf);
  501. }
  502. } catch (const nlohmann::detail::type_error &e) {
  503. GELOGE(FAILED, "[Parse][JsonFile]Failed, file %s, reason %s", path.c_str(), e.what());
  504. REPORT_CALL_ERROR("E19999", "Parse json file %s failed, reason %s", path.c_str(), e.what());
  505. return FAILED;
  506. }
  507. GELOGI("Parser json file SUCCESS");
  508. return SUCCESS;
  509. }
  510. Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std::string &scheduler_mark,
  511. std::map<std::string, EngineConfPtr> &engines) {
  512. GELOGI("Begin to parser engine massage");
  513. if (engines_json.is_null()) {
  514. GELOGE(FAILED, "[Check][Param]The message of cal_engines is null");
  515. REPORT_INNER_ERROR("E19999", "The message of cal_engines is null");
  516. return FAILED;
  517. }
  518. try {
  519. if (engines_json.is_array()) {
  520. for (size_t i = 0; i < engines_json.size(); i++) {
  521. nlohmann::json engines_elems = engines_json[i];
  522. EngineConfPtr engine_conf_ptr = MakeShared<EngineConf>();
  523. if (engine_conf_ptr == nullptr) {
  524. return FAILED;
  525. }
  526. std::string engine_id = engines_elems[kId];
  527. if (!engine_id.empty()) {
  528. engine_conf_ptr->id = engine_id;
  529. } else {
  530. GELOGE(FAILED, "[Check][Param]Engine ID is null");
  531. REPORT_INNER_ERROR("E19999", "Engine ID is null");
  532. return FAILED;
  533. }
  534. if (engines_elems.find(kName) != engines_elems.end()) {
  535. engine_conf_ptr->name = engines_elems[kName];
  536. } else {
  537. GELOGW("The engine %s name is null", engine_id.c_str());
  538. }
  539. if (engines_elems.find(kIndependent) != engines_elems.end()) {
  540. engine_conf_ptr->independent = engines_elems[kIndependent];
  541. }
  542. if (engines_elems.find(kAttach) != engines_elems.end()) {
  543. engine_conf_ptr->attach = engines_elems[kAttach];
  544. }
  545. if (engines_elems.find(kSkipAssignStream) != engines_elems.end()) {
  546. engine_conf_ptr->skip_assign_stream = engines_elems[kSkipAssignStream];
  547. }
  548. engine_conf_ptr->scheduler_id = scheduler_mark;
  549. auto it = engines.find(engine_id);
  550. if (it != engines.end()) {
  551. GELOGE(FAILED, "[Check][Param]There are the same engine %s message in the json file",
  552. engine_id.c_str());
  553. REPORT_INNER_ERROR("E19999", "There are the same engine %s message in the json file",
  554. engine_id.c_str());
  555. return FAILED;
  556. }
  557. engines.emplace(engine_id, engine_conf_ptr);
  558. }
  559. } else {
  560. GELOGE(FAILED, "[Check][Param]The message of cal_engines is not array in the json file");
  561. REPORT_INNER_ERROR("E19999", "The message of cal_engines is not array in the json file");
  562. return FAILED;
  563. }
  564. } catch (const json::exception &e) {
  565. GELOGE(FAILED, "[Construct][JsonContent]Failed, reason %s", e.what());
  566. REPORT_INNER_ERROR("E19999", "Construct json content failed, reason %s", e.what());
  567. return FAILED;
  568. }
  569. GELOGI("Parser engine massage success");
  570. return SUCCESS;
  571. }
  572. Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle handle) {
  573. GELOGD("Begin to read json file");
  574. if (file_path.empty()) {
  575. GELOGE(FAILED, "[Check][Param]Json path is empty");
  576. REPORT_INNER_ERROR("E19999", "Json path is empty");
  577. return FAILED;
  578. }
  579. nlohmann::json *json_file = reinterpret_cast<nlohmann::json *>(handle);
  580. if (json_file == nullptr) {
  581. GELOGE(FAILED, "[Check][Param]Json file is nullptr");
  582. REPORT_CALL_ERROR("E19999", "Json file is nullptr");
  583. return FAILED;
  584. }
  585. const char *file = file_path.data();
  586. if ((mmAccess2(file, M_F_OK)) != EN_OK) {
  587. if (engines_map_.size() != 0) {
  588. GELOGE(FAILED, "[Check][Param]The json file %s not exists, err %s",
  589. file_path.c_str(), strerror(errno));
  590. REPORT_CALL_ERROR("E19999", "Json file %s not exists, err %s",
  591. file_path.c_str(), strerror(errno));
  592. return FAILED;
  593. } else {
  594. GELOGW("The json file %s is not needed.", file_path.c_str());
  595. return SUCCESS;
  596. }
  597. }
  598. std::ifstream ifs(file_path);
  599. if (!ifs.is_open()) {
  600. GELOGE(FAILED, "[Open][JsonFile]Failed, file %s", file_path.c_str());
  601. REPORT_CALL_ERROR("E19999", "Open json file %s failed", file_path.c_str());
  602. return FAILED;
  603. }
  604. try {
  605. ifs >> *json_file;
  606. } catch (const json::exception &e) {
  607. GELOGE(FAILED, "[Read][JsonFile]Failed, reason %s", e.what());
  608. REPORT_CALL_ERROR("E19999", "Read json file failed, reason %s", e.what());
  609. ifs.close();
  610. return FAILED;
  611. }
  612. ifs.close();
  613. GELOGD("Read json file success");
  614. return SUCCESS;
  615. }
  616. Status DNNEngineManager::CheckJsonFile() {
  617. GELOGD("Begin to check json file");
  618. for (auto &it : engines_map_) {
  619. std::string engine_name = it.first;
  620. int count = 0;
  621. for (auto &iter : schedulers_) {
  622. auto engine_map = iter.second.cal_engines;
  623. auto iter_engine_name = engine_map.find(engine_name);
  624. if (iter_engine_name != engine_map.end()) {
  625. count++;
  626. }
  627. }
  628. if (count == 0) {
  629. GELOGE(FAILED, "[Check][JsonFile]The engine message %s is not found in the json file",
  630. engine_name.c_str());
  631. REPORT_INNER_ERROR("E19999", "The engine message %s is not found in the json file",
  632. engine_name.c_str());
  633. return FAILED;
  634. }
  635. if (count > 1) {
  636. GELOGE(FAILED, "[Check][JsonFile]The same engine message %s exists in the json file",
  637. engine_name.c_str());
  638. REPORT_INNER_ERROR("E19999", "The same engine message %s exists in the json file",
  639. engine_name.c_str());
  640. return FAILED;
  641. }
  642. }
  643. GELOGD("Check json file success");
  644. return SUCCESS;
  645. }
  646. void DNNEngineManager::InitAtomicCompositeMapping() {
  647. for (const auto &item : OpsKernelManager::GetInstance().GetCompositeEngines()) {
  648. const auto &composite_engine = GetEngine(item.first);
  649. if ((composite_engine == nullptr) || composite_engine->IsAtomic()) {
  650. GELOGW("Composite engine %s is not registered", item.first.c_str());
  651. }
  652. for (const auto &atomic_engine_name : item.second) {
  653. const auto &atomic_engine = GetEngine(atomic_engine_name);
  654. if ((atomic_engine == nullptr) || !atomic_engine->IsAtomic()) {
  655. GELOGW("Atomic engine %s is not registered", atomic_engine_name.c_str());
  656. continue;
  657. }
  658. auto iter = atomic_2_composite_.find(atomic_engine_name);
  659. if (iter != atomic_2_composite_.end()) {
  660. GELOGW("Atomic engine %s has been contained in composite engine %s, and will be overwritten by engine %s",
  661. atomic_engine_name.c_str(), iter->second.c_str(), item.first.c_str());
  662. }
  663. atomic_2_composite_[atomic_engine_name] = item.first;
  664. }
  665. }
  666. }
  667. bool DNNEngineManager::IsNoTask(const NodePtr &node) {
  668. const auto &op_desc = node->GetOpDesc();
  669. // op_desc of node should not be null
  670. if (op_desc->HasAttr(ATTR_NAME_NOTASK)) {
  671. return true;
  672. }
  673. return IsStreamAssignSkip(node) && op_desc->GetSubgraphInstanceNames().empty();
  674. }
  675. bool DNNEngineManager::IsStreamAssignSkip(const NodePtr &node) {
  676. const auto &op_desc = node->GetOpDesc();
  677. // op_desc of node should not be null
  678. const auto &engine_name = op_desc->GetOpEngineName().empty() ? GetDNNEngineName(node) : op_desc->GetOpEngineName();
  679. return IsStreamAssignSkip(engine_name);
  680. }
  681. bool DNNEngineManager::IsStreamAssignSkip(const string &engine_name) {
  682. // Only one scheduler has been supported by now
  683. for (const auto &scheduler : schedulers_) {
  684. const auto &iter = scheduler.second.cal_engines.find(engine_name);
  685. if (iter == scheduler.second.cal_engines.end()) {
  686. GELOGW("No engine found within name %s", engine_name.c_str());
  687. continue;
  688. }
  689. if (iter->second == nullptr) {
  690. GELOGW("engine configuration of engine %s is null", engine_name.c_str());
  691. continue;
  692. }
  693. return iter->second->skip_assign_stream;
  694. }
  695. return false;
  696. }
  697. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示