You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dnnengine_manager.cc 32 kB

5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "engine_manager/dnnengine_manager.h"
  17. #include <cstdio>
  18. #include <map>
  19. #include "framework/common/debug/log.h"
  20. #include "common/ge/ge_util.h"
  21. #include "analyzer/analyzer.h"
  22. #include "graph/ge_context.h"
  23. #include "graph/utils/graph_utils.h"
  24. #include "graph/utils/node_utils.h"
  25. #include "init/gelib.h"
  namespace {
  // Keys read from the engine configuration json (see ParserJsonFile / ParserEngineMessage).
  constexpr const char *kSchedulerUnits = "schedule_units";
  constexpr const char *kCalEngines = "cal_engines";
  constexpr const char *kId = "id";
  constexpr const char *kName = "name";
  constexpr const char *kExAttrs = "ex_attrs";
  constexpr const char *kIndependent = "independent";
  constexpr const char *kAttach = "attach";
  constexpr const char *kSkipAssignStream = "skip_assign_stream";

  // Core type option value and the engine names compared against it in GetDNNEngineName.
  constexpr const char *kVectorCore = "VectorCore";
  constexpr const char *kVectorEngine = "VectorEngine";
  constexpr const char *kAIcoreEngine = "AIcoreEngine";

  // Engine / kernel library pair selected by GetHostCpuEngineName.
  constexpr const char *kHostCpuEngineName = "DNN_VM_HOST_CPU";
  constexpr const char *kHostCpuOpKernelLibName = "DNN_VM_HOST_CPU_OP_STORE";
  }  // namespace
  41. namespace ge {
  42. namespace {
  43. const std::set<std::string> kNotCpuOp = {DATA, CONSTANT, CONSTANTOP, VARIABLE, NETOUTPUT};
  44. const char *const kGetDNNEngineObjs = "GetDNNEngineObjs";
  45. const char *const kGetCompoundEngineContains = "GetCompoundEngineContains";
  46. const char *const kInvalidCompoundEngineName = "InvalidCompoundEngineName";
  47. constexpr uint32_t kMaxRecursiveDepth = 10;
  48. bool ExecOnHostCpu(const OpDescPtr &op_desc) {
  49. bool is_host_cpu_op = (kNotCpuOp.find(op_desc->GetType()) == kNotCpuOp.end());
  50. return ge::GetContext().GetHostExecFlag() && is_host_cpu_op;
  51. }
  52. } // namespace
  53. DNNEngineManager::DNNEngineManager() : init_flag_(false) {}
  54. DNNEngineManager::~DNNEngineManager() {
  55. engines_attrs_map_.clear();
  56. schedulers_.clear();
  57. }
  58. Status DNNEngineManager::Initialize(const std::map<std::string, std::string> &options) {
  59. // Multiple initializations are not supported
  60. if (init_flag_) {
  61. GELOGW("DNNEngineManager has been initialized.");
  62. return SUCCESS;
  63. }
  64. // Load engine so
  65. std::string plugin_so_path = "plugin/nnengine/";
  66. std::string path = PluginManager::GetPath();
  67. std::string engine_plugin_path = path + plugin_so_path;
  68. std::vector<std::string> so_func{kGetDNNEngineObjs};
  69. Status status = atomic_plugin_mgr_.Load(engine_plugin_path, so_func);
  70. if (status != SUCCESS) {
  71. GELOGE(status, "[Load][EngineSo]Failed, lib path %s", path.c_str());
  72. REPORT_CALL_ERROR("E19999", "Load engine so failed, lib path %s", engine_plugin_path.c_str());
  73. return status;
  74. }
  75. status = atomic_plugin_mgr_.InvokeAll<std::map<std::string, DNNEnginePtr> &>(kGetDNNEngineObjs, engines_map_);
  76. if (status != SUCCESS) {
  77. GELOGE(status, "[Get][DNNEngineObjs]Failed, so_api_func %s", kGetDNNEngineObjs);
  78. REPORT_CALL_ERROR("E19999", "Get DNNEngineObjs failed, so_api_func %s", kGetDNNEngineObjs);
  79. return status;
  80. }
  81. GELOGI("The number of DNNEngineObjs is %zu.", engines_map_.size());
  82. // Engines initialize
  83. for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) {
  84. if (iter->second == nullptr) {
  85. GELOGI("Engine: %s point to nullptr", (iter->first).c_str());
  86. continue;
  87. }
  88. GELOGI("DNNEngine name: %s.", (iter->first).c_str());
  89. status = iter->second->Initialize(options);
  90. if (status != SUCCESS) {
  91. GELOGE(status, "[Init][Engine]Failed, engine %s", (iter->first).c_str());
  92. REPORT_CALL_ERROR("E19999", "Initialize engine %s failed", (iter->first).c_str());
  93. return status;
  94. }
  95. // Check engines' attribute
  96. DNNEngineAttribute attrs;
  97. iter->second->GetAttributes(attrs);
  98. if (attrs.runtime_type == RuntimeType::DEVICE) {
  99. if ((attrs.mem_type.size()) != 1 || (attrs.mem_type[0] != GE_ENGINE_ATTR_MEM_TYPE_HBM)) {
  100. GELOGE(GE_ENG_MEMTYPE_ERROR, "[Check][Param]Engine %s in aicore, but the memory type is "
  101. "not HBM, mem_type_size %lu", (iter->first).c_str(), attrs.mem_type.size());
  102. REPORT_INNER_ERROR("E19999", "Engine %s in aicore, but the memory type is not HBM, mem_type_size %lu",
  103. (iter->first).c_str(), attrs.mem_type.size());
  104. return GE_ENG_MEMTYPE_ERROR;
  105. }
  106. }
  107. }
  108. status = ParserJsonFile();
  109. if (status != SUCCESS) {
  110. GELOGE(status, "[Parse][JsonFile]Failed");
  111. return status;
  112. }
  113. status = CheckJsonFile();
  114. if (status != SUCCESS) {
  115. GELOGE(status, "[Check][JsonFile]Failed");
  116. return status;
  117. }
  118. status = InitCompoundEngines(path);
  119. if (status != SUCCESS) {
  120. GELOGE(status, "[Init][CompoundEngine]Failed");
  121. return status;
  122. }
  123. init_flag_ = true;
  124. return SUCCESS;
  125. }
  126. Status DNNEngineManager::Finalize() {
  127. // Finalize is not allowed, initialize first is necessary
  128. if (!init_flag_) {
  129. GELOGW("DNNEngineManager has been finalized.");
  130. return SUCCESS;
  131. }
  132. for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) {
  133. if (iter->second != nullptr) {
  134. GELOGI("DNNEngine name: %s.", (iter->first).c_str());
  135. Status status = iter->second->Finalize();
  136. if (status != SUCCESS) {
  137. GELOGE(status, "[Finalize][Engine]Failed, engine %s", (iter->first).c_str());
  138. REPORT_CALL_ERROR("E19999", "Finalize engine %s failed", (iter->first).c_str());
  139. return status;
  140. }
  141. }
  142. }
  143. init_flag_ = false;
  144. engines_map_.clear();
  145. compound_engine_contains_.clear();
  146. atomic_2_compound_.clear();
  147. compound_engine_2_kernel_lib_name_.clear();
  148. return SUCCESS;
  149. }
  150. std::shared_ptr<ge::DNNEngine> DNNEngineManager::GetEngine(const std::string &name) const {
  151. auto iter = engines_map_.find(name);
  152. if (iter != engines_map_.end()) {
  153. return iter->second;
  154. }
  155. GELOGW("Failed to get engine object by engine name. %s.", name.c_str());
  156. return nullptr;
  157. }
  158. bool DNNEngineManager::IsEngineRegistered(const std::string &name) {
  159. auto iter = engines_map_.find(name);
  160. if (iter != engines_map_.end()) {
  161. return true;
  162. }
  163. GELOGW("Engine: %s is not Registered", name.c_str());
  164. return false;
  165. }
  166. void DNNEngineManager::InitPerformanceStatistic() {
  167. std::lock_guard<std::mutex> lock(mutex_);
  168. checksupport_cost_.clear();
  169. }
  170. const map<string, uint64_t> &DNNEngineManager::GetCheckSupportCost() const {
  171. std::lock_guard<std::mutex> lock(mutex_);
  172. return checksupport_cost_;
  173. }
  174. std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) {
  175. std::lock_guard<std::mutex> lock(mutex_);
  176. GE_IF_BOOL_EXEC(node_ptr == nullptr, GELOGE(GE_CLI_GE_NOT_INITIALIZED, "DNNEngineManager: node_ptr is nullptr");
  177. return "");
  178. auto op_desc = node_ptr->GetOpDesc();
  179. GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(GE_CLI_GE_NOT_INITIALIZED, "DNNEngineManager: op_desc is nullptr");
  180. return "");
  181. // Use the OpsKernelManager in GELib to get the opInfos for this opCode
  182. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  183. if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) {
  184. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][DNNEngineName]Failed, gelib not init before");
  185. REPORT_INNER_ERROR("E19999", "Get DNNEngineName failed, gelib not init before");
  186. return "";
  187. }
  188. OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj();
  189. std::vector<OpInfo> op_infos = ops_kernel_manager.GetOpsKernelInfo(op_desc->GetType());
  190. if (op_infos.empty()) {
  191. GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str());
  192. return "";
  193. }
  194. GE_IF_BOOL_EXEC(ExecOnHostCpu(op_desc), return GetHostCpuEngineName(op_infos, op_desc));
  195. std::string ge_core_type;
  196. Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type);
  197. GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE"));
  198. std::string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine;
  199. GELOGD("engine type will exclude: %s", exclude_core_Type.c_str());
  200. std::map<std::string, std::string> unsupported_reasons;
  201. for (const auto &it : op_infos) {
  202. if (it.engine == exclude_core_Type) {
  203. continue;
  204. }
  205. const auto &kernel_name = it.opKernelLib;
  206. auto kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(kernel_name);
  207. if (kernel_info_store == nullptr) {
  208. GELOGW("DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s, op type is %s, "
  209. "op name is %s", kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str());
  210. }
  211. std::string unsupported_reason;
  212. // It will be replaced by engine's check support
  213. uint64_t start_time = GetCurrentTimestamp();
  214. if (kernel_info_store->CheckSupported(node_ptr, unsupported_reason)) {
  215. checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time;
  216. op_desc->SetOpEngineName(it.engine);
  217. op_desc->SetOpKernelLibName(kernel_name);
  218. // set attrs for taking information when load txt to graph object
  219. (void) AttrUtils::SetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, it.engine);
  220. (void) AttrUtils::SetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name);
  221. GELOGD("DNNEngineManager:Set kernel_lib %s, atomic engine %s, to node %s", kernel_name.c_str(), it.engine.c_str(),
  222. op_desc->GetName().c_str());
  223. return it.engine;
  224. } else {
  225. checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time;
  226. unsupported_reasons.emplace(kernel_name, unsupported_reason);
  227. GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s",
  228. kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str());
  229. if (!op_desc->HasAttr("_is_ge_op")) {
  230. ErrorManager::GetInstance().ATCReportErrMessage("W11001", {"opname"}, {op_desc->GetName()});
  231. }
  232. }
  233. }
  234. // concat unsupported reasons analyzed data selection
  235. string reason;
  236. for (const auto &it : unsupported_reasons) {
  237. reason += it.first + ":" + it.second + ";";
  238. ErrorManager::GetInstance().ATCReportErrMessage(
  239. "E13002", {"optype", "opskernel", "reason"}, {op_desc->GetType(), it.first, it.second});
  240. GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "[Check][OpSupported]Op type %s of ops kernel %s "
  241. "is unsupported, reason : %s",
  242. op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str());
  243. }
  244. auto root_graph = ge::GraphUtils::FindRootGraph(node_ptr->GetOwnerComputeGraph());
  245. analyzer::DataInfo analyze_info{root_graph->GetSessionID(), root_graph->GetGraphID(),
  246. analyzer::CHECKSUPPORT, node_ptr, reason};
  247. // do not change original process
  248. (void)Analyzer::GetInstance()->DoAnalyze(analyze_info);
  249. ErrorManager::GetInstance().ATCReportErrMessage(
  250. "E13003", {"opname", "optype"}, {op_desc->GetName(), op_desc->GetType()});
  251. GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "[Get][DNNEngineName]Can't find any supported ops kernel "
  252. "and engine of %s, type is %s",
  253. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  254. return "";
  255. }
  256. std::string DNNEngineManager::GetCompoundEngineName(const ge::NodePtr &node_ptr, uint32_t recursive_depth) {
  257. if ((node_ptr == nullptr) || (node_ptr->GetOpDesc() == nullptr)) {
  258. return "";
  259. }
  260. const auto &op_desc = node_ptr->GetOpDesc();
  261. if (recursive_depth > kMaxRecursiveDepth) {
  262. REPORT_INNER_ERROR("E19999", "Get CompoundEngineName will be terminated because too many nesting levels(%d) of "
  263. "subgraphs, last node is %s", recursive_depth, op_desc->GetName().c_str());
  264. GELOGE(PARAM_INVALID,
  265. "[Check][Param] Get CompoundEngineName will be terminated because too many nesting levels(%d) of subgraphs, "
  266. "last node is %s", recursive_depth, op_desc->GetName().c_str());
  267. return "";
  268. }
  269. if (compound_engine_contains_.empty() || compound_engine_2_kernel_lib_name_.empty()) {
  270. return "";
  271. }
  272. // compound engine name exist
  273. std::string compound_engine_name;
  274. (void)AttrUtils::GetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_NAME, compound_engine_name);
  275. std::string compound_engine_kernel_lib_name;
  276. (void)AttrUtils::GetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, compound_engine_kernel_lib_name);
  277. if (!(compound_engine_name.empty() || compound_engine_kernel_lib_name.empty())) {
  278. return compound_engine_name;
  279. }
  280. // normal node without subgraph
  281. if (op_desc->GetSubgraphInstanceNames().empty()) {
  282. auto atomic_engine_name = op_desc->GetOpEngineName();
  283. if (atomic_engine_name.empty()) {
  284. atomic_engine_name = GetDNNEngineName(node_ptr);
  285. }
  286. compound_engine_name = GetOwningCompoundEngine(atomic_engine_name);
  287. compound_engine_kernel_lib_name = GetCompoundEngineKernelLibName(compound_engine_name);
  288. if (compound_engine_name.empty() || compound_engine_kernel_lib_name.empty()) {
  289. (void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME);
  290. (void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME);
  291. } else {
  292. GELOGI("Assign compound engine %s, kernel lib name %s for node %s.",
  293. compound_engine_name.c_str(), compound_engine_kernel_lib_name.c_str(), op_desc->GetName().c_str());
  294. (void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_NAME, compound_engine_name);
  295. (void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, compound_engine_kernel_lib_name);
  296. }
  297. return compound_engine_name;
  298. }
  299. bool graph_diff_compound_engine_flag = false;
  300. std::string graph_compound_engine_name = kInvalidCompoundEngineName;
  301. std::vector<ComputeGraphPtr> subgraphs;
  302. if (NodeUtils::GetSubgraphs(node_ptr, subgraphs) != GRAPH_SUCCESS) {
  303. REPORT_CALL_ERROR("E19999", "Get subgraphs of node %s failed", op_desc->GetName().c_str());
  304. GELOGE(FAILED, "[Check][Param] Get subgraphs of node %s failed", op_desc->GetName().c_str());
  305. return "";
  306. }
  307. for (const auto &subgraph : subgraphs) {
  308. std::string cur_graph_compound_engine_name;
  309. // if subgraph has been assigned
  310. if (subgraph->HasAttr(ATTR_NAME_COMPOUND_ENGINE_NAME)) {
  311. (void)AttrUtils::GetStr(subgraph, ATTR_NAME_COMPOUND_ENGINE_NAME, cur_graph_compound_engine_name);
  312. } else {
  313. bool node_diff_compound_engine_flag = false;
  314. std::string node_compound_engine_name = kInvalidCompoundEngineName;
  315. uint32_t assign_node_num = 0;
  316. for (const auto &cur_node : subgraph->GetDirectNode()) {
  317. if (IsStreamAssignSkip(cur_node) && cur_node->GetOpDesc()->GetSubgraphInstanceNames().empty()) {
  318. continue;
  319. }
  320. assign_node_num++;
  321. std::string cur_node_compound_engine_name = GetCompoundEngineName(cur_node, recursive_depth + 1);
  322. if (node_compound_engine_name == kInvalidCompoundEngineName) {
  323. node_compound_engine_name = cur_node_compound_engine_name;
  324. } else if (node_compound_engine_name != cur_node_compound_engine_name) {
  325. node_diff_compound_engine_flag = true;
  326. break;
  327. }
  328. }
  329. if (assign_node_num == 0) {
  330. GELOGD("all nodes in subgraph %s belongs to ge_local engine", subgraph->GetName().c_str());
  331. continue;
  332. }
  333. if (!(node_diff_compound_engine_flag ||
  334. (node_compound_engine_name == kInvalidCompoundEngineName) ||
  335. node_compound_engine_name.empty())) {
  336. GELOGI("Assign compound engine %s for subgraph %s.", node_compound_engine_name.c_str(), subgraph->GetName().c_str());
  337. (void)AttrUtils::SetStr(subgraph, ATTR_NAME_COMPOUND_ENGINE_NAME, node_compound_engine_name);
  338. cur_graph_compound_engine_name = node_compound_engine_name;
  339. } else {
  340. (void)subgraph->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME);
  341. cur_graph_compound_engine_name.clear();
  342. }
  343. }
  344. if (graph_compound_engine_name == kInvalidCompoundEngineName) {
  345. graph_compound_engine_name = cur_graph_compound_engine_name;
  346. } else if (graph_compound_engine_name != cur_graph_compound_engine_name) {
  347. graph_diff_compound_engine_flag = true;
  348. break;
  349. }
  350. }
  351. compound_engine_kernel_lib_name = GetCompoundEngineKernelLibName(graph_compound_engine_name);
  352. if (!(graph_diff_compound_engine_flag || (graph_compound_engine_name == kInvalidCompoundEngineName) ||
  353. graph_compound_engine_name.empty() || compound_engine_kernel_lib_name.empty())) {
  354. compound_engine_name = graph_compound_engine_name;
  355. GELOGI("Assign compound engine %s, kernel lib name %s for node %s.",
  356. compound_engine_name.c_str(), compound_engine_kernel_lib_name.c_str(), op_desc->GetName().c_str());
  357. (void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_NAME, compound_engine_name);
  358. (void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, compound_engine_kernel_lib_name);
  359. } else {
  360. (void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME);
  361. (void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME);
  362. }
  363. return compound_engine_name;
  364. }
  365. std::string DNNEngineManager::GetOwningCompoundEngine(const string &atomic_engine_name) {
  366. if (atomic_2_compound_.empty()) {
  367. InitAtomicCompoundMapping();
  368. }
  369. const auto &iter = atomic_2_compound_.find(atomic_engine_name);
  370. if (iter == atomic_2_compound_.end()) {
  371. GELOGW("Compound engine which contains atomic engine %s is not registered", atomic_engine_name.c_str());
  372. return "";
  373. }
  374. return iter->second;
  375. }
  376. std::string DNNEngineManager::GetCompoundEngineKernelLibName(const string &compound_engine_name) const {
  377. const auto &iter = compound_engine_2_kernel_lib_name_.find(compound_engine_name);
  378. if (iter == compound_engine_2_kernel_lib_name_.end()) {
  379. GELOGW("Kernel lib name of compound engine %s is not registered", compound_engine_name.c_str());
  380. return "";
  381. }
  382. return iter->second;
  383. }
  384. std::string DNNEngineManager::GetHostCpuEngineName(const std::vector<OpInfo> &op_infos,
  385. const OpDescPtr &op_desc) const {
  386. for (const auto &it : op_infos) {
  387. if ((it.engine == kHostCpuEngineName) && (it.opKernelLib == kHostCpuOpKernelLibName)) {
  388. op_desc->SetOpEngineName(kHostCpuEngineName);
  389. op_desc->SetOpKernelLibName(kHostCpuOpKernelLibName);
  390. GELOGI("DNNEngineManager: Set OpKernelLibName %s and OpEngineName %s to %s",
  391. kHostCpuOpKernelLibName, kHostCpuEngineName, op_desc->GetName().c_str());
  392. return kHostCpuEngineName;
  393. }
  394. }
  395. GELOGE(FAILED, "[Get][HostCpuEngineName]Failed, HostCpuEngine not support [%s, %s]",
  396. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  397. REPORT_INNER_ERROR("E19999", "Get HostCpuEngineName failed, HostCpuEngine not support [%s, %s]",
  398. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  399. return "";
  400. }
  401. const std::map<std::string, SchedulerConf> &DNNEngineManager::GetSchedulers() const { return schedulers_; }
  402. Status DNNEngineManager::ParserJsonFile() {
  403. GELOGI("Begin to parser json file");
  404. std::string json_file_path = "plugin/nnengine/ge_config/engine_conf.json";
  405. std::string path = PluginManager::GetPath();
  406. path.append(json_file_path);
  407. nlohmann::json scheduler_json_file;
  408. Status status = ReadJsonFile(path, &scheduler_json_file);
  409. if (status != SUCCESS) {
  410. GELOGE(FAILED, "[Read][JsonFile]Failed, file %s", path.c_str());
  411. REPORT_CALL_ERROR("E19999", "Read json file %s failed", path.c_str());
  412. return FAILED;
  413. }
  414. if (scheduler_json_file.is_null()) {
  415. // when engine_conf.json is not exist, just return success
  416. GELOGW("Json file is null");
  417. return SUCCESS;
  418. }
  419. try {
  420. nlohmann::json scheduler_utils_json = scheduler_json_file[kSchedulerUnits];
  421. if (scheduler_utils_json.is_null()) {
  422. GELOGE(FAILED, "[Check[Param]Find scheduler units failed, the message is null, file %s", path.c_str());
  423. REPORT_INNER_ERROR("E19999", "Find scheduler units failed, the message is null, file %s", path.c_str());
  424. return FAILED;
  425. }
  426. if (!scheduler_utils_json.is_array()) {
  427. GELOGE(FAILED, "[Check][Param]The message of kSchedulerUnits is not array and "
  428. "the file path is %s", path.c_str());
  429. REPORT_INNER_ERROR("E19999", "The message of kSchedulerUnits is not array and "
  430. "the file path is %s", path.c_str());
  431. return FAILED;
  432. }
  433. auto size = scheduler_json_file[kSchedulerUnits].size();
  434. for (size_t i = 0; i < size; i++) {
  435. SchedulerConf scheduler_conf;
  436. std::map<std::string, EngineConfPtr> engine_conf_map;
  437. nlohmann::json engines_json_map = scheduler_utils_json[i][kCalEngines];
  438. if (engines_json_map.is_null()) {
  439. GELOGE(FAILED, "[Check][Param]The message of cal_engines is null, file %s", path.c_str());
  440. REPORT_INNER_ERROR("E19999", "The message of cal_engines is null, file %s", path.c_str());
  441. return FAILED;
  442. }
  443. std::string scheduler_id_temp = scheduler_utils_json[i][kId];
  444. if (!scheduler_id_temp.empty()) {
  445. scheduler_conf.id = scheduler_id_temp;
  446. } else {
  447. GELOGE(FAILED, "[Check][Param]Scheduler ID is null, file %s", path.c_str());
  448. REPORT_INNER_ERROR("E19999", "Scheduler ID is null, file %s", path.c_str());
  449. return FAILED;
  450. }
  451. status = ParserEngineMessage(engines_json_map, scheduler_id_temp, engine_conf_map);
  452. if (status != SUCCESS) {
  453. GELOGE(FAILED, "[Parse][EngineMessage]Failed, scheduler_id_temp %s", scheduler_id_temp.c_str());
  454. REPORT_CALL_ERROR("E19999", "Parse engine message failed, scheduler_id_temp %s",
  455. scheduler_id_temp.c_str());
  456. return FAILED;
  457. }
  458. scheduler_conf.name = scheduler_utils_json[i][kName];
  459. scheduler_conf.ex_attrs = scheduler_utils_json[i][kExAttrs];
  460. scheduler_conf.cal_engines = engine_conf_map;
  461. auto it = schedulers_.find(scheduler_id_temp);
  462. if (it != schedulers_.end()) {
  463. GELOGE(FAILED, "[Check][Param]There are the same scheduler ts %s in the json file",
  464. scheduler_id_temp.c_str());
  465. REPORT_INNER_ERROR("E19999", "[Check][Param]There are the same scheduler ts %s "
  466. "in the json file", scheduler_id_temp.c_str());
  467. return FAILED;
  468. }
  469. schedulers_.emplace(scheduler_id_temp, scheduler_conf);
  470. }
  471. } catch (const nlohmann::detail::type_error &e) {
  472. GELOGE(FAILED, "[Parse][JsonFile]Failed, file %s, reason %s", path.c_str(), e.what());
  473. REPORT_CALL_ERROR("E19999", "Parse json file %s failed, reason %s", path.c_str(), e.what());
  474. return FAILED;
  475. }
  476. GELOGI("Parser json file SUCCESS");
  477. return SUCCESS;
  478. }
  479. Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std::string &scheduler_mark,
  480. std::map<std::string, EngineConfPtr> &engines) {
  481. GELOGI("Begin to parser engine massage");
  482. if (engines_json.is_null()) {
  483. GELOGE(FAILED, "[Check][Param]The message of cal_engines is null");
  484. REPORT_INNER_ERROR("E19999", "The message of cal_engines is null");
  485. return FAILED;
  486. }
  487. try {
  488. if (engines_json.is_array()) {
  489. for (size_t i = 0; i < engines_json.size(); i++) {
  490. nlohmann::json engines_elems = engines_json[i];
  491. EngineConfPtr engine_conf_ptr = MakeShared<EngineConf>();
  492. if (engine_conf_ptr == nullptr) {
  493. return FAILED;
  494. }
  495. std::string engine_id = engines_elems[kId];
  496. if (!engine_id.empty()) {
  497. engine_conf_ptr->id = engine_id;
  498. } else {
  499. GELOGE(FAILED, "[Check][Param]Engine ID is null");
  500. REPORT_INNER_ERROR("E19999", "Engine ID is null");
  501. return FAILED;
  502. }
  503. if (engines_elems.find(kName) != engines_elems.end()) {
  504. engine_conf_ptr->name = engines_elems[kName];
  505. } else {
  506. GELOGW("The engine %s name is null", engine_id.c_str());
  507. }
  508. if (engines_elems.find(kIndependent) != engines_elems.end()) {
  509. engine_conf_ptr->independent = engines_elems[kIndependent];
  510. }
  511. if (engines_elems.find(kAttach) != engines_elems.end()) {
  512. engine_conf_ptr->attach = engines_elems[kAttach];
  513. }
  514. if (engines_elems.find(kSkipAssignStream) != engines_elems.end()) {
  515. engine_conf_ptr->skip_assign_stream = engines_elems[kSkipAssignStream];
  516. }
  517. engine_conf_ptr->scheduler_id = scheduler_mark;
  518. auto it = engines.find(engine_id);
  519. if (it != engines.end()) {
  520. GELOGE(FAILED, "[Check][Param]There are the same engine %s message in the json file",
  521. engine_id.c_str());
  522. REPORT_INNER_ERROR("E19999", "There are the same engine %s message in the json file",
  523. engine_id.c_str());
  524. return FAILED;
  525. }
  526. engines.emplace(engine_id, engine_conf_ptr);
  527. }
  528. } else {
  529. GELOGE(FAILED, "[Check][Param]The message of cal_engines is not array in the json file");
  530. REPORT_INNER_ERROR("E19999", "The message of cal_engines is not array in the json file");
  531. return FAILED;
  532. }
  533. } catch (const json::exception &e) {
  534. GELOGE(FAILED, "[Construct][JsonContent]Failed, reason %s", e.what());
  535. REPORT_INNER_ERROR("E19999", "Construct json content failed, reason %s", e.what());
  536. return FAILED;
  537. }
  538. GELOGI("Parser engine massage success");
  539. return SUCCESS;
  540. }
  541. Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle handle) {
  542. GELOGD("Begin to read json file");
  543. if (file_path.empty()) {
  544. GELOGE(FAILED, "[Check][Param]Json path is empty");
  545. REPORT_INNER_ERROR("E19999", "Json path is empty");
  546. return FAILED;
  547. }
  548. nlohmann::json *json_file = reinterpret_cast<nlohmann::json *>(handle);
  549. if (json_file == nullptr) {
  550. GELOGE(FAILED, "[Check][Param]Json file is nullptr");
  551. REPORT_CALL_ERROR("E19999", "Json file is nullptr");
  552. return FAILED;
  553. }
  554. const char *file = file_path.data();
  555. if ((mmAccess2(file, M_F_OK)) != EN_OK) {
  556. if (engines_map_.size() != 0) {
  557. GELOGE(FAILED, "[Check][Param]The json file %s not exists, err %s",
  558. file_path.c_str(), strerror(errno));
  559. REPORT_CALL_ERROR("E19999", "Json file %s not exists, err %s",
  560. file_path.c_str(), strerror(errno));
  561. return FAILED;
  562. } else {
  563. GELOGW("The json file %s is not needed.", file_path.c_str());
  564. return SUCCESS;
  565. }
  566. }
  567. std::ifstream ifs(file_path);
  568. if (!ifs.is_open()) {
  569. GELOGE(FAILED, "[Open][JsonFile]Failed, file %s", file_path.c_str());
  570. REPORT_CALL_ERROR("E19999", "Open json file %s failed", file_path.c_str());
  571. return FAILED;
  572. }
  573. try {
  574. ifs >> *json_file;
  575. } catch (const json::exception &e) {
  576. GELOGE(FAILED, "[Read][JsonFile]Failed, reason %s", e.what());
  577. REPORT_CALL_ERROR("E19999", "Read json file failed, reason %s", e.what());
  578. ifs.close();
  579. return FAILED;
  580. }
  581. ifs.close();
  582. GELOGD("Read json file success");
  583. return SUCCESS;
  584. }
  585. Status DNNEngineManager::CheckJsonFile() {
  586. GELOGD("Begin to check json file");
  587. for (auto &it : engines_map_) {
  588. if (!it.second->IsAtomic()) {
  589. continue;
  590. }
  591. std::string engine_name = it.first;
  592. int count = 0;
  593. for (auto &iter : schedulers_) {
  594. auto engine_map = iter.second.cal_engines;
  595. auto iter_engine_name = engine_map.find(engine_name);
  596. if (iter_engine_name != engine_map.end()) {
  597. count++;
  598. }
  599. }
  600. if (count == 0) {
  601. GELOGE(FAILED, "[Check][JsonFile]The engine message %s is not found in the json file",
  602. engine_name.c_str());
  603. REPORT_INNER_ERROR("E19999", "The engine message %s is not found in the json file",
  604. engine_name.c_str());
  605. return FAILED;
  606. }
  607. if (count > 1) {
  608. GELOGE(FAILED, "[Check][JsonFile]The same engine message %s exists in the json file",
  609. engine_name.c_str());
  610. REPORT_INNER_ERROR("E19999", "The same engine message %s exists in the json file",
  611. engine_name.c_str());
  612. return FAILED;
  613. }
  614. }
  615. GELOGD("Check json file success");
  616. return SUCCESS;
  617. }
  618. Status DNNEngineManager::InitCompoundEngines(const std::string &path) {
  619. std::string compound_engine_so = path + "plugin/opskernel/fe.so";
  620. Status status = compound_plugin_mgr_.Load(compound_engine_so);
  621. if (status != SUCCESS) {
  622. GELOGE(status, "[Load][EngineSo]Failed, lib path %s", path.c_str());
  623. REPORT_CALL_ERROR("E19999", "Load engine so failed, lib path %s", compound_engine_so.c_str());
  624. return status;
  625. }
  626. compound_plugin_mgr_.
  627. OptionalInvokeAll<std::map<std::string, std::set<std::string>> &, std::map<std::string, std::string> &>(
  628. kGetCompoundEngineContains, compound_engine_contains_, compound_engine_2_kernel_lib_name_);
  629. if (compound_engine_contains_.empty()) {
  630. GELOGI("No compound engine is registered.");
  631. return SUCCESS;
  632. }
  633. std::vector<std::string> invalid_compound_engines;
  634. for (const auto &item : compound_engine_contains_) {
  635. const auto &compound_engine = GetEngine(item.first);
  636. if ((compound_engine == nullptr) || compound_engine->IsAtomic()) {
  637. GELOGW("Compound engine %s is not registered", item.first.c_str());
  638. continue;
  639. }
  640. if (compound_engine_2_kernel_lib_name_.find(item.first) == compound_engine_2_kernel_lib_name_.end() ||
  641. compound_engine_2_kernel_lib_name_[item.first].empty()) {
  642. GELOGW("Kernel lib name of compound engine %s is empty, will delete", item.first.c_str());
  643. invalid_compound_engines.emplace_back(item.first);
  644. }
  645. }
  646. for (const auto &invalid_compound_engine : invalid_compound_engines) {
  647. (void)compound_engine_contains_.erase(invalid_compound_engine);
  648. (void)compound_engine_2_kernel_lib_name_.erase(invalid_compound_engine);
  649. }
  650. return SUCCESS;
  651. }
  652. void DNNEngineManager::InitAtomicCompoundMapping() {
  653. for (const auto &item : compound_engine_contains_) {
  654. for (const auto &atomic_engine_name : item.second) {
  655. const auto &atomic_engine = GetEngine(atomic_engine_name);
  656. if ((atomic_engine == nullptr) || !atomic_engine->IsAtomic()) {
  657. GELOGW("Atomic engine %s is not registered", atomic_engine_name.c_str());
  658. continue;
  659. }
  660. auto iter = atomic_2_compound_.find(atomic_engine_name);
  661. if (iter != atomic_2_compound_.end()) {
  662. GELOGW("Atomic engine %s has been contained in compound engine %s, and will be overwritten by engine %s",
  663. atomic_engine_name.c_str(), iter->second.c_str(), item.first.c_str());
  664. }
  665. atomic_2_compound_[atomic_engine_name] = item.first;
  666. }
  667. }
  668. }
  669. bool DNNEngineManager::IsStreamAssignSkip(const NodePtr &node) {
  670. const auto &op_desc = node->GetOpDesc();
  671. if (op_desc == nullptr) {
  672. return false;
  673. }
  674. std::string engine_name = op_desc->GetOpEngineName();
  675. if (engine_name.empty()) {
  676. engine_name = GetDNNEngineName(node);
  677. }
  678. return IsStreamAssignSkip(engine_name);
  679. }
  680. bool DNNEngineManager::IsStreamAssignSkip(const string &engine_name) {
  681. // Only one scheduler has been supported by now
  682. for (const auto &scheduler : schedulers_) {
  683. const map<string, EngineConfPtr> cal_engines = scheduler.second.cal_engines;
  684. auto cal_engines_iter = cal_engines.find(engine_name);
  685. if (cal_engines_iter == cal_engines.end()) {
  686. GELOGW("No cal_engines found within engine %s", engine_name.c_str());
  687. continue;
  688. }
  689. EngineConfPtr engine_conf_ptr = cal_engines_iter->second;
  690. if (engine_conf_ptr == nullptr) {
  691. GELOGW("engine_conf_ptr within engine %s is null", engine_name.c_str());
  692. continue;
  693. }
  694. return engine_conf_ptr->skip_assign_stream;
  695. }
  696. return false;
  697. }
  698. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。