
cce_stub.cc
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <cstring>  // memcpy, used by the ccTransFilter stub below
#include <vector>
#include <cce/cce.h>
#include <cce/dnn.h>
#include <cce/compiler_stub.h>
#include <cce/taskdown_api.h>
#include "cce/optimizer/fusion_engine.h"
#include "common/op/attr_value_util.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/graph_utils.h"
using namespace cce;
using namespace std;
using namespace ge;
using namespace fusion;
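// Fake device-memory base address shared by the kernel-argument stubs (see ccGetKernelArgsAddrs).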
uint64_t global_mem_base = 0;
namespace cce {
#define DIM_MAX_SIZE 8
static const uint32_t C0 = 16;
struct tagCcPad {};
struct tagCcConvolution {};
struct tagCcLRN {};
struct tagCcFasterRcnnProposal {};
struct tagCcRoiAlign {};
struct tagCcBatchNorm {};
struct tagCcDetectpostprocess {};
struct tagCcSsdDetectionOutput {};
struct tagCcRefinedetDetectionOutput {};
struct tagCcMsrGenerateRpnProposals {};
struct tagCcFilter {
  vector<uint32_t> dims;
};
struct tagCcTensor {
  ccTensorFormat_t format;
  ccDataType_t data_type;
  uint32_t dim_cnt;
  int32_t real_dim_cnt;
  uint32_t data_size;
  int32_t dim_buf[DIM_MAX_SIZE];
  int32_t stride_buf[DIM_MAX_SIZE];
};
typedef struct tagCcPooling {
  ccPoolingMode_t mode;
  ccPaddingMode_t pad_mode;
  ccNanPropagation_t max_pooling_nan_opt;
  uint32_t dim_cnt;
  int32_t window_dim[6];
  int32_t padding[6];
  int32_t stride[6];
} ccPooling_t;
struct tagCcActivation {};
struct tagCcFasterRcnnDetectionOutput {};
struct tagCcSpatialTransformer {};
struct tagCcPower {};
struct tagCcResizeBilinear {};
struct tagCcSsdNormalize {};
struct tagCcSsdPostProcessor {};
struct tagCcSsdPriorBox {};
struct tagCcPsRoiPooling {};
struct tagMsrFastRcnnPredictions {};
struct tagCcPRelu {};
struct tagCcStridedSlice {};
struct tagCcStridedSliceAttrs {};
struct tagCcRnn {};
struct tagCcArgmaxmin {};
typedef struct tagCcLog {
  ccDataType_t data_type;
  uint32_t param_cnt;
} ccLog_t;
typedef struct tagCcLog *ccLogDescriptor_t;
struct tagCcPadV2 {};
ccStatus_t ccGetPadV2OutputDim(const ccTensorDescriptor_t x_desc, const ccPadV2Descriptor_t pad_desc, int32_t *dim_cnt,
                               int32_t dim[], int32_t dim_len) {
  *dim_cnt = 4;
  dim[0] = 1;
  dim[1] = 2;
  dim[2] = 2;
  dim[3] = 3;
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccPadV2Forward(ccHandle_t handle, const ccPadV2Descriptor_t pad_desc, const void *alpha,
                          const ccTensorDescriptor_t x_desc, const void *x, const void *beta,
                          const ccTensorDescriptor_t output_desc, void *output) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccCreatePadV2Descriptor(ccPadV2Descriptor_t *pad_desc) { return CC_STATUS_SUCCESS; }
ccStatus_t ccDestroyPadV2Descriptor(ccPadV2Descriptor_t *pad_desc) { return CC_STATUS_SUCCESS; }
ccStatus_t ccSetKernelOpMap(ccHandle_t handle) { return CC_STATUS_SUCCESS; }
ccStatus_t ccDataDumpForward(ccHandle_t handle, const void *buffer, const uint64_t buf_len, const uint32_t task_index) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetPadV2Descriptor(ccPadV2Descriptor_t pad_desc, const int32_t pad_shape_cnt,
                                const int32_t pad_shape_low[], const int32_t pad_shape_high[],
                                const ccPadMode_t pad_mode, const void *pad_value, const ccDataType_t pad_value_type) {
  return CC_STATUS_SUCCESS;
}
struct tagCcYoloDetectionOutput {
  ccYoloVersion_t yolo_version;
  uint32_t net_h;
  uint32_t net_w;
  uint32_t post_top_k;
  uint32_t classes;
  float nms_threshold;
  float iou_thre_decay;
  float coor_scale_factor;
  bool relative;
  float obj_threshold;
  float cls_threshold;
  uint32_t bias_num;
  float *bias;
};
struct tagCcYoloRegion {};
struct tagCcEltwise {};
struct tagCcHashTableLookup {};
struct tagCcEmbeddingAttnDecoder {};
struct tagNonMaxSuppression {};
struct tagCcArcSinCos {};
struct tagCcPow {};
struct tagCcConcatFive2Four_t {};
struct tagCcConcatFour2Five_t {};
ccStatus_t ccCreatePowDescriptor(ccPowDescriptor_t *pow_desc) {
  *pow_desc = new tagCcPow();
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetPowDescriptor(ccPowDescriptor_t pow_desc, ccDataType_t data_type, uint32_t param_cnt) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccDestroyPowDescriptor(ccPowDescriptor_t *pow_desc) {
  if (nullptr == pow_desc) {
    return CC_STATUS_BAD_PARAM;
  }
  delete *pow_desc;
  *pow_desc = nullptr;
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccPowForward(ccHandle_t handle, const ccPowDescriptor_t pow_desc, const void *pow_param, const void *alpha,
                        const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc,
                        const void *y, const void *beta, const ccTensorDescriptor_t z_desc, void *z) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccLogicalOrForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc, const void *x,
                              const ccTensorDescriptor_t y_desc, const void *y, const void *beta,
                              const ccTensorDescriptor_t output_desc, void *output) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccCompareForward(ccHandle_t handle, ccCompareType_t compare_type, const void *alpha,
                            const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc,
                            const void *y, const void *beta, const ccTensorDescriptor_t output_desc, void *output) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetCompareOutputDim(const ccTensorDescriptor_t x_desc, const ccTensorDescriptor_t y_desc, int32_t *dim_cnt,
                                 int32_t *dim, int32_t dim_len) {
  *dim_cnt = 4;
  dim[0] = 1;
  dim[1] = 1;
  dim[2] = 1;
  dim[3] = 1;
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccArcTanForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc, const void *x,
                           const void *beta, const ccTensorDescriptor_t y_desc, void *y) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccAtanhForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc, const void *x,
                          const void *beta, const ccTensorDescriptor_t y_desc, void *y) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccIsDepthwiseHighPerformance(int32_t input_n, int32_t input_c, int32_t input_h, int32_t input_w,
                                        int32_t filter_n, int32_t filter_c, int32_t filter_h, int32_t filter_w,
                                        int32_t dilation_h, int32_t dilation_w, int32_t pad_h_head, int32_t pad_h_tail,
                                        int32_t pad_w_head, int32_t pad_w_tail, int32_t stride_h, int32_t stride_w,
                                        int32_t group_num, bool &is_high_performance, bool is_quant,
                                        ccDataType_t input_data_type, ccDataType_t output_data_type) {
  is_high_performance = true;
  return CC_STATUS_SUCCESS;
}
struct tagCcSpaceToBatch {};
struct tagCcBatchToSpace {};
struct tagCcResizeNearestNeighbor {};
ccStatus_t ccGetStream(ccHandle_t handle, rtStream_t *stream_id) { return CC_STATUS_SUCCESS; }
ccStatus_t ccGetRtVersion(uint32_t *count) { return CC_STATUS_SUCCESS; }
ccStatus_t ccDestroyTensorDescriptor(ccTensorDescriptor_t *tensor_desc) {
  if (nullptr == tensor_desc) {
    return CC_STATUS_BAD_PARAM;
  }
  delete *tensor_desc;
  *tensor_desc = nullptr;
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccDestroyFilterDescriptor(ccFilterDescriptor_t *filter_desc) {
  if (nullptr == filter_desc) {
    return CC_STATUS_BAD_PARAM;
  }
  delete *filter_desc;
  *filter_desc = nullptr;
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetFilterSizeInBytes(const ccFilterDescriptor_t filter_desc, uint32_t *size) {
  *size = filter_desc->dims[0] * filter_desc->dims[1] * filter_desc->dims[2] * filter_desc->dims[3] * sizeof(float);
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccTransFilter(const ccFilterDescriptor_t w_desc, const void *w, ccFilterDescriptor_t y_desc, void *y,
                         uint32_t y_size_in_bytes) {
  // The original `y = const_cast<void *>(w);` only reassigned the local parameter and
  // had no effect; copy the caller-sized buffer so the output actually holds the filter.
  if (w != nullptr && y != nullptr) {
    memcpy(y, w, y_size_in_bytes);
  }
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccCreateTensorDescriptor(ccTensorDescriptor_t *tensor_desc) {
  *tensor_desc = new tagCcTensor();
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetTensor4dDescriptor(ccTensorDescriptor_t tensor_desc, ccTensorFormat_t format, ccDataType_t data_type,
                                   int32_t n, int32_t c, int32_t h, int32_t w) {
  if (CC_TENSOR_NHWC == format) {
    tensor_desc->dim_buf[0] = n;
    tensor_desc->dim_buf[1] = h;
    tensor_desc->dim_buf[2] = w;
    tensor_desc->dim_buf[3] = c;
  } else {
    tensor_desc->dim_buf[0] = n;
    tensor_desc->dim_buf[1] = c;
    tensor_desc->dim_buf[2] = h;
    tensor_desc->dim_buf[3] = w;
  }
  tensor_desc->dim_cnt = 4;
  tensor_desc->data_type = data_type;
  tensor_desc->format = format;
  // Note: sizeof(data_type) is the size of the ccDataType_t enum itself, not of the
  // element type it names; the stub relies on this approximation.
  tensor_desc->data_size = n * c * h * w * sizeof(data_type);
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetTensorSizeInBytes(const ccTensorDescriptor_t tensor_desc, uint32_t *size) {
  if ((nullptr == tensor_desc) || (nullptr == size)) {
    return CC_STATUS_BAD_PARAM;
  }
  *size = tensor_desc->data_size;
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetTensorMemorySizeInBytes(const ccTensorDescriptor_t tensor_desc, uint32_t *size) {
  *size = tensor_desc->data_size;
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccCreateFilterDescriptor(ccFilterDescriptor_t *filter_desc) {
  *filter_desc = new tagCcFilter();
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetFilter4dDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format, ccDataType_t data_type,
                                   int32_t k, int32_t c, int32_t h, int32_t w) {
  filter_desc->dims.push_back(k);
  filter_desc->dims.push_back(c);
  filter_desc->dims.push_back(h);
  filter_desc->dims.push_back(w);
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetFilterFractalDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format,
                                        ccDataType_t data_type, int32_t k, int32_t c, int32_t h, int32_t w) {
  filter_desc->dims.push_back(k);
  filter_desc->dims.push_back(c);
  filter_desc->dims.push_back(h);
  filter_desc->dims.push_back(w);
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetStream(ccHandle_t handle, rtStream_t stream_id) { return CC_STATUS_SUCCESS; }
ccStatus_t ccCreatePoolingMaskDescriptor(ccTensorDescriptor_t *pooling_mask_desc) {
  *pooling_mask_desc = new tagCcTensor();
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetPoolingMaskTensorDescriptor(ccTensorDescriptor_t tensor_desc, ccTensorFormat_t format,
                                            ccDataType_t data_type, int32_t n, int32_t c, int32_t h, int32_t w,
                                            int32_t window_h, int32_t window_w) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetFilter6dDescriptor(ccTensorDescriptor_t filter_desc, ccTensorFormat_t format, ccDataType_t data_type,
                                   int32_t c1, int32_t h, int32_t w, int32_t n, int32_t co, int32_t c0) {
  return CC_STATUS_SUCCESS;
}
/// @ingroup dnn
/// @brief get the format of a tensor descriptor
/// @param [in] tensor_desc descriptor of the tensor
/// @param [in|out] format pointer to the returned format
/// @return ccStatus_t
ccStatus_t ccGetTensorFormat(const ccTensorDescriptor_t tensor_desc, ccTensorFormat_t *format) {
  *format = tensor_desc->format;
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccTransTensor(const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc, void *y,
                         uint32_t y_size_in_bytes) {
  return CC_STATUS_SUCCESS;
}
void cceSysInit() {}
bool compilerStubFree() { return true; }
bool compilerStubInit() { return true; }
ccStatus_t ccSetInt8Filter4dDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format,
                                       ccDataType_t data_type, int32_t k, int32_t c, int32_t h, int32_t w,
                                       ccDataType_t output_data_type) {
  filter_desc->dims.push_back(k);
  filter_desc->dims.push_back(c);
  filter_desc->dims.push_back(h);
  filter_desc->dims.push_back(w);
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetTensorNdDescriptor(ccTensorDescriptor_t tensor_desc, ccDataType_t data_type, int32_t dim_cnt,
                                   int32_t dimA[]) {
  tensor_desc->data_type = data_type;
  // As in ccSetTensor4dDescriptor, sizeof(data_type) is the enum's size, not the element size.
  tensor_desc->data_size = sizeof(data_type);
  for (int32_t i = 0; i < dim_cnt; i++) {
    tensor_desc->data_size = tensor_desc->data_size * dimA[i];
  }
  tensor_desc->format = CC_TENSOR_ND;
  return CC_STATUS_SUCCESS;
}
ccStatus_t CceProfilingConfig(const char *target, const char *job_ctx, uint32_t flag) { return CC_STATUS_SUCCESS; }
ccStatus_t ccSetTensorRealDimCnt(ccTensorDescriptor_t tensor_desc, int32_t real_dim_cnt) {
  if (tensor_desc != nullptr) {
    tensor_desc->real_dim_cnt = real_dim_cnt;
  }
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetTensorRealDimCnt(ccTensorDescriptor_t tensor_desc, int32_t *real_dim_cnt) {
  *real_dim_cnt = tensor_desc->real_dim_cnt;
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode,
                                const uint16_t *scale, const uint16_t *offset, const uint8_t *offset_pad) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetReQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode,
                                  const uint16_t *scale_rq, const uint16_t *next_layer_offset,
                                  const int32_t *offset_w) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetDeQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode,
                                  const uint16_t *scale_dq, const int32_t *offset_w) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetQuantizeAlgoAndScaleType(ccQuantizeDescriptor_t quantize_info, ccQuantizeAlgo_t quant_algo,
                                         ccScaleType_t scale_type, bool relu_flag) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccPrintTimeStat() { return CC_STATUS_SUCCESS; }
ccStatus_t ccSetModelId(ccHandle_t handle, uint32_t model_id) { return CC_STATUS_SUCCESS; }
ccStatus_t ccGetKernelContext(rtStream_t stream_id, ccOpContext &op_context) {
  if (stream_id == nullptr) {
    op_context.kernelType = ccKernelType::TE;
  } else {
    op_context.kernelType = ccKernelType::CCE_AI_CORE;
    op_context.opId = 1;
    op_context.kernelFuncId = 1;
    op_context.isFlowtable = true;
    op_context.opCount = 1;
    op_context.opIndex2[0] = 0;
  }
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccUpdateKernelArgs(ccOpContext &op_context, uint64_t data_base_addr, uint64_t weight_base_addr,
                              uint64_t variable_base_addr, void *args_addr, uint64_t args_size, void *l2ctrl_addr) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccGetKernelArgsAddrs(ccOpContext &op_context, void *args_addr, uint64_t args_size, void *l2ctrl_addr,
                                std::vector<ccOpAddrsInfo> &op_addrs_info) {
  // Report one fixed input and one fixed output address derived from global_mem_base.
  // Note: addrPos points at stack locals and is only valid during this call.
  ccOpAddrsInfo tmp_op_addrs_info;
  uint64_t tmp_input = (uint64_t)global_mem_base;
  tmp_op_addrs_info.addrPos = &tmp_input;
  tmp_op_addrs_info.addrData = tmp_input;
  op_addrs_info.push_back(tmp_op_addrs_info);
  uint64_t tmp_output = (uint64_t)(global_mem_base + 5476352);
  tmp_op_addrs_info.addrPos = &tmp_output;
  tmp_op_addrs_info.addrData = tmp_output;
  op_addrs_info.push_back(tmp_op_addrs_info);
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccSetKernelArgs(std::vector<ccOpAddrsInfo> &data_info) { return CC_STATUS_SUCCESS; }
}  // namespace cce
// The ccFusion entry points below are defined outside namespace cce.
ccStatus_t ccFusionStart(ccHandle_t handle, uint32_t graph_id, uint32_t init_flag, CceFusionMemCfg_t mem_cfg) {
  return CC_STATUS_SUCCESS;
}
// Overload of ccFusionStart, likewise outside namespace cce.
ccStatus_t ccFusionStart(ccHandle_t handle, uint32_t graph_id, uint32_t init_flag, uint32_t addr_change_flag) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t ccFusionEnd(ccHandle_t handle, uint32_t graph_id) { return CC_STATUS_SUCCESS; }
ccStatus_t ccFusionTaskEnd(ccHandle_t handle, uint32_t graph_id) { return CC_STATUS_SUCCESS; }
ccStatus_t ccKernelLaunchRepeat(ccHandle_t handle) { return CC_STATUS_SUCCESS; }
ccStatus_t ccKernelDelete(ccHandle_t handle) { return CC_STATUS_SUCCESS; }
ccStatus_t cce::ccSetTensorFormat(cce::tagCcTensor *, cce::tagCcTensorFormat) { return CC_STATUS_SUCCESS; }
namespace fusion {
uint32_t BufferFusion(std::shared_ptr<ge::ComputeGraph>, std::shared_ptr<ge::ComputeGraph>, bool) { return 0; }
uint32_t BufferFusionTrain(std::shared_ptr<ge::ComputeGraph>, std::shared_ptr<ge::ComputeGraph>) { return 0; }
uint32_t GraphFusionTrain(ge::ComputeGraphPtr orig_graph, ge::ComputeGraphPtr fusion_graph) { return 0; }
}  // namespace fusion
namespace fusion {
using namespace ge;
uint32_t Fusion(ComputeGraphPtr model_graph, ComputeGraphPtr fusion_graph, kScopeNodeMap_t &te_fusion_map) {
  OpDescPtr op_def_a = std::make_shared<OpDesc>();
  op_def_a->SetName("reduction_nd");
  op_def_a->SetType("reduction_nd");
  GeTensorDescPtr v_input_desc = std::make_shared<GeTensorDesc>();
  op_def_a->AddInputDesc(*v_input_desc);
  vector<int64_t> v_input;
  v_input.push_back(0);
  op_def_a->SetInputOffset(v_input);
  GeTensorDesc input_desc = op_def_a->GetInputDesc(0);
  input_desc.SetFormat(FORMAT_NCHW);
  input_desc.SetDataType(DT_FLOAT);
  input_desc.SetShape(GeShape({1, 3, 5, 5}));
  ge::TensorUtils::SetSize(input_desc, 192);
  ge::TensorUtils::SetRealDimCnt(input_desc, 4);
  GeTensorDescPtr output_desc = std::make_shared<GeTensorDesc>();
  op_def_a->AddOutputDesc(*output_desc);
  output_desc->SetFormat(FORMAT_NCHW);
  output_desc->SetDataType(DT_FLOAT);
  output_desc->SetShape(GeShape({1, 3, 5}));
  ge::TensorUtils::SetSize(*output_desc, 96);
  ge::TensorUtils::SetRealDimCnt(*output_desc, 3);
  OpDescPtr op_def_b = std::make_shared<OpDesc>();
  op_def_b->SetName("transdata_1");
  op_def_b->SetType("TransData");
  int stream_num = 1;
  int flag = 0;
  NodePtr node_a = fusion_graph->AddNode(op_def_a);
  NodePtr node_b = fusion_graph->AddNode(op_def_b);
  GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0));
  int32_t a = 1;
  int32_t b = 2;
  AttrUtils::SetInt(op_def_a, "fusion_scope", a);
  AttrUtils::SetInt(op_def_b, "fusion_scope", b);
  vector<NodePtr> node_list1;
  node_list1.push_back(node_a);
  vector<NodePtr> node_list2;
  node_list2.push_back(node_b);
  te_fusion_map[1] = node_list1;
  te_fusion_map[2] = node_list2;
  return FUSION_STATUS_SUCCESS;
}
uint32_t FusionTaskBuild(cce::ccHandle_t cc_handle, ge::ComputeGraphPtr fusion_graph, ge::Buffer &buffer,
                         ModelRes &model_res, std::vector<TaskDef> &task_def_list_) {
  TaskDef task_def_temp;
  task_def_list_.push_back(task_def_temp);
  return FUSION_STATUS_SUCCESS;
}
uint32_t GraphFusion(ge::ComputeGraphPtr orig_graph, ge::ComputeGraphPtr fusion_graph) {
  *fusion_graph = *orig_graph;
  return FUSION_STATUS_SUCCESS;
}
void FusionTaskBuildComplete(std::vector<ccHandle_t> cc_handle_list) { return; }
}  // namespace fusion
ccStatus_t cce::ccSetTensorDescriptorQuantizeParam(ccTensorDescriptor_t tensor_desc,
                                                   const ccVecQuantizePara_t *vec_quantize_para) {
  return CC_STATUS_SUCCESS;
}
ccStatus_t cce::ccSetAllOffsetQuantizeFactors(ccQuantizeDescriptor_t quantize_info, const uint8_t *offset_w,
                                              const uint8_t *offset_d, const uint16_t *scale_req,
                                              const uint16_t *offset_d_next) {
  return CC_STATUS_SUCCESS;
}
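These stubs are enough for a unit test to exercise the tensor-descriptor lifecycle end to end without real CCE libraries. Below is a minimal sketch of that flow, assuming only the cce headers included above; the CC_TENSOR_NCHW and CC_DATA_FLOAT enumerators are assumed names from those headers, and any ccTensorFormat_t/ccDataType_t values would behave the same against these stubs.

#include <cstdint>
#include <cstdio>
#include <cce/cce.h>
#include <cce/dnn.h>
using namespace cce;

int main() {
  ccTensorDescriptor_t desc = nullptr;
  ccCreateTensorDescriptor(&desc);  // allocates a tagCcTensor
  // N=1, C=3, H=5, W=5; the stub records the dims and computes data_size
  // as n*c*h*w*sizeof(ccDataType_t) (likely 300 bytes when the enum is 4 bytes wide).
  ccSetTensor4dDescriptor(desc, CC_TENSOR_NCHW, CC_DATA_FLOAT, 1, 3, 5, 5);
  uint32_t size = 0;
  ccGetTensorSizeInBytes(desc, &size);
  printf("stub tensor size: %u bytes\n", size);
  ccDestroyTensorDescriptor(&desc);  // deletes the descriptor and nulls the pointer
  return 0;
}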

The Graph Engine (GE) module is a submodule of MindSpore. Implemented in C++, it sits between the front-end module ME and the underlying hardware and serves as the bridge between them. GE takes the graph issued by ME as input, applies a series of deep graph optimizations, and finally outputs a graph that can run efficiently on the underlying hardware. GE performs optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts: GE API and GE Core.
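For context on how GE is driven when used directly rather than invoked automatically, the sketch below shows the typical client flow through the GE API (initialize, create a session, add and run a graph, finalize), based on the entry points published in ge_api.h; treat the option maps and exact signatures as assumptions rather than a definitive usage.

#include <map>
#include <string>
#include <vector>
#include "ge/ge_api.h"

int RunOnce(const ge::Graph &graph, const std::vector<ge::Tensor> &inputs) {
  std::map<std::string, std::string> options;  // engine/SoC options; contents assumed
  // GE API: bring up GE Core, then manage graphs through a Session.
  if (ge::GEInitialize(options) != ge::SUCCESS) return -1;
  {
    ge::Session session(options);
    const uint32_t graph_id = 1;
    session.AddGraph(graph_id, graph);  // hand the ME-produced graph to GE
    std::vector<ge::Tensor> outputs;
    session.RunGraph(graph_id, inputs, outputs);  // optimize, compile, and execute
  }
  ge::GEFinalize();
  return 0;
}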