| SHA1 | Message | Date |
|---|---|---|
| caf8b80936 | update RELEASE.md. | 4 years ago |
| 8770bfcdd7 | !1526 update commite id<br>From: @shenwei41 Reviewed-by: @liucunwei,@xsmq Signed-off-by: @liucunwei | 4 years ago |
| 329a144864 | Merge remote-tracking branch 'origin/r1.2' into code_sync_0415 | 4 years ago |
| da1a60bc02 | !1512 multi-thread online infer<br>From: @HW_KK Reviewed-by: @ji_chen,@wqtshg,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| 76c0c3a371 | multi-thread online infer | 4 years ago |
| 2d446b8def | !1503 change model_name for dump<br>From: @jiming6 Reviewed-by: @xchu42,@wqtshg Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| c144b4bb9e | !1507 Adaptation run package 0412<br>From: @shenwei41 Reviewed-by: @lilongfei15,@liucunwei Signed-off-by: @lilongfei15,@liucunwei | 4 years ago |
| 4928f86819 | update commit id | 4 years ago |
| b5a55e9ca9 | Merge remote-tracking branch 'origin/r1.2' into code_sync_0412 | 4 years ago |
| 48d7b6dc8b | fix | 4 years ago |
| daf8e56d25 | fix | 4 years ago |
| 46156bf04f | fix dump | 4 years ago |
| ee67c45a2b | !1487 Fix hccl control dependency<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 4b90851c68 | !1484 remove unused func InsertMemcpyNode<br>From: @lichun30 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 2e8d863a1e | !1482 ge static check<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 44415f12c8 | !1492 modify single op dump bug in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 36f2c837bf | Fix hccl control dependency | 4 years ago |
| f49599b6c5 | modify single op dump bug | 4 years ago |
| 99e607c6d1 | !1490 fix optional input bug<br>From: @wan_xuelei Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| d5f56ad31c | fix optional input bug | 4 years ago |
| c73a3c7b46 | fix sc check error | 4 years ago |
| f971f512e3 | static check modify | 4 years ago |
| 7f73eedb8a | !1478 Don't reset -2 when there is aicore op.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| ed941d6d87 | !1461 modify dump single op in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 089b82e9bd | !1469 modify dynamic shape dump in c77<br>From: @zhou_chao1993 Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @lbisdaddy | 4 years ago |
| e52c916f56 | Don't reset -2 when there is aicore op. | 4 years ago |
| 4c8e5f73c6 | !1476 Bugfix: Missing hccl execution dependency due to wrong attribute type of _parallel_group<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| a4783ff468 | !1460 Reduce weight memory usage & Remove redundant memcpy<br>From: @xchu42 Reviewed-by: @wqtshg,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 19d1f804c7 | Bugfix: keep hccl control dependency | 4 years ago |
| c90cae1410 | modify dynamic shape dump | 4 years ago |
| 4c0d85693a | !1463 Save atomic kernel bin to model.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| b48ecfe347 | Save atomic kernel bin to model. | 4 years ago |
| d7b607dc83 | !1464 fix aipp check<br>From: @wangxiaotian22 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 637bcc86d6 | modify dump single op | 4 years ago |
| 30743e1e59 | fix aipp check | 4 years ago |
| 24b2437361 | Fix dump for known-shaped subgraph | 4 years ago |
| 6d92a616ea | !1455 Synchronize latest Ascend software suite 06 Apr 2021<br>From: @nicholas_yhr Reviewed-by: @majorzhang,@lilongfei15 Signed-off-by: @majorzhang | 4 years ago |
| 03e87b5570 | Merge remote-tracking branch 'upstream/r1.2' into code_sync_0406 | 4 years ago |
| 3ef3f54d94 | Save atomic kernel bin to model. | 4 years ago |
| 34f09f4fc8 | !1447 LinkToPotentialPrecedenceNode<br>From: @dimitri_rose Reviewed-by: @sheng-nan,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 73e7c53f8a | !1448 Fix bug of const input index.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 494fa061a8 | !1444 modify dump content in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| aeec1cb08b | !1446 modify set dump in c77<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 960cc1fd64 | Fix bug of const input index. | 4 years ago |
| 5f1e659fcd | LinkToPotentialPrecedenceNode | 4 years ago |
| b1822cc73c | modify set dump in c77 | 4 years ago |
| 4931c4fa1e | modify dump content | 4 years ago |
| 24d3b54ab8 | !1443 synchronize latest ascend softare suite 02 Apr 2021<br>From: @nicholas_yhr Reviewed-by: @lilongfei15,@ljl0711 Signed-off-by: @lilongfei15 | 4 years ago |
| 2fbf01c53f | Merge remote-tracking branch 'origin/r1.2' into code_sync_0402 | 4 years ago |
| 9d6aaa117c | !1419 Add GetOriginalType for support RefSwitch & RefMerge<br>From: @chen_yemeng Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| 0da36c04e4 | !1421 fixed sc warning<br>From: @li-lei0106 Reviewed-by: @wqtshg,@xchu42 Signed-off-by: @ji_chen | 4 years ago |
| 2ac43d4033 | !1430 fix 1951 ts 4g bug<br>From: @wan_xuelei Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| 2112a36e80 | !1415 support unknown while subgraph<br>From: @lichun30 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 68595a656a | fix ts 4g memory bug | 4 years ago |
| 890373c79c | fixed reviewbot warning | 4 years ago |
| 7a40a575f7 | Add GetOriginalType for support RefSwitch & RefMerge | 4 years ago |
| 701b0d6c1b | support unknown while subgraph | 4 years ago |
| da71533e55 | !1345 fixed sc warning<br>From: @li-lei0106 Reviewed-by: Signed-off-by: | 4 years ago |
| af83c480c5 | !1388 Feature: Tiger online inference support<br>From: @hugo1 Reviewed-by: @xchu42,@ji_chen,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| c936821629 | modified: metadef | 4 years ago |
| 971630a7d2 | !1400 Bugfix: While loop failed to restore original input after execution<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 1735e1b1f3 | !1402 l2 buffer for f1.3.0<br>From: @youui Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 12cef9e9b9 | support unknown while subgraph | 4 years ago |
| 0679af1d75 | !1409 update include files 0330<br>From: @shenwei41 Reviewed-by: @lilongfei15,@wenkai_dist,@ljl0711 Signed-off-by: @lilongfei15 | 4 years ago |
| 5ddf2ac2b2 | update include files to 1.2 | 4 years ago |
| 7516130c7e | delete code | 4 years ago |
| de47249a72 | !1407 update commit id to r1.2 0330<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| df1592e97a | Merge remote-tracking branch 'origin/r1.2' into code_sync_0330 | 4 years ago |
| 7ec6e4fe61 | r13_l2 | 4 years ago |
| 7ed03d0d0e | !1398 fix import<br>From: @youui Reviewed-by: @ljl0711,@liujunzhu Signed-off-by: @liujunzhu | 4 years ago |
| 1d0359d1c6 | fixed pclint warning | 4 years ago |
| e9868abe29 | fixed sc warning by wangxiaotian | 4 years ago |
| 4fe73f77bc | fixed sc warning | 4 years ago |
| 59a3e2e0ff | fix import | 4 years ago |
| 4a7f623b12 | while loop failed to restore input desc | 4 years ago |
| 8e0634323d | modified: ge/graph/passes/base_pass.h | 4 years ago |
| f19cd2fca9 | !1386 Adding dependencies by parallel groups<br>From: @xchu42 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| c691f2a7d7 | !1385 Fix error of single_op memory free.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| e2f04ddabd | !1375 bugfix for atomic_addr_clean_pass<br>From: @yangyongqiang5033 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 50552c3631 | modified: ge/graph/passes/base_pass.cc<br>modified: ge/graph/passes/base_pass.h modified: ge/graph/passes/infershape_pass.cc modified: ge/hybrid/executor/hybrid_model_async_executor.cc modified: ge/hybrid/executor/subgraph_executor.cc modified: ge/hybrid/node_executor/aicore/aicore_op_task.cc | 4 years ago |
| 167621141b | hccl ops with same parallel group can not be execute parallelly | 4 years ago |
| aad154cdf1 | Fix error of single_op memory free. | 4 years ago |
| aead0be2d6 | !1372 online_inference c77<br>From: @dimitri_rose Reviewed-by: @ji_chen,@sheng-nan Signed-off-by: @ji_chen | 4 years ago |
| 2cf49ced1c | online_inference c77 | 4 years ago |
| b8621d9d0e | !1370 bugfix for auto find fp<br>From: @ni100die Reviewed-by: @xchu42,@wqtshg Signed-off-by: @lbisdaddy | 4 years ago |
| 9d34427af9 | bugfix for atomic_addr_clean_pass | 4 years ago |
| 37c928ed29 | bugfix for auto find fp | 4 years ago |
| 0901ca5581 | !1337 Fix bug of single_op inferdepend.<br>From: @zhao_zhixuan Reviewed-by: @xchu42,@xchu42,@ji_chen Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 1224cdee8a | !1306 dump for unknownshape<br>From: @jiming6 Reviewed-by: @xchu42,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| 56007bea30 | !1351 sync runtime head<br>From: @zhou_chao1993 Reviewed-by: @xchu42,@youui,@ji_chen Signed-off-by: @liyihan123,@ji_chen | 4 years ago |
| 168508b063 | !1354 update include file 0325<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 23b471ca2b | update include file 0325 | 4 years ago |
| e2f929b761 | !1352 update graphengine_0325<br>From: @shenwei41 Reviewed-by: @majorzhang,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 518ac24516 | update metdef | 4 years ago |
| a6bcb04c7f | Merge remote-tracking branch 'origin/r1.2' into code_sync_0325 | 4 years ago |
| 5367bbe395 | !1316 fixed compiled issue for proto files<br>From: @li-lei0106 Reviewed-by: @ji_chen,@wqtshg Signed-off-by: @ji_chen | 4 years ago |
| f0d897b0bb | fixed compiled issue for proto files | 4 years ago |
| 13ecbe405a | sync runtime head | 4 years ago |
| 3050d3984a | !1307 fix bug of dynamic shape load error<br>From: @wan_xuelei Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @lbisdaddy | 4 years ago |
| 1ccd0dd9ee | !1341 modify profiing reporter data max len<br>From: @zhengyuanhua Reviewed-by: @ji_chen,@xchu42 Signed-off-by: @lbisdaddy | 4 years ago |
| 97d93adaa5 | modify profiling reporter data max len | 4 years ago |
| 4238e11e99 | !1339 refactor label manager<br>From: @zhoufeng54 Reviewed-by: @liujunzhu,@youui Signed-off-by: @youui | 4 years ago |
| ba2fcefa04 | refactor label manager<br>Signed-off-by: zhoufeng <zhoufeng54@huawei.com> | 4 years ago |
| 77d5468cf6 | Fix bug of single_op inferdepend. | 4 years ago |
| a89113e743 | fix bug of dynamic shape load error | 4 years ago |
| e3fbf4d860 | !1277 offline dynamic shape inference support<br>From: @lichun30 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 36eb9620d4 | !1304 label goto implentment modified to be same as label switch<br>From: @zhoufeng54 Reviewed-by: @liujunzhu,@majorzhang Signed-off-by: @majorzhang | 4 years ago |
| 179e10f36b | label switch<br>Signed-off-by: zhupuxu <zhupuxu@huawei.com> | 4 years ago |
| 745153a252 | !1302 update include headers 0318<br>From: @shenwei41 Reviewed-by: @xsmq,@liucunwei Signed-off-by: @liucunwei | 4 years ago |
| f87dd9d016 | update include headers | 4 years ago |
| d9d99c3cf5 | !1300 Update GE commit id<br>From: @shenwei41 Reviewed-by: @xsmq,@liucunwei Signed-off-by: @liucunwei | 4 years ago |
| dfb2f4b7af | update commit id format | 4 years ago |
| 4a18a6791d | Merge remote-tracking branch 'origin/r1.2' into code_sync_0318 | 4 years ago |
| e6d3c77e80 | offline dynamic shape inference support | 4 years ago |
| 81ac111f09 | !1259 Unique LabelGoto args addr<br>From: @zhangxiaokun9 Reviewed-by: @xchu42,@ji_chen Signed-off-by: @lbisdaddy | 4 years ago |
| 40e5c42a12 | !1267 workspace of comm op can be reused<br>From: @zhoufeng54 Reviewed-by: @youui,@liujunzhu Signed-off-by: @youui | 4 years ago |
| d6308151e0 | reuse workspace memory of hccl op<br>Signed-off-by: zhoufeng <zhoufeng54@huawei.com> | 4 years ago |
| 6e874e8b87 | Unique LabelGoto args addr | 4 years ago |
| 4d6e7acc14 | !1252 update submodule<br>From: @wqtshg Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen | 4 years ago |
| 67bdf03f4b | update submodule | 4 years ago |
| f65be61197 | !1250 update ge<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 6a82dd1947 | update ge | 4 years ago |
| 6ce82eff9b | !1242 update ge Compile Error<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 948000fe5b | fix ge Compile Error | 4 years ago |
| c0f3dcb4f4 | !1241 update include header files 0311<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 54a48678ae | update include headers 0311 | 4 years ago |
| 8737b1843d | !1234 update ge_0311<br>From: @shenwei41 Reviewed-by: @xsmq Signed-off-by: | 4 years ago |
| 10faa7b052 | change | 4 years ago |
| 5dd83a9abd | Merge remote-tracking branch 'origin/release' into code_sync_0311 | 4 years ago |
| 3401ca857c | dump for unknownshape | 4 years ago |
| 92286b21ec | !1155 update metadef<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| a1795d6554 | update metdef | 4 years ago |
| f37d94ba21 | !1153 update_headerfiles_0225<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 3e5f9dac74 | update include headers_0225 | 4 years ago |
| d487b4781d | !1150 update graphengine_0225<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| 0ebcb55f3f | Merge remote-tracking branch 'origin/release' into code_sync_0225 | 4 years ago |
| 210a007d8e | !1139 update_headerfiles_0222<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| cc456d5803 | update header files 0222 | 4 years ago |
| b9050a8c51 | !1133 update graphengine_0222<br>From: @shenwei41 Reviewed-by: @lilongfei15,@ljl0711 Signed-off-by: @ljl0711 | 4 years ago |
| 028f47826e | Merge remote-tracking branch 'origin/release' into code_sync_0222 | 4 years ago |
| 6ebc4745de | !1104 modify pkg lib<br>From: @changzherui Reviewed-by: @ljl0711,@liujunzhu Signed-off-by: @liujunzhu | 4 years ago |
| aa4045b31d | modify pkg lib | 4 years ago |
| 8dc712ca01 | !1096 update<br>From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq | 4 years ago |
| de4224d3ba | update | 4 years ago |
| d28090d511 | !1095 update metadef<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| 543fd0a0e8 | update metadef | 4 years ago |
| 06894dcd48 | !1093 update prebuild<br>From: @shenwei41 Reviewed-by: @xsmq,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| 29e380abf6 | update prebuild | 4 years ago |
| e7036bb984 | !1079 update include file<br>From: @shenwei41 Reviewed-by: @lilongfei15,@liucunwei Signed-off-by: @liucunwei | 4 years ago |
| cffc6b2e2e | update include file | 4 years ago |
| ea0e2eadad | !1071 update commit id<br>From: @shenwei41 Reviewed-by: @liujunzhu,@lilongfei15 Signed-off-by: @lilongfei15 | 4 years ago |
| c332519b2e | Merge remote-tracking branch 'origin/release' into code_sync_0203 | 4 years ago |
| 08e4e60663 | !1056 modify error_codes.h<br>From: @changzherui Reviewed-by: @lilongfei15,@liujunzhu Signed-off-by: @liujunzhu | 4 years ago |
| b4539d54cd | modify error_codes.h | 4 years ago |
| 2d95c17f11 | !1040 sync code 0126<br>From: @changzherui Reviewed-by: @liujunzhu,@lilongfei15 Signed-off-by: @liucunwei | 4 years ago |
| 46dbe7a2fc | Merge remote-tracking branch 'upstream/release' into code_sync_126 | 4 years ago |
| dfa4dd4acd | !1024 Synchronization code .h 0125<br>From: @changzherui Reviewed-by: @guoqi1024,@liujunzhu Signed-off-by: @guoqi1024 | 4 years ago |
| ed7e35f927 | sync code 0125 .h | 4 years ago |
| 7183c03452 | !1022 Synchronization code 0125<br>From: @changzherui Reviewed-by: @ljl0711,@guoqi1024 Signed-off-by: @guoqi1024 | 4 years ago |
| a8b7570e71 | Merge remote-tracking branch 'upstream/release' into code_sync_0125 | 4 years ago |
| 687d5f2ab1 | !966 sync code 0116 h<br>From: @changzherui Reviewed-by: @liujunzhu,@ljl0711 Signed-off-by: @ljl0711 | 4 years ago |
| e3b32cd2a0 | clang-format | 4 years ago |
| 411e71f1f3 | sync code h | 4 years ago |
| 50f17e37d9 | !963 code sync 0116<br>From: @changzherui Reviewed-by: @liujunzhu,@guoqi1024 Signed-off-by: @guoqi1024 | 4 years ago |
| 2b200b25ed | sync code 0116 | 4 years ago |
| 20a0326976 | !567 prioritize json downloading from gitee<br>From: @nicholas_yhr Reviewed-by: @liujunzhu,@youui Signed-off-by: @youui | 5 years ago |
| d77f36e017 | prioritize json downloading from gitee | 5 years ago |
| 75572bb987 | !562 Synchronize latest Ascend software suite 09 Dec 2020<br>From: @nicholas_yhr Reviewed-by: @liujunzhu,@ljl0711 Signed-off-by: @ljl0711 | 5 years ago |
| d5a82a7f98 | Synchronize latest Ascend software suite 09 Dec 2020 | 5 years ago |
| 24b53b9282 | !546 fix geruntime missing files and error codes<br>From: @nicholas_yhr Reviewed-by: @youui,@liujunzhu Signed-off-by: @liujunzhu | 5 years ago |
| d731918198 | fix geruntime missing files and error codes | 5 years ago |
| 8712387b3c | !533 update headers for release branch, for mindspore use<br>From: @nicholas_yhr Reviewed-by: @youui,@liujunzhu Signed-off-by: @liujunzhu | 5 years ago |
| 22ac2e8c14 | !537 find libraries from both atc and fwk paths<br>From: @nicholas_yhr Reviewed-by: @youui,@liujunzhu Signed-off-by: @liujunzhu | 5 years ago |
| fec2e70eda | find libraries from both atc and fwk paths | 5 years ago |
| 8011e1ea9a | update headers | 5 years ago |
```diff
@@ -1,8 +1,8 @@
 [submodule "parser"]
 	path = parser
 	url = https://gitee.com/ascend/parser.git
-	branch = master
+	branch = r1.3.0
 [submodule "metadef"]
 	path = metadef
 	url = https://gitee.com/ascend/metadef.git
-	branch = master
+	branch = r1.3.0
@@ -82,8 +82,8 @@ if (ENABLE_OPEN_SRC)
 elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
   add_subdirectory(tests)
 else()
-  find_module(slog libalog.so ${ASCEND_ATC_DIR})
-  find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
+  find_module(slog libalog.so ${ASCEND_ATC_DIR} ${ASCEND_DRIVER_COMMON_DIR})
+  find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
   if(PLATFORM STREQUAL "train")
     find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
     find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
@@ -150,6 +150,7 @@ elseif(ENABLE_MS_TESTCASES)
 include(cmake/external_libs/protobuf_static.cmake)
 include(cmake/external_libs/protoc.cmake)
 include(cmake/external_libs/securec.cmake)
+include(cmake/external_libs/json.cmake)
 include(cmake/FindModule.cmake)
 include(cmake/intf_pub_linux.cmake)
@@ -1,3 +1,18 @@
+### Major Features and Improvements
+* Multiple parallel communication groups can be distinguished and the communication tasks of different parallel communication groups can be divided into different streams.
+* Parallel tasks are added for the entire map optimization. The execution sequence is optimized for parallel groups.
+* Dynamic shape is supported for single operator in inference scenarios.
+* Online inference supports concurrent execution of multiple threads.
+* Memory allocation supports address reuse in the buffer pool.
+* Supports Event resource reuse.
+* Supports the BF16 data type.
+## Thanks to our Contributors
+Thanks goes to these wonderful people: wuweikang,weiyang,yanghaorang,xutianchun,shibeiji,zhouchao, tanghuikang, zhoulili, liujunzhu, zhengyuanhua, taoxiangdong Contributions of any kind are welcome!
+Contributions of any kind are welcome!
 # Release 1.0.0
 ## Major Features and Improvements
@@ -229,7 +229,7 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
   rm -rf ${BASEPATH}/cov
   mkdir ${BASEPATH}/cov
   lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
-  lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
+  lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' '/usr/include/*' '*/metadef/*' '*/parser/*' -o cov/coverage.info
   cd ${BASEPATH}/cov
   genhtml coverage.info
 fi
@@ -9,10 +9,6 @@ if (GE_PB_PKG)
   set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
   set(MD5 "0dc903888211db3a0f170304cd9f3a89")
   set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
-#elseif (ENABLE_GITEE)
-#  set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
-#  set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
-#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
 else()
   set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
   set(MD5 "0dc903888211db3a0f170304cd9f3a89")
@@ -31,6 +31,7 @@ set(PROTO_HEADER_LIST
 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
 protobuf_generate(ge PROTO_CLIENT_SRCS PROTO_CLIENT_HDRS ${PROTO_CLIENT_LIST})
 protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST})
+protobuf_generate(ge_client PROTO_CLIENT_HEADER_SRCS PROTO_CLIENT_HEADER_HDRS ${PROTO_HEADER_LIST})
 if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
 ############ libge_proto_common.a ############
@@ -56,7 +57,7 @@ target_link_libraries(ge_proto_common PRIVATE
 ############ libge_proto_client.a ############
 add_library(ge_proto_client STATIC
-    ${PROTO_HEADER_HDRS}
+    ${PROTO_CLIENT_HEADER_HDRS}
     ${PROTO_CLIENT_SRCS}
 )
@@ -65,6 +66,11 @@ target_compile_definitions(ge_proto_client PRIVATE
     google=ascend_private
 )
+target_include_directories(ge_proto_client PRIVATE
+    ${CMAKE_BINARY_DIR}/proto/ge_client
+    ${CMAKE_BINARY_DIR}/proto/ge_client/proto
+)
 target_compile_options(ge_proto_client PRIVATE
     -O2
     -fno-common
@@ -221,7 +221,10 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_
   try {
     json_file_ << jsn.dump(kJsonDumpLevel) << std::endl;
   } catch (nlohmann::detail::type_error &e) {
-    GELOGE(FAILED, "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s], session_id:%lu, graph_id:%lu", json_file_name_.c_str(), e.what(), session_id, graph_id);
+    GELOGE(FAILED,
+           "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s],"
+           "session_id:%lu, graph_id:%lu",
+           json_file_name_.c_str(), e.what(), session_id, graph_id);
     ret_failed = true;
   }
   json_file_.close();
@@ -241,7 +244,9 @@ ge::Status Analyzer::DoAnalyze(DataInfo &data_info) {
   GE_CHECK_NOTNULL(graph_info);
   auto status = SaveOpInfo(desc, data_info, graph_info);
   if (status != SUCCESS) {
-    GELOGE(status, "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!", desc->GetName().c_str(), desc->GetType().c_str());
+    GELOGE(status,
+           "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!",
+           desc->GetName().c_str(), desc->GetType().c_str());
     return FAILED;
   }
   // create json file
@@ -16,6 +16,7 @@ set(PROTO_LIST
 )
 protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
+protobuf_generate(ge_static PROTO_STATIC_SRCS PROTO_STATIC_HDRS ${PROTO_LIST})
 set(SRC_LIST
     "context/ctx.cc"
@@ -127,7 +128,7 @@ target_link_libraries(ge_common PRIVATE
 )
 ############ libge_common.a ############
-add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_HDRS})
+add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_STATIC_HDRS})
 target_compile_definitions(ge_common_static PRIVATE
     PROTOBUF_INLINE_NOT_IN_HEADERS=0
     HOST_VISIBILITY
@@ -158,7 +159,7 @@ target_include_directories(ge_common_static PRIVATE
     ${METADEF_DIR}/inc/external/graph
     ${METADEF_DIR}/inc/graph
     ${CMAKE_BINARY_DIR}
-    ${CMAKE_BINARY_DIR}/proto/ge
+    ${CMAKE_BINARY_DIR}/proto/ge_static
     #### yellow zone ####
     ${GE_DEPEND_DIR}/inc
     ${GE_DEPEND_DIR}/inc/cce
@@ -96,7 +96,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
   dump_mode = dump_config.dump_mode;
   GELOGI("Dump mode is %s", dump_mode.c_str());
   dump_properties.SetDumpMode(dump_mode);
-  dump_properties_map_.emplace(kInferSessionId, dump_properties);
+  dump_properties_map_[kInferSessionId] = dump_properties;
   return SUCCESS;
 }
```
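The `emplace` → `operator[]` swap in the hunk above is a behavioral fix, not a style one: `std::map::emplace` does nothing when the key is already present, so a second `SetDumpConf` call would silently keep the stale dump properties, whereas `operator[]` assigns unconditionally. A minimal standalone sketch of the difference (hypothetical values, not GE code):

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<int, std::string> props;
  props.emplace(0, "old dump config");
  props.emplace(0, "new dump config");  // key 0 already exists: emplace is a no-op
  std::cout << props[0] << '\n';        // prints "old dump config"

  props[0] = "new dump config";         // operator[] overwrites the existing value
  std::cout << props[0] << '\n';        // prints "new dump config"
  return 0;
}
```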
```diff
@@ -20,6 +20,7 @@
 #include "common/ge/datatype_util.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
+#include "framework/common/types.h"
 #include "graph/anchor.h"
 #include "graph/ge_tensor.h"
 #include "graph/op_desc.h"
@@ -55,8 +56,10 @@ void DumpOp::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond
   loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond);
 }
-void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id) {
+void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name,
+                                 uint32_t dynamic_model_id) {
   dynamic_model_name_ = dynamic_model_name;
+  dynamic_om_name_ = dynamic_om_name;
   dynamic_model_id_ = dynamic_model_id;
 }
@@ -200,6 +203,32 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
   return SUCCESS;
 }
+Status DumpOp::SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info) {
+  if (dynamic_model_name_.empty() && dynamic_om_name_.empty()) {
+    GELOGI("Single op dump, no need set model name");
+    return SUCCESS;
+  }
+  std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
+  bool not_find_by_omname = model_list.find(dynamic_om_name_) == model_list.end();
+  bool not_find_by_modelname = model_list.find(dynamic_model_name_) == model_list.end();
+  std::string dump_model_name = not_find_by_omname ? dynamic_model_name_ : dynamic_om_name_;
+  if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
+    if (not_find_by_omname && not_find_by_modelname) {
+      std::string model_list_str;
+      for (auto &model : model_list) {
+        model_list_str += "[" + model + "].";
+      }
+      GELOGW("Model %s will not be set to dump, dump list: %s", dump_model_name.c_str(), model_list_str.c_str());
+      return FAILED;
+    }
+  }
+  if (!dump_model_name.empty() && dump_properties_.IsDumpOpen()) {
+    GELOGD("Dump model name is %s", dump_model_name.c_str());
+    op_mapping_info.set_model_name(dump_model_name);
+  }
+  return SUCCESS;
+}
 Status DumpOp::LaunchDumpOp() {
   GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str());
   int32_t device_id = 0;
```
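The new `SetDumpModelName` above decides which name a dump is recorded under and whether to dump at all. A self-contained sketch of that selection logic (hypothetical names; a plain `std::set` stands in for `dump_properties_.GetAllDumpModel()`, and the `DUMP_ALL_MODEL` wildcard case is omitted):

```cpp
#include <iostream>
#include <set>
#include <string>

int main() {
  // Models the user configured for dump; "resnet50.om" is a hypothetical entry.
  std::set<std::string> model_list = {"resnet50.om"};
  std::string om_name = "resnet50.om";  // dynamic_om_name_
  std::string model_name = "resnet50";  // dynamic_model_name_

  bool not_find_by_omname = model_list.find(om_name) == model_list.end();
  bool not_find_by_modelname = model_list.find(model_name) == model_list.end();
  // Prefer the om name when it is configured; otherwise fall back to the model name.
  std::string dump_model_name = not_find_by_omname ? model_name : om_name;

  if (not_find_by_omname && not_find_by_modelname) {
    // SetDumpModelName returns FAILED here, and LaunchDumpOp then returns
    // SUCCESS without launching the dump task (dump is skipped, not an error).
    std::cout << "model not in dump list, dump skipped\n";
  } else {
    std::cout << "dump recorded under: " << dump_model_name << "\n";
  }
  return 0;
}
```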
```diff
@@ -209,8 +238,7 @@ Status DumpOp::LaunchDumpOp() {
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
   if (device_id < 0) {
-    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
-           "Check device_id failed, device_id = %d, which should be not less than 0.",
+    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Check device_id failed, device_id = %d, which should be not less than 0.",
            device_id);
     return ACL_ERROR_GE_INTERNAL_ERROR;
   }
@@ -220,11 +248,12 @@ Status DumpOp::LaunchDumpOp() {
   op_mapping_info.set_flag(kAicpuLoadFlag);
   op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
   op_mapping_info.set_model_id(dynamic_model_id_);
-  if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) {
-    op_mapping_info.set_model_name(dynamic_model_name_);
+  if (SetDumpModelName(op_mapping_info) != SUCCESS) {
+    return SUCCESS;
   }
   SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
-  GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
+  GELOGI("Dump step is %s ,dump path is %s in Launch dump op", dump_properties_.GetDumpStep().c_str(),
          dump_path.c_str());
   uint32_t task_id = 0;
   uint32_t stream_id = 0;
@@ -273,4 +302,4 @@ Status DumpOp::LaunchDumpOp() {
   }
   return SUCCESS;
 }
-}  // namesapce ge
+}  // namespace ge
```
```diff
@@ -34,12 +34,13 @@ class DumpOp {
                vector<uintptr_t> output_addrs, rtStream_t stream);
   Status LaunchDumpOp();
   void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond);
-  void SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id);
+  void SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name, uint32_t dynamic_model_id);
 private:
   Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info);
   Status DumpOutput(aicpu::dump::Task &task);
   Status DumpInput(aicpu::dump::Task &task);
+  Status SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info);
   DumpProperties dump_properties_;
   OpDescPtr op_desc_;
@@ -54,6 +55,7 @@ class DumpOp {
   uintptr_t loop_cond_;
   std::string dynamic_model_name_;
+  std::string dynamic_om_name_;
   std::uint32_t dynamic_model_id_;
 };
 }  // namespace ge
```
```diff
@@ -35,14 +35,14 @@ const std::string kDumpStatusOpen = "on";
 const uint32_t kAicoreOverflow = (0x1 << 0);
 const uint32_t kAtomicOverflow = (0x1 << 1);
 const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow);
-}
+}  // namespace
 namespace ge {
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) {
   CopyFrom(other);
 }
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=(
-  const DumpProperties &other) {
+    const DumpProperties &other) {
   CopyFrom(other);
   return *this;
 }
@@ -97,7 +97,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti
 // The following is the new dump scenario of the fusion operator
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue(
-  const std::string &model, const std::set<std::string> &layers) {
+    const std::string &model, const std::set<std::string> &layers) {
   for (const std::string &layer : layers) {
     GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str());
   }
@@ -138,7 +138,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope
 }
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue(
-  const std::string &model) const {
+    const std::string &model) const {
   auto iter = model_dump_properties_map_.find(model);
   if (iter != model_dump_properties_map_.end()) {
     return iter->second;
@@ -147,8 +147,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope
 }
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump(
-  const std::string &model, const std::string &om_name, const std::string &op_name) const {
+    const std::string &model, const std::string &om_name, const std::string &op_name) const {
   // if dump all
+  GELOGD("model name is %s om name is %s op is %s in layer need dump", model.c_str(), om_name.c_str(), op_name.c_str());
   if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) {
     return true;
   }
@@ -203,7 +204,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti
 }
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch(
-  const std::string &dump_op_switch) {
+    const std::string &dump_op_switch) {
   dump_op_switch_ = dump_op_switch;
 }
@@ -270,4 +271,4 @@ void DumpProperties::SetDumpDebugOptions() {
     GELOGI("ge.exec.enableDumpDebug is false or is not set.");
   }
 }
-}  // namespace
+}  // namespace ge
```
```diff
@@ -80,13 +80,11 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
   uint32_t debug_stream_id = 0;
   uint32_t debug_task_id = 0;
-#ifdef ONLY_COMPILE_OPEN_SRC
   auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
-#endif
   GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id);
   data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
   return SUCCESS;
@@ -94,7 +92,6 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
 void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
   rtError_t rt_ret = RT_ERROR_NONE;
-#ifdef ONLY_COMPILE_OPEN_SRC
   if (stream != nullptr) {
     GELOGD("start call rtDebugUnRegisterForStream in unknown shape over flow.");
     rt_ret = rtDebugUnRegisterForStream(stream);
@@ -102,8 +99,6 @@ void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
       GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", rt_ret);
     }
   }
-#endif
   if (op_debug_addr_ != nullptr) {
     rt_ret = rtFree(op_debug_addr_);
     if (rt_ret != RT_ERROR_NONE) {
```
```diff
@@ -154,7 +154,8 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size);
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
   }
@@ -73,7 +73,8 @@ Status CheckArgsForC1hwncoc0ToHwcn(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -94,7 +94,8 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -122,7 +123,8 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) {
                      args.data + src_idx * data_size, static_cast<size_t>(data_size));
   }
   if (ret != EOK) {
-    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+           "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
            dst_offset, ret, pad_zero);
     return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }
@@ -95,7 +95,8 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -123,7 +124,8 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul
                      args.data + src_idx * data_size, static_cast<size_t>(data_size));
   }
   if (ret != EOK) {
-    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+           "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
           dst_offset, ret, pad_zero);
     return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }
```
```diff
@@ -139,7 +139,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -175,7 +176,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -189,7 +191,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -210,7 +213,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -246,7 +250,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
       ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                      static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -260,7 +265,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
       ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                      static_cast<size_t>(size));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -274,14 +280,16 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
 Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
     return ACL_ERROR_GE_DATATYPE_INVALID;
   }
   if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -325,7 +333,8 @@ Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector
 Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -333,7 +342,8 @@ Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult
   }
   if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
```
```diff
@@ -127,7 +127,8 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
              TypeUtils::FormatToSerialString(args.src_format).c_str(),
              TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -173,8 +174,9 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) {
     }
   }
   if (ret != EOK) {
-    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d", offset,
-           ret, need_pad_zero);
+    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+           "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d",
+           offset, ret, need_pad_zero);
     return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
   }
 }
@@ -213,7 +215,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
              TypeUtils::FormatToSerialString(args.src_format).c_str(),
             TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -235,7 +238,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
                          static_cast<size_t>(data_size));
       } else {
         if (protected_size < data_size) {
-          GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to operate the dst memory, protected_size is %ld and size is %ld",
+          GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+                 "Failed to operate the dst memory, protected_size is %ld and size is %ld",
                  protected_size, data_size);
           return ACL_ERROR_GE_PARAM_INVALID;
         }
@@ -247,7 +251,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
         }
       }
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
                dst_offset, ret, pad_zero);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
@@ -288,7 +293,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
              TypeUtils::FormatToSerialString(args.src_format).c_str(),
             TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -310,7 +316,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
                          static_cast<size_t>(data_size));
       } else {
         if (protected_size < data_size) {
-          GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to operate the dst memory, protected_size is %ld and size is %ld",
+          GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+                 "Failed to operate the dst memory, protected_size is %ld and size is %ld",
                  protected_size, data_size);
           return ACL_ERROR_GE_PARAM_INVALID;
         }
@@ -322,7 +329,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
         }
      }
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
                dst_offset, ret, pad_zero);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
```
```diff
@@ -140,7 +140,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -179,7 +180,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -195,7 +197,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -217,7 +220,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -257,7 +261,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size * w0));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -273,7 +278,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
       auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                           static_cast<size_t>(size));
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
     }
@@ -288,14 +294,16 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
 Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
     return ACL_ERROR_GE_DATATYPE_INVALID;
   }
   if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -339,7 +347,8 @@ Status FormatTransferFractalZz::TransShape(Format src_format, const ShapeVector
 Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -347,7 +356,8 @@ Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult
   }
   if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
```
| @@ -66,7 +66,7 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) { | |||
| FmtToStr(ShapeToString(dst_shape)); | |||
| GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -74,7 +74,8 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
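Every GetDstDataAfterTrans variant in these hunks allocates the destination with `new (std::nothrow)` wrapped in a shared_ptr carrying `std::default_delete<uint8_t[]>`: a failed allocation yields nullptr (reported as ACL_ERROR_GE_MEMORY_ALLOCATION) rather than throwing, and the array is released with delete[]. A minimal sketch of the idiom:

```cpp
#include <cstdint>
#include <iostream>
#include <memory>
#include <new>

// Allocate a byte buffer without throwing; returns nullptr on failure.
// default_delete<uint8_t[]> ensures delete[] is used, matching new[].
std::shared_ptr<uint8_t> AllocBuffer(int64_t total_size) {
  if (total_size <= 0) {
    return nullptr;
  }
  return std::shared_ptr<uint8_t>(new (std::nothrow) uint8_t[total_size],
                                  std::default_delete<uint8_t[]>());
}

int main() {
  auto buf = AllocBuffer(1024);
  std::cout << (buf != nullptr ? "allocated" : "alloc failed") << "\n";
}
```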
| @@ -59,9 +59,10 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) { | |||
| } | |||
| int64_t c1 = Ceil(dst_shape.at(kNchwC), c0); | |||
| int64_t n0 = Ceil(dst_shape.at(kNchwN), static_cast<int64_t>(kNiSize)); | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 || src_shape.at(kFracZC0) != c0 || | |||
| src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 || | |||
| src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| @@ -72,7 +73,8 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -59,9 +59,10 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) { | |||
| } | |||
| int64_t c1 = Ceil(dst_shape.at(kNhwcC), c0); | |||
| int64_t n0 = Ceil(dst_shape.at(kNhwcN), static_cast<int64_t>(kNiSize)); | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 || src_shape.at(kFracZC0) != c0 || | |||
| src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 || | |||
| src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { | |||
| GELOGE(PARAM_INVALID, | |||
| "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| @@ -72,7 +73,8 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(OUT_OF_MEMORY, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| @@ -140,7 +142,7 @@ Status FormatTransferFracZNhwc::TransFormat(const TransArgs &args, TransResult & | |||
| } | |||
| GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| GELOGD("Begin to trans format from FracZ to NHWC, src shape %s, data type %s, dst shape %s, memory size %ld", | |||
| @@ -91,7 +91,8 @@ Status CheckArgsForHwcnToC1hwncoc0(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -72,7 +72,8 @@ Status CheckArgsForNc1hwc0ToNchw(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(OUT_OF_MEMORY, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return OUT_OF_MEMORY; | |||
| @@ -61,7 +61,8 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) { | |||
| if (src_shape.at(kNc1hwc0H) != dst_shape.at(kNhwcH) || src_shape.at(kNc1hwc0W) != dst_shape.at(kNhwcW) || | |||
| src_shape.at(kNc1hwc0N) != dst_shape.at(kNhwcN) || src_shape.at(kNc1hwc0C0) != c0 || | |||
| src_shape.at(kNc1hwc0C1) != (Ceil(dst_shape.at(kNhwcC), c0))) { | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| GELOGE(ACL_ERROR_GE_SHAPE_INVALID, | |||
| "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", | |||
| ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); | |||
| return ACL_ERROR_GE_SHAPE_INVALID; | |||
| } | |||
| @@ -72,7 +73,8 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -125,7 +125,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| auto t1 = h_o * w_o; | |||
| auto t2 = n_o * c_o; | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), | |||
| GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | |||
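Both reflowed GE_IF_BOOL_EXEC guards hinge on CheckInt64MulOverflow, which rejects a multiplication before it can wrap. A self-contained sketch of such a check; the real implementation lives in common/math/math_util.h and may differ in detail:

```cpp
#include <cstdint>
#include <iostream>
#include <limits>

// Returns true when a * b fits in int64_t, checking with division so the
// multiplication itself never overflows. A sketch of the guarded pattern.
bool CheckInt64MulOverflow(int64_t a, int64_t b) {
  if (a == 0 || b == 0) return true;
  if (a > 0) {
    if (b > 0) return a <= std::numeric_limits<int64_t>::max() / b;
    return b >= std::numeric_limits<int64_t>::min() / a;
  }
  if (b > 0) return a >= std::numeric_limits<int64_t>::min() / b;
  return b >= std::numeric_limits<int64_t>::max() / a;
}

int main() {
  std::cout << CheckInt64MulOverflow(1 << 20, 1 << 20) << "\n";                    // 1: fits
  std::cout << CheckInt64MulOverflow(int64_t{1} << 40, int64_t{1} << 40) << "\n";  // 0: overflows
}
```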
| @@ -140,7 +141,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -212,7 +214,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| auto t1 = h_o * w_o; | |||
| auto t2 = n_o * c_o; | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), | |||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,"int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
| return ACL_ERROR_GE_INTERNAL_ERROR); | |||
| int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | |||
| @@ -228,7 +231,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||
| dst.reset(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -275,7 +279,8 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult | |||
| } | |||
| std::vector<int64_t> expect_shape; | |||
| ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type, args_tmp.dst_format, expect_shape); | |||
| ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type, | |||
| args_tmp.dst_format, expect_shape); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| @@ -92,7 +92,8 @@ Status CheckArgsForNhwcToNc1hwc0(const TransArgs &args) { | |||
| Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { | |||
| std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>()); | |||
| if (dst == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, | |||
| "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| @@ -87,12 +87,13 @@ Status ModelHelper::SaveSizeToModelDef(const GeModelPtr &ge_model) { | |||
| std::shared_ptr<ModelTaskDef> model_task_def = ge_model->GetModelTaskDefPtr(); | |||
| if (model_task_def == nullptr) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed"); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
| GELOGD("SaveSizeToModelDef task_info_size is 0."); | |||
| om_info.push_back(0); | |||
| } else { | |||
| size_t partition_task_size = model_task_def->ByteSizeLong(); | |||
| GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size); | |||
| om_info.push_back(partition_task_size); | |||
| } | |||
| size_t partition_task_size = model_task_def->ByteSizeLong(); | |||
| GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size); | |||
| om_info.push_back(partition_task_size); | |||
| GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(*(ge_model.get()), "om_info_list", om_info), | |||
| GELOGE(FAILED, "SetListInt of om_info_list failed."); | |||
| @@ -598,6 +599,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { | |||
| is_first_model = false; | |||
| root_model_->SetRootGraph(GraphUtils::GetComputeGraph(cur_model->GetGraph())); | |||
| root_model_->SetModelId(cur_model->GetModelId()); | |||
| root_model_->SetModelName(cur_model->GetName()); | |||
| model_ = cur_model; | |||
| continue; | |||
| } | |||
| @@ -31,7 +31,7 @@ const char *const kFpPoint = "fp_point"; | |||
| const char *const kBpPoint = "bp_point"; | |||
| #ifdef DAVINCI_SUPPORT_PROFILING | |||
| const size_t kReportMaxLen = 2048; | |||
| const size_t kReportMaxLen = 1024; | |||
| const int32_t kMaxDeviceNum = 256; | |||
| const uint32_t kInteval = 2; | |||
| const std::string kConfigNumsdev = "devNums"; | |||
| @@ -15,6 +15,8 @@ | |||
| */ | |||
| #include "common/tbe_kernel_store.h" | |||
| #include "graph/utils/attr_utils.h" | |||
| #include "graph/debug/ge_attr_define.h" | |||
| namespace ge { | |||
| @@ -31,6 +33,15 @@ void TBEKernelStore::LoadTBEKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc> | |||
| GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin), | |||
| GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");) | |||
| GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); | |||
| std::string atomic_kernel_name; | |||
| (void) AttrUtils::GetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, atomic_kernel_name); | |||
| if (!atomic_kernel_name.empty()) { | |||
| GELOGI("Get atomic kernel name is %s.", atomic_kernel_name.c_str()); | |||
| auto atomic_kernel_bin = FindKernel(atomic_kernel_name); | |||
| GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, atomic_kernel_bin), | |||
| GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for atomic kernel_bin failed");) | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -8,6 +8,7 @@ set(PROTO_LIST | |||
| ) | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_static PROTO_STATIC_SRCS PROTO_STATIC_HDRS ${PROTO_LIST}) | |||
| set(SRC_LIST | |||
| "ge_executor.cc" | |||
| @@ -162,7 +163,7 @@ set(SRC_LIST | |||
| ) | |||
| ######## libge_executor.a ######## | |||
| add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_STATIC_HDRS}) | |||
| target_compile_options(ge_executor PRIVATE | |||
| $<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> | |||
| @@ -191,7 +192,7 @@ target_include_directories(ge_executor SYSTEM PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_static | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| ${GE_CODE_DIR}/../inc/cce | |||
| @@ -30,6 +30,8 @@ | |||
| #include "single_op/single_op_manager.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | |||
| #include "graph/opsproto_manager.h" | |||
| #include "ge_local_engine/engine/host_cpu_engine.h" | |||
| using std::string; | |||
| using std::vector; | |||
| @@ -199,6 +201,33 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims, | |||
| namespace ge { | |||
| bool GeExecutor::isInit_ = false; | |||
| static void InitOpsProtoManager() { | |||
| string opsproto_path; | |||
| const char *path_env = std::getenv("ASCEND_OPP_PATH"); | |||
| if (path_env != nullptr) { | |||
| string path = path_env; | |||
| string file_path = RealPath(path.c_str()); | |||
| if (file_path.empty()) { | |||
| GELOGE(FAILED, "[Check][EnvPath]ASCEND_OPP_PATH path [%s] is invalid.", path.c_str()); | |||
| REPORT_INPUT_ERROR("E68016", {"ASCEND_OPP_PATH", path}); | |||
| return; | |||
| } | |||
| opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); | |||
| GELOGI("Get opsproto so path from env : %s", path.c_str()); | |||
| } else { | |||
| string path_base = PluginManager::GetPath(); | |||
| GELOGI("path_base is %s", path_base.c_str()); | |||
| path_base = path_base.substr(0, path_base.rfind('/')); | |||
| path_base = path_base.substr(0, path_base.rfind('/') + 1); | |||
| opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | |||
| } | |||
| GELOGI("Get opsproto path is %s", opsproto_path.c_str()); | |||
| OpsProtoManager *manager = OpsProtoManager::Instance(); | |||
| map<string, string> option_tmp; | |||
| option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path)); | |||
| (void)manager->Initialize(option_tmp); | |||
| } | |||
| GeExecutor::GeExecutor() {} | |||
| Status GeExecutor::Initialize() { | |||
| @@ -208,6 +237,16 @@ Status GeExecutor::Initialize() { | |||
| return ge::SUCCESS; | |||
| } | |||
| OpTilingManager::GetInstance().LoadSo(); | |||
| Status init_hostcpu_engine_status = HostCpuEngine::GetInstance().Initialize(); | |||
| if (init_hostcpu_engine_status != SUCCESS) { | |||
| GELOGE(init_hostcpu_engine_status, "Failed to initialize HostCpuEngine"); | |||
| return init_hostcpu_engine_status; | |||
| } | |||
| InitOpsProtoManager(); | |||
| std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM); | |||
| mem_type.push_back(RT_MEMORY_P2P_DDR); | |||
| auto ret = MemManager::Instance().Initialize(mem_type); | |||
| @@ -20,6 +20,8 @@ set(OPS_KERNEL_SRC_LIST | |||
| ) | |||
| protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_ops_shared PROTO_OPS_SHARED_SRCS PROTO_OPS_SHARED_HDRS ${PROTO_LIST}) | |||
| protobuf_generate(ge_ops_static PROTO_OPS_STATIC_SRCS PROTO_OPS_STATIC_HDRS ${PROTO_LIST}) | |||
| ############ libge_local_engine.so ############ | |||
| add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||
| @@ -119,7 +121,7 @@ set_target_properties(atc_ge_local_engine PROPERTIES | |||
| ) | |||
| ############ libge_local_opskernel_builder.so ############ | |||
| add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_SHARED_HDRS}) | |||
| target_compile_options(ge_local_opskernel_builder PRIVATE | |||
| -Werror | |||
| @@ -143,7 +145,7 @@ target_include_directories(ge_local_opskernel_builder PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_ops_shared | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| #### blue zone #### | |||
| @@ -166,7 +168,7 @@ target_link_libraries(ge_local_opskernel_builder PRIVATE | |||
| ) | |||
| ############ atclib/libge_local_opskernel_builder.so ############ | |||
| add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_SHARED_HDRS}) | |||
| target_compile_options(atc_ge_local_opskernel_builder PRIVATE | |||
| -Werror | |||
| @@ -190,7 +192,7 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_ops_shared | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| #### blue zone #### | |||
| @@ -218,7 +220,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES | |||
| ) | |||
| ############ libge_local_opskernel_builder.a ############ | |||
| add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||
| add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_STATIC_HDRS}) | |||
| target_compile_options(ge_local_opskernel_builder_static PRIVATE | |||
| -Werror | |||
| @@ -243,7 +245,7 @@ target_include_directories(ge_local_opskernel_builder_static PRIVATE | |||
| ${METADEF_DIR}/inc/external/graph | |||
| ${METADEF_DIR}/inc/graph | |||
| ${CMAKE_BINARY_DIR} | |||
| ${CMAKE_BINARY_DIR}/proto/ge | |||
| ${CMAKE_BINARY_DIR}/proto/ge_ops_static | |||
| #### yellow zone #### | |||
| ${GE_CODE_DIR}/../inc | |||
| #### blue zone #### | |||
| @@ -16,6 +16,7 @@ set(GE_SRC_LIST | |||
| "task/label_goto_task.cc" | |||
| "task/label_set_task.cc" | |||
| "task/label_switch_task.cc" | |||
| "task/label_manager.cc" | |||
| ) | |||
| add_library(ge_runtime SHARED ${GE_SRC_LIST}) | |||
| @@ -21,6 +21,7 @@ | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "common/types.h" | |||
| #include "common/util.h" | |||
| #include "common/math/math_util.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| #include "framework/common/op/op_parser_util.h" | |||
| #include "graph/types.h" | |||
| @@ -52,15 +52,7 @@ HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<Hccl | |||
| } | |||
| } | |||
| HcclTask::~HcclTask() { | |||
| if (workspace_mem_ != nullptr) { | |||
| rtError_t rt_ret = rtFree(workspace_mem_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtFree workspace_mem_ failed! ret: 0x%X.", rt_ret); | |||
| } | |||
| workspace_mem_ = nullptr; | |||
| } | |||
| } | |||
| HcclTask::~HcclTask() {} | |||
| bool HcclTask::Distribute() { | |||
| // Ops kernel info store | |||
| @@ -79,11 +71,7 @@ bool HcclTask::Distribute() { | |||
| SetSecondaryStream(); | |||
| if (task_info_->workspace_size() > 0) { | |||
| rtError_t rt_ret = rtMalloc(&workspace_mem_, task_info_->workspace_size(), RT_MEMORYINFO_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| workspace_mem_ = task_info_->workspace_addr(); | |||
| } | |||
| GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl."); | |||
| @@ -16,99 +16,83 @@ | |||
| #include "ge_runtime/task/label_goto_task.h" | |||
| #include "ge_runtime/task/task_factory.h" | |||
| #include "framework/common/util.h" | |||
| namespace ge { | |||
| namespace model_runner { | |||
| LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info) | |||
| : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), task_info_(task_info) { | |||
| : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), | |||
| task_info_(task_info), | |||
| stream_(nullptr), | |||
| index_value_(nullptr) { | |||
| if (task_info_ == nullptr) { | |||
| GELOGW("task_info_ is null!"); | |||
| return; | |||
| } | |||
| auto stream_list = model_context.stream_list(); | |||
| auto label_list = model_context.label_list(); | |||
| rt_model_handle_ = model_context.rt_model_handle(); | |||
| uint32_t stream_id = task_info->stream_id(); | |||
| uint32_t label_id = task_info->label_id(); | |||
| label_id_ = task_info->label_id(); | |||
| GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); | |||
| GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); | |||
| if (stream_id >= stream_list.size() || label_id >= label_list.size()) { | |||
| GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id_); | |||
| if (stream_id >= stream_list.size() || label_id_ >= label_list.size()) { | |||
| GELOGW("Stream/Label id invalid."); | |||
| return; | |||
| } | |||
| stream_ = stream_list[stream_id]; | |||
| label_ = label_list[label_id]; | |||
| label_manager_ = LabelManager::GetInstance(); | |||
| if (label_manager_ == nullptr) { | |||
| GELOGW("Get label manager instance failed."); | |||
| return; | |||
| } | |||
| label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, {label_id_}, label_list); | |||
| } | |||
| LabelGotoTask::~LabelGotoTask() { | |||
| GE_FREE_RT_LOG(label_info_); | |||
| GE_FREE_RT_LOG(index_value_); | |||
| if (index_value_ != nullptr) { | |||
| rtError_t rt_ret = rtFree(index_value_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtFree index_value_ failed! ret: 0x%X.", rt_ret); | |||
| } | |||
| index_value_ = nullptr; | |||
| } | |||
| } | |||
| bool LabelGotoTask::Distribute() { | |||
| GELOGI("LabelGotoTask Distribute start."); | |||
| if (!CheckParamValid()) { | |||
| return false; | |||
| } | |||
| const std::vector<void *> label_list = { label_ }; | |||
| rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| uint64_t branch_index = 0; | |||
| rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size(); | |||
| rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelSwitchByIndex(index_value_, label_list.size(), label_info_, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
| return false; | |||
| } | |||
| GELOGI("DistributeTask end."); | |||
| return true; | |||
| } | |||
| bool LabelGotoTask::CheckParamValid() { | |||
| if (stream_ == nullptr) { | |||
| GELOGE(PARAM_INVALID, "stream is null!"); | |||
| return false; | |||
| } | |||
| if (label_ == nullptr) { | |||
| GELOGE(PARAM_INVALID, "label is null!"); | |||
| if (label_info_ == nullptr) { | |||
| GELOGE(PARAM_INVALID, "label info is null!"); | |||
| return false; | |||
| } | |||
| if (label_info_ != nullptr) { | |||
| GELOGE(PARAM_INVALID, "label_info_ has dirty data."); | |||
| return false; | |||
| if (index_value_ == nullptr) { | |||
| rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| uint64_t index = 0; | |||
| rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| } | |||
| if (index_value_ != nullptr) { | |||
| GELOGE(PARAM_INVALID, "index_value_ has dirty data."); | |||
| void *label_info = label_info_->GetLabelInfo(); | |||
| rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, 1, label_info, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| GELOGI("DistributeTask end."); | |||
| return true; | |||
| } | |||
| @@ -18,7 +18,11 @@ | |||
| #define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <map> | |||
| #include <mutex> | |||
| #include "ge_runtime/task/task.h" | |||
| #include "ge_runtime/task/label_manager.h" | |||
| namespace ge { | |||
| namespace model_runner { | |||
| @@ -31,13 +35,13 @@ class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> { | |||
| bool Distribute() override; | |||
| private: | |||
| bool CheckParamValid(); | |||
| std::shared_ptr<LabelGotoTaskInfo> task_info_; | |||
| void *stream_{nullptr}; | |||
| void *label_{nullptr}; | |||
| void *label_info_{nullptr}; | |||
| void *index_value_{nullptr}; | |||
| void *stream_; | |||
| std::shared_ptr<LabelGuard> label_info_; | |||
| void *index_value_; | |||
| uint32_t label_id_; | |||
| rtModel_t rt_model_handle_; | |||
| std::shared_ptr<LabelManager> label_manager_; | |||
| }; | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| @@ -0,0 +1,119 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "ge_runtime/task/label_manager.h" | |||
| #include <algorithm> | |||
| #include <string> | |||
| #include "runtime/mem.h" | |||
| #include "runtime/rt_model.h" | |||
| #include "common/ge_inner_error_codes.h" | |||
| #include "framework/common/debug/ge_log.h" | |||
| namespace ge { | |||
| namespace model_runner { | |||
| std::weak_ptr<LabelManager> LabelManager::instance_; | |||
| std::mutex LabelManager::instance_mutex_; | |||
| template <class T> | |||
| static std::string GetVectorString(const std::vector<T> &vec) { | |||
| std::string ret; | |||
| for (size_t i = 0; i < vec.size(); ++i) { | |||
| if (i != 0) { | |||
| ret.push_back(','); | |||
| } | |||
| ret += std::to_string(vec[i]); | |||
| } | |||
| return ret; | |||
| } | |||
| LabelGuard::~LabelGuard() { | |||
| void *label_info = GetLabelInfo(); | |||
| if (label_info != nullptr) { | |||
| rtError_t rt_ret = rtFree(label_info); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtFree label_info failed! ret: 0x%X.", rt_ret); | |||
| } | |||
| } | |||
| } | |||
| std::shared_ptr<LabelManager> LabelManager::GetInstance() { | |||
| std::lock_guard<std::mutex> lock(instance_mutex_); | |||
| auto instance = instance_.lock(); | |||
| if (instance != nullptr) { | |||
| return instance; | |||
| } | |||
| instance = std::make_shared<LabelManager>(); | |||
| instance_ = instance; | |||
| return instance; | |||
| } | |||
| std::shared_ptr<LabelGuard> LabelManager::GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids, | |||
| const std::vector<void *> &all_label) { | |||
| std::lock_guard<std::mutex> lock(model_info_mapping_mutex_); | |||
| rtError_t rt_ret; | |||
| auto model_iter = model_info_mapping_.find(model); | |||
| if (model_iter == model_info_mapping_.end()) { | |||
| model_info_mapping_.emplace(model, std::map<std::string, std::weak_ptr<LabelGuard>>()); | |||
| model_iter = model_info_mapping_.find(model); | |||
| } | |||
| std::string label_id_str = GetVectorString(label_ids); | |||
| auto &label_map = model_iter->second; | |||
| auto label_iter = label_map.find(label_id_str); | |||
| if (label_iter != label_map.end()) { | |||
| auto label_guard = label_iter->second.lock(); | |||
| if (label_guard != nullptr) { | |||
| GELOGI("model %p find same label id %s.", model, label_id_str.c_str()); | |||
| return label_guard; | |||
| } | |||
| } | |||
| GELOGI("Alloc label id %s for model %p.", label_id_str.c_str(), model); | |||
| void *label_info; | |||
| std::vector<void *> label_list; | |||
| bool status = true; | |||
| std::transform(label_ids.begin(), label_ids.end(), std::back_inserter(label_list), | |||
| [&all_label, &status](uint32_t idx) -> void * { | |||
| if (idx >= all_label.size()) { | |||
| GELOGE(PARAM_INVALID, "Invalid label id %u, all label list size %zu.", idx, all_label.size()); | |||
| status = false; | |||
| return nullptr; | |||
| } | |||
| return all_label[idx]; | |||
| }); | |||
| if (!status) { | |||
| GELOGE(PARAM_INVALID, "Get label info failed."); | |||
| return nullptr; | |||
| } | |||
| uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size(); | |||
| rt_ret = rtMalloc(&label_info, label_info_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return nullptr; | |||
| } | |||
| rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info, label_info_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return nullptr; | |||
| } | |||
| auto label_guard = std::make_shared<LabelGuard>(label_info); | |||
| label_map.emplace(label_id_str, label_guard); | |||
| return label_guard; | |||
| } | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
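GetLabelInfo deduplicates label tables per model: the outer map is keyed by model handle, the inner by the joined id string, and the values are weak_ptrs, so live guards are shared across tasks while expired entries are simply re-created. A compact sketch of that cache shape, with string keys and an int payload standing in for rtModel_t and LabelGuard:

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>

// Two-level weak cache keyed by model then by id-string, mirroring
// LabelManager::model_info_mapping_ above.
class WeakCache {
 public:
  std::shared_ptr<int> Get(const std::string &model, const std::string &key) {
    std::lock_guard<std::mutex> lock(mutex_);
    auto &per_model = cache_[model];  // creates the inner map on first use
    auto it = per_model.find(key);
    if (it != per_model.end()) {
      if (auto alive = it->second.lock()) {
        return alive;  // reuse the existing live entry
      }
    }
    auto fresh = std::make_shared<int>(42);  // stands in for rtMalloc + rtLabelListCpy
    per_model[key] = fresh;
    return fresh;
  }

 private:
  std::mutex mutex_;
  std::map<std::string, std::map<std::string, std::weak_ptr<int>>> cache_;
};

int main() {
  WeakCache cache;
  auto a = cache.Get("model0", "3,1,7");
  auto b = cache.Get("model0", "3,1,7");
  std::cout << (a == b) << "\n";  // 1: second lookup reuses the first entry
}
```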
| @@ -0,0 +1,54 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_ | |||
| #define GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <mutex> | |||
| #include <map> | |||
| #include <runtime/base.h> | |||
| namespace ge { | |||
| namespace model_runner { | |||
| class LabelGuard { | |||
| public: | |||
| explicit LabelGuard(void *label_info) : label_info_(reinterpret_cast<uintptr_t>(label_info)) {} | |||
| ~LabelGuard(); | |||
| void *GetLabelInfo() { return reinterpret_cast<void *>(label_info_); } | |||
| private: | |||
| uintptr_t label_info_; | |||
| }; | |||
| class LabelManager { | |||
| public: | |||
| static std::shared_ptr<LabelManager> GetInstance(); | |||
| std::shared_ptr<LabelGuard> GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids, | |||
| const std::vector<void *> &all_label); | |||
| private: | |||
| std::mutex model_info_mapping_mutex_; | |||
| std::map<rtModel_t, std::map<std::string, std::weak_ptr<LabelGuard>>> model_info_mapping_; | |||
| static std::weak_ptr<LabelManager> instance_; | |||
| static std::mutex instance_mutex_; | |||
| }; | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| #endif // GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_ | |||
| @@ -24,14 +24,14 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context, | |||
| : TaskRepeater<LabelSwitchTaskInfo>(model_context, task_info), | |||
| task_info_(task_info), | |||
| stream_(nullptr), | |||
| all_label_resource_(), | |||
| label_info_(nullptr) { | |||
| if (task_info_ == nullptr) { | |||
| GELOGW("task_info_ is null!"); | |||
| return; | |||
| } | |||
| all_label_resource_ = model_context.label_list(); | |||
| rt_model_handle_ = model_context.rt_model_handle(); | |||
| auto all_label_resource = model_context.label_list(); | |||
| auto stream_list = model_context.stream_list(); | |||
| uint32_t stream_id = task_info->stream_id(); | |||
| GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); | |||
| @@ -40,52 +40,24 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context, | |||
| return; | |||
| } | |||
| stream_ = stream_list[stream_id]; | |||
| } | |||
| LabelSwitchTask::~LabelSwitchTask() { | |||
| if (label_info_ != nullptr) { | |||
| rtError_t rt_ret = rtFree(label_info_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! ret: 0x%X.", rt_ret); | |||
| } | |||
| label_info_ = nullptr; | |||
| label_manager_ = LabelManager::GetInstance(); | |||
| if (label_manager_ == nullptr) { | |||
| GELOGW("Get label manager instance failed."); | |||
| return; | |||
| } | |||
| label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, task_info_->label_list(), all_label_resource); | |||
| } | |||
| LabelSwitchTask::~LabelSwitchTask() {} | |||
| bool LabelSwitchTask::Distribute() { | |||
| GELOGI("LabelSwitchTask Distribute start."); | |||
| if (!CheckParamValid()) { | |||
| return false; | |||
| } | |||
| const std::vector<uint32_t> &label_index_list = task_info_->label_list(); | |||
| std::vector<void *> label_list(task_info_->label_size(), nullptr); | |||
| for (size_t i = 0; i < task_info_->label_size(); ++i) { | |||
| uint32_t label_index = label_index_list[i]; | |||
| if (label_index >= all_label_resource_.size()) { | |||
| GELOGE(PARAM_INVALID, "label %zu index is %u, but there are %zu labels in total.", i, label_index, | |||
| all_label_resource_.size()); | |||
| return false; | |||
| } | |||
| label_list[i] = all_label_resource_[label_index]; | |||
| GELOGI("Case %zu: label id %zu.", i, label_index); | |||
| } | |||
| uint32_t label_info_size = sizeof(rtLabelDevInfo) * task_info_->label_size(); | |||
| rtError_t rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| } | |||
| rt_ret = rtLabelSwitchByIndex(task_info_->cond(), label_list.size(), label_info_, stream_); | |||
| void *label_info = label_info_->GetLabelInfo(); | |||
| rtError_t rt_ret = rtLabelSwitchByIndex(task_info_->cond(), task_info_->label_size(), label_info, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return false; | |||
| @@ -117,8 +89,8 @@ bool LabelSwitchTask::CheckParamValid() { | |||
| return false; | |||
| } | |||
| if (label_info_ != nullptr) { | |||
| GELOGE(PARAM_INVALID, "label_info_ has dirty data."); | |||
| if (label_info_ == nullptr) { | |||
| GELOGE(PARAM_INVALID, "CopyLabelList failed, label info is null."); | |||
| return false; | |||
| } | |||
| @@ -126,6 +98,5 @@ bool LabelSwitchTask::CheckParamValid() { | |||
| } | |||
| REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo); | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| @@ -19,6 +19,7 @@ | |||
| #include <memory> | |||
| #include "ge_runtime/task/task.h" | |||
| #include "ge_runtime/task/label_manager.h" | |||
| namespace ge { | |||
| namespace model_runner { | |||
| @@ -35,8 +36,9 @@ class LabelSwitchTask : public TaskRepeater<LabelSwitchTaskInfo> { | |||
| std::shared_ptr<LabelSwitchTaskInfo> task_info_; | |||
| void *stream_; | |||
| std::vector<void *> all_label_resource_; | |||
| void *label_info_; | |||
| rtModel_t rt_model_handle_; | |||
| std::shared_ptr<LabelGuard> label_info_; | |||
| std::shared_ptr<LabelManager> label_manager_; | |||
| }; | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| @@ -67,6 +67,9 @@ bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { | |||
| } | |||
| return false; | |||
| } | |||
| bool IsOptional(const ge::GeTensorDesc &tensor_desc) { | |||
| return tensor_desc.GetFormat() == ge::FORMAT_RESERVED && tensor_desc.GetDataType() == ge::DT_UNDEFINED; | |||
| } | |||
| } // namespace | |||
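IsOptional treats a tensor desc carrying FORMAT_RESERVED and DT_UNDEFINED as an unconnected optional input; RemoveConst further down uses it, together with the const attribute, to decide which inputs become Data nodes. A sketch of the predicate over simplified descriptors (the enums here are stand-ins for ge::Format and ge::DataType):

```cpp
#include <iostream>

// Simplified stand-ins for the ge::Format / ge::DataType sentinel values.
enum Format { FORMAT_NCHW, FORMAT_RESERVED };
enum DataType { DT_FLOAT, DT_UNDEFINED };

struct TensorDesc {
  Format format;
  DataType dtype;
};

// An optional input that was never connected carries both sentinels.
bool IsOptional(const TensorDesc &desc) {
  return desc.format == FORMAT_RESERVED && desc.dtype == DT_UNDEFINED;
}

int main() {
  std::cout << IsOptional({FORMAT_RESERVED, DT_UNDEFINED}) << "\n";  // 1
  std::cout << IsOptional({FORMAT_NCHW, DT_FLOAT}) << "\n";          // 0
}
```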
| namespace ge { | |||
| @@ -154,7 +157,7 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty | |||
| } | |||
| static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index, | |||
| bool attr) { | |||
| bool attr, int32_t &data_index) { | |||
| GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | |||
| GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | |||
| @@ -197,9 +200,10 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const | |||
| "[Add][InputDesc]fail for node:%s", data_op->GetName().c_str()); | |||
| GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | |||
| "[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str()); | |||
| if (attr) { | |||
| GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, | |||
| if (attr && !is_const) { | |||
| GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, data_index), return FAILED, | |||
| "[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); | |||
| ++data_index; | |||
| } | |||
| ge::NodePtr arg_node = graph->AddNode(data_op); | |||
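The AddInputs change splits the counters: arg_index still numbers every wired input, while data_index only advances for non-const ones, so ATTR_NAME_INDEX stays dense over the real Data nodes once constants are skipped. A sketch of the two counters running over a mixed input list:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

struct Input { bool is_const; };

// arg_index counts all inputs; data_index only counts the ones that become
// Data nodes, matching the `if (attr && !is_const) ... ++data_index` hunk.
void NumberInputs(const std::vector<Input> &inputs) {
  int32_t data_index = 0;
  for (int32_t arg_index = 0; arg_index < static_cast<int32_t>(inputs.size()); ++arg_index) {
    if (!inputs[arg_index].is_const) {
      std::cout << "arg " << arg_index << " -> data index " << data_index << "\n";
      ++data_index;
    } else {
      std::cout << "arg " << arg_index << " -> const, no data index\n";
    }
  }
}

int main() {
  NumberInputs({{false}, {true}, {false}});  // data indices 0 and 1; the const is skipped
}
```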
| @@ -565,6 +569,44 @@ bool GeGenerator::Impl::SetOmSystemInfo(AttrHolder &obj) { | |||
| return true; | |||
| } | |||
| Status GeGenerator::SetModelNameForDump(const GeRootModelPtr &ge_root_model) { | |||
| bool is_unknown_shape = false; | |||
| Status ret = ge_root_model->CheckIsUnknownShape(is_unknown_shape); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(FAILED, "[Check][IsUnknownShape]Check root model is unknown shape failed, model id:%u", | |||
| ge_root_model->GetModelId()); | |||
| REPORT_CALL_ERROR("E19999", "Check root model is unknown shape failed, model id:%zu", | |||
| ge_root_model->GetModelId()); | |||
| return FAILED; | |||
| } | |||
| GeModelPtr model_root = nullptr; | |||
| if (is_unknown_shape) { | |||
| model_root = MakeShared<GeModel>(); | |||
| GE_CHECK_NOTNULL(model_root); | |||
| model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph())); | |||
| ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root); | |||
| } | |||
| ModelHelper model_helper; | |||
| string model_name; | |||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | |||
| Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), | |||
| model_name); | |||
| if (name_ret != SUCCESS) { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); | |||
| GELOGE(FAILED, "[Check][GetModelNameStep]Get model_name failed. Param --output is invalid, root graph name: %s", | |||
| ge_root_model->GetRootGraph()->GetName().c_str()); | |||
| REPORT_CALL_ERROR("E19999", "Get model_name failed. Param --output is invalid, root graph name: %s", | |||
| ge_root_model->GetRootGraph()->GetName().c_str()); | |||
| return PARAM_INVALID; | |||
| } | |||
| map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||
| GE_CHECK_NOTNULL(ge_model); | |||
| ge_model->SetName(model_name); | |||
| return SUCCESS; | |||
| } | |||
| Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs, | |||
| ModelBufferData &model, bool is_offline) { | |||
| rtContext_t ctx = nullptr; | |||
| @@ -599,20 +641,10 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||
| } | |||
| GE_CHECK_NOTNULL(ge_root_model); | |||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | |||
| ModelHelper model_helper; | |||
| string model_name = ""; | |||
| Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), | |||
| model_name); | |||
| if (name_ret != SUCCESS) { | |||
| ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); | |||
| GELOGE(FAILED, "Get model_name failed. Param --output is invalid."); | |||
| return PARAM_INVALID; | |||
| ret = SetModelNameForDump(ge_root_model); | |||
| if (ret != SUCCESS) { | |||
| return ret; | |||
| } | |||
| map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||
| GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null"); | |||
| ge_model->SetName(model_name); | |||
| ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "Save model failed"); | |||
| @@ -663,6 +695,34 @@ namespace { | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| bool CheckNoAicore(const ComputeGraphPtr &graph) { | |||
| for (const auto &node : graph->GetDirectNode()) { | |||
| if (node == nullptr) { | |||
| continue; | |||
| } | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| continue; | |||
| } | |||
| if (op_desc->GetOpEngineName() == kAIcoreEngine) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| } | |||
| void GeGenerator::RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | |||
| for (auto &input : inputs) { | |||
| GeTensorDesc input_desc = input.GetTensorDesc(); | |||
| bool is_const = false; | |||
| (void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const); | |||
| bool is_optional = IsOptional(input_desc); | |||
| if (!is_optional && !is_const) { | |||
| outputs.emplace_back(input); | |||
| } | |||
| } | |||
| } | |||
| Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | |||
| @@ -729,7 +789,9 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| GELOGI("ATC parser success in single op build."); | |||
| GeRootModelPtr ge_root_model = nullptr; | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); | |||
| vector<GeTensor> data_inputs; | |||
| RemoveConst(inputs, data_inputs); | |||
| GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, data_inputs, ge_root_model)); | |||
| map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); | |||
| GE_CHECK_NOTNULL(ge_root_model); | |||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | |||
| @@ -745,7 +807,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| bool all_shape = false; | |||
| (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); | |||
| if (all_shape) { | |||
| if (all_shape && CheckNoAicore(root_graph)) { | |||
| GELOGD("Get aicpu all_shape kernel!"); | |||
| vector<GeTensor> inputs_dynamic; | |||
| vector<GeTensor> outputs_dynamic; | |||
| @@ -812,18 +874,19 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor | |||
| // 2. Create InputData node. | |||
| int32_t arg_index = 0; | |||
| int32_t data_index = 0; | |||
| if (inputs.empty()) { | |||
| for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { | |||
| GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); | |||
| if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { | |||
| continue; | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false)); | |||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false, data_index)); | |||
| arg_index++; | |||
| } | |||
| } else { | |||
| for (const auto &in_desc : inputs) { | |||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true)); | |||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true, data_index)); | |||
| arg_index++; | |||
| } | |||
| } | |||
| @@ -882,13 +945,12 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo | |||
| "ge root model has no sub model") | |||
| GeModelPtr model_root = nullptr; | |||
| if (is_unknown_shape) { | |||
| model_root = make_shared<GeModel>(); | |||
| model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph())); | |||
| ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root); | |||
| model_root->SetName(ge_root_model->GetRootGraph()->GetName()); | |||
| auto name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
| model_root = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||
| } else { | |||
| model_root = ge_root_model->GetSubgraphInstanceNameToModel().begin()->second; | |||
| } | |||
| GE_CHECK_NOTNULL(model_root); | |||
| // set atc version | |||
| if (!SetAtcVersionInfo(*(model_root.get()))) { | |||
| GELOGW("SetPackageVersionInfo of atc failed!"); | |||
| @@ -382,58 +382,6 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt | |||
| return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); | |||
| } | |||
| static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, | |||
| const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) { | |||
| GE_CHECK_NOTNULL(out_anchor); | |||
| NodePtr in_node = out_anchor->GetOwnerNode(); | |||
| GE_CHECK_NOTNULL(in_node); | |||
| OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC); | |||
| OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) | |||
| .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) | |||
| .Build(); | |||
| (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false); | |||
| if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) { | |||
| GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) { | |||
| if (graph->GetGraphUnknownFlag()) { | |||
| GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str()); | |||
| return SUCCESS; | |||
| } | |||
| for (auto &node : graph->GetDirectNode()) { | |||
| // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT | |||
| auto op_desc = node->GetOpDesc(); | |||
| if (op_desc == nullptr) { | |||
| continue; | |||
| } | |||
| auto op_type = op_desc->GetType(); | |||
| if (op_type == NETOUTPUT) { | |||
| for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { | |||
| const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||
| NodePtr in_node = peer_out_anchor->GetOwnerNode(); | |||
| GE_CHECK_NOTNULL(in_node); | |||
| std::string in_node_op_type = in_node->GetType(); | |||
| if (in_node_op_type == CONSTANT) { | |||
| GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); | |||
| std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; | |||
| if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) { | |||
| GELOGE(FAILED, "Insert memcpy between %s and %s failed.", | |||
| in_node->GetName().c_str(), node->GetName().c_str()); | |||
| return FAILED; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { | |||
| bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); | |||
| com_graph->SetGraphUnknownFlag(false); | |||
| @@ -516,9 +464,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
| !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { | |||
| continue; | |||
| } | |||
| GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed."); | |||
| if (sub_graph->GetGraphUnknownFlag()) { | |||
| // unknown shape build flow | |||
| GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | |||
| @@ -597,11 +597,13 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||
| int64_t size = 0; | |||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||
| GE_IF_BOOL_EXEC(size < 0, | |||
| GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s", | |||
| size, node_op_desc->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s", | |||
| size, node_op_desc->GetName().c_str()); | |||
| return;); | |||
| GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, " | |||
| "maybe it is unknown shape node, Node_name:%s", | |||
| size, node_op_desc->GetName().c_str()); | |||
| REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, " | |||
| "maybe it is unknown shape node, Node_name:%s", | |||
| size, node_op_desc->GetName().c_str()); | |||
| return;); | |||
| batch_all_memory_size[batch_label].emplace_back(size); | |||
| if (batch_total_size.find(batch_label) == batch_total_size.end()) { | |||
| batch_total_size[batch_label] = size; | |||
@@ -692,23 +694,23 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
  auto out_anchor = n->GetOutDataAnchor(out_index);
  GE_IF_BOOL_EXEC(out_anchor == nullptr,
                  GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] anchor is null.",
                         n->GetName().c_str(), out_index);
                         n->GetName().c_str(), out_index);
                  REPORT_INNER_ERROR("E19999", "output anchor is null, node_name: %s output_index: %u.",
                         n->GetName().c_str(), out_index);
                         n->GetName().c_str(), out_index);
                  return false;);
  for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
    GE_IF_BOOL_EXEC(peer_in_anchor == nullptr,
                    GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] peer_in_anchor 0 is null.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    REPORT_INNER_ERROR("E19999", "output anchor peer is null, node_name: %s output_index: %u.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    return false;);
    auto peer_node = peer_in_anchor->GetOwnerNode();
    GE_IF_BOOL_EXEC(peer_node == nullptr,
                    GELOGE(FAILED, "[Check][Node]Node[%s] output[%u] peer node is null.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    REPORT_INNER_ERROR("E19999", "output anchor peer node is null, node_name: %s output_index: %u.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    return false;);
    // Get the continuous input type of the node, default is false
@@ -716,9 +718,9 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
    auto peer_in_node_desc = peer_node->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr,
                    GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] nodedesc is null.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    REPORT_INNER_ERROR("E19999", "output anchor peer op_desc is null, node_name:%s output_index:%u.",
                           n->GetName().c_str(), out_index);
                           n->GetName().c_str(), out_index);
                    return false;);
    // If GetBool fail, is_input_continuous is false.
@@ -819,7 +821,7 @@ bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &
        (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) ||
        (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) {
      GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] peer input node desc is null.",
             n->GetName().c_str(), out_index);
             n->GetName().c_str(), out_index);
      REPORT_INNER_ERROR("E19999", "get output anchor peer op_desc fail, node_name: %s output_index: %u.",
             n->GetName().c_str(), out_index);
      return false;
@@ -1105,9 +1107,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
                                           OpMemoryType mem_type, const NodePtr &n, uint32_t out_index,
                                           const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem,
                                           const bool continuous, int64_t memory_type) {
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr,
                                 REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed");
                                 return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null.");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      n == nullptr,
      REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed");
      return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null.");
  auto node_op_desc = n->GetOpDesc();
  GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr);
  std::string batch_label;
@@ -1159,10 +1162,12 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
  }
  auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type);
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr,
                                 REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u",
                                        n->GetName().c_str(), out_index);
                                 return nullptr, "[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index);
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      block == nullptr,
      REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u",
                         n->GetName().c_str(), out_index);
      return nullptr,
      "[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index);
  // Data and netoutput need zero copy block
  block->is_zero_copy_ = IsZeroCopyBlock(n, continuous);
@@ -1221,13 +1226,15 @@ void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutpu
Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges,
                                               const bool is_op_reuse_mem) {
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr,
                                 REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null");
                                 return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null.");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      n == nullptr,
      REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null");
      return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null.");
  auto node_op_desc = n->GetOpDesc();
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr,
                                 REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
                                 return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      node_op_desc == nullptr,
      REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
      return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
  // Continuous output supports ref only when all outputs ref inputs
  bool isAllOutputRef = true;
@@ -1242,7 +1249,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
  if (!isAllOutputRef && isOutputHasRef) {
    REPORT_INNER_ERROR("E19999", "continuous output node ref part input, not support now. node_name:%s",
                       n->GetName().c_str());
                       n->GetName().c_str());
    GELOGE(INTERNAL_ERROR, "[Check][OutRefStatus]continuous output node ref part input, not support, node_name:%s",
           n->GetName().c_str());
    return INTERNAL_ERROR;
@@ -1255,7 +1262,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
    auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
    if (output_op_desc == nullptr) {
      REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u",
                         n->GetName().c_str(), index);
                         n->GetName().c_str(), index);
      GELOGE(INTERNAL_ERROR, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index);
      return INTERNAL_ERROR;
    }
@@ -1268,7 +1275,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) {
      REPORT_CALL_ERROR("E19999", "get tensor_size failed, node_name:%s, output_index:%u",
                        n->GetName().c_str(), index);
                        n->GetName().c_str(), index);
      GELOGE(INTERNAL_ERROR, "[Get][TensorSize]node_name:%s, output_index:%u", n->GetName().c_str(), index);
      return INTERNAL_ERROR;
    }
@@ -1310,7 +1317,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
    ++(block->ref_count_);
  } else {
    REPORT_CALL_ERROR("E19999", "apply continuousMemory failed, node_name:%s, total_size:%ld",
                      n->GetName().c_str(), total_size);
                      n->GetName().c_str(), total_size);
    GELOGE(INTERNAL_ERROR, "[Apply][ContinuousMemory]node_name:%s, total_size:%ld", n->GetName().c_str(), total_size);
    return INTERNAL_ERROR;
  }
@@ -1319,26 +1326,33 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges,
                                              const bool is_op_reuse_mem, const bool continuous) {
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr,
                                 REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null");
                                 return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      n == nullptr,
      REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null");
      return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null");
  auto node_op_desc = n->GetOpDesc();
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr,
                                 REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
                                 return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      node_op_desc == nullptr,
      REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
      return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
  MemoryBlock *block = nullptr;
  NodeIndexIO node_index_io(n, index, kOut);
  int64_t size = 0;
  auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
  GE_IF_BOOL_EXEC(output_op_desc == nullptr,
                  REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index);
                  GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index);
                  return nullptr);
  GE_IF_BOOL_EXEC(
      output_op_desc == nullptr,
      REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u",
                         n->GetName().c_str(), index);
      GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index);
      return nullptr);
  GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
  size_t no_align_size = 0;
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS,
                                 REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index);
                                 return nullptr, "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index);
  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
      GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS,
      REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u",
                        n->GetName().c_str(), index);
      return nullptr,
      "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index);
  std::string symbol;
  bool reuse_input = false;
@@ -1346,9 +1360,9 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
    block = symbol_blocks_[symbol];
    GE_IF_BOOL_EXEC(block == nullptr,
                    REPORT_INNER_ERROR("E19999", "get ref block failed, node_name:%s, symbol:%s",
                           node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
                           node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
                    GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s",
                           node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
                           node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
                    return nullptr);
    // reduce old size
    size_t align_size = block->Size();
@@ -1392,24 +1406,28 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
    vector<bool> workspace_reuse_flag;
    block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index,
                        workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type);
    GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr,
                                   REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u",
                                          n->GetName().c_str(), block_size, index);
                                   return nullptr, "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u",
    GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
        block == nullptr,
        REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u",
                          n->GetName().c_str(), block_size, index);
        return nullptr,
        "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u",
        n->GetName().c_str(), block_size, index);
  }
  int out_count = 0;
  GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(),
                  REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s",
                         index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
                  GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s",
                         index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
                  return nullptr);
  GE_IF_BOOL_EXEC(
      index >= n->GetAllOutDataAnchors().size(),
      REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s",
                         index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
      GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s",
             index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
      return nullptr);
  auto out_data_anchor = n->GetOutDataAnchor(index);
  GE_IF_BOOL_EXEC(out_data_anchor == nullptr,
                  REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str());
                  GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str());
                  return nullptr);
  GE_IF_BOOL_EXEC(
      out_data_anchor == nullptr,
      REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str());
      GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str());
      return nullptr);
  for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
    auto owner_node = in_anchor->GetOwnerNode();
    auto op_desc = owner_node->GetOpDesc();
@@ -1616,12 +1634,13 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
          op_desc->GetOutputsSize(), memorys_type.size());
  if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) {
    REPORT_INNER_ERROR("E19999", "Attr[%s] size:%zu not equal to node output size:%zu, node_name:%s",
                       ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
                       op_desc->GetOutputsSize(), op_desc->GetName().c_str());
    GELOGE(INTERNAL_ERROR,
           "[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s",
           ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
           op_desc->GetOutputsSize(), op_desc->GetName().c_str());
                       ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
                       op_desc->GetOutputsSize(), op_desc->GetName().c_str());
    GELOGE(
        INTERNAL_ERROR,
        "[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s",
        ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
        op_desc->GetOutputsSize(), op_desc->GetName().c_str());
    return INTERNAL_ERROR;
  }
@@ -1748,9 +1767,11 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
    if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) {
      REPORT_INNER_ERROR("E19999", "Attr[%s]size:%zu is not equal to workspace size:%zu, node_name:%s",
                         TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str());
                         TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(),
                         temp.size(), n->GetName().c_str());
      GELOGE(INTERNAL_ERROR, "[Check][Attr]Attr %s size:%zu is not equal to workspace size:%zu, node_name:%s",
             TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str());
             TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(),
             temp.size(), n->GetName().c_str());
      return;
    }
    for (size_t i = 0; i < temp.size(); i++) {
@@ -2160,10 +2181,11 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index,
  ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type);
  if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) {
    REPORT_INNER_ERROR("E19999", "get workspace mem_type failed, "
                       "index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
                       index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str());
                       "index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
                       index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(),
                       workspace_memory_type.size(), node->GetName().c_str());
    GELOGE(INTERNAL_ERROR, "[Get][WorkspaceMemType]index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
           index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str());
           index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str());
    return false;
  }
  memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM;
@@ -496,7 +496,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
    REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, "
                       "when assign continuous input memory for node:%s, ", memory_type, node->GetName().c_str());
    GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s",
           memory_type, node->GetName().c_str());
           memory_type, node->GetName().c_str());
    return FAILED;
  }
  // The head and tail of hcom continuous input need an extra 512 bytes
@@ -929,8 +929,8 @@ Status GraphMemoryAssigner::AssignReferenceMemory() {
    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s "
                         "when AssignReferenceMemory",
                         out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
                         "when AssignReferenceMemory",
                         out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
      GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
             out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
      return ge::FAILED;
@@ -574,6 +574,50 @@ Status ModelBuilder::MergeWeights() {
  return SUCCESS;
}
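// Saves the TBE kernel of this op's atomic-clean companion node into the model's kernel store,
// so the atomic workspace-clean kernel ships inside the built model.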
Status ModelBuilder::SaveAtomicTBEKernel(const OpDescPtr &op_desc) {
  ge::NodePtr atomic_clean_node = nullptr;
  atomic_clean_node = op_desc->TryGetExtAttr("atomic_clean_node_ptr", atomic_clean_node);
  if (atomic_clean_node == nullptr) {
    return SUCCESS;
  }
  ge::OpDescPtr atomic_op_desc = atomic_clean_node->GetOpDesc();
  GE_CHECK_NOTNULL(atomic_op_desc);
  TBEKernelPtr tbe_kernel = atomic_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
  if (tbe_kernel == nullptr) {
    std::string kernel_name;
    GeAttrValue::BYTES kernel_buffer;
    (void) AttrUtils::GetStr(atomic_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name);
    (void) AttrUtils::GetBytes(atomic_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer);
    if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) {
      GE_CHECK_NOTNULL(kernel_buffer.GetData());
      std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize());
      tbe_kernel = MakeShared<OpKernelBin>(kernel_name, std::move(data));
      GE_CHECK_NOTNULL(tbe_kernel);
    }
  }
  if (tbe_kernel == nullptr) {
    GELOGD("Atomic_clean_node doesn't have tbe_kernel.");
    return SUCCESS;
  }
  tbe_kernel_store_.AddTBEKernel(tbe_kernel);
  GELOGD("Atomic_clean_node tbe_kernel_name %s!", tbe_kernel->GetName().c_str());
  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, tbe_kernel->GetName());
  std::string kernel_name;
  (void) AttrUtils::GetStr(atomic_op_desc, atomic_op_desc->GetName() + "_kernelname", kernel_name);
  (void) AttrUtils::SetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name);
  std::string meta_data;
  (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_METADATA, meta_data);
  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_METADATA, meta_data);
  std::string json_string;
  (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_MAGIC, json_string);
  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_MAGIC, json_string);
  return SUCCESS;
}
Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
  // Add weight
  ge_model.SetWeight(weight_buffer_);
@@ -607,6 +651,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
    }
    tbe_name_set.insert(tbe_kernel->GetName());
    tbe_kernel_store_.AddTBEKernel(tbe_kernel);
    GE_CHK_STATUS_RET(SaveAtomicTBEKernel(node_op_desc), "[Save][TBEKernel] save atomic tbekernel failed!");
  }
  SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types);
@@ -89,6 +89,8 @@ class ModelBuilder {
  void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
                              std::set<std::string> &aicpu_tf_op_types);
  Status SaveAtomicTBEKernel(const OpDescPtr &op_desc);
  uint64_t session_id_;
  map<int64_t, size_t> mem_type_to_mem_offset_;
@@ -49,6 +49,7 @@ const char *const kIsLastNode = "is_last_node";
const char *const kIsInputVar = "INPUT_IS_VAR";
const char *const kIsOutputVar = "OUTPUT_IS_VAR";
const char *const kProfilingMode = "PROFILING_MODE";
const char *const kIteratorV2 = "IteratorV2";
const uint32_t kProfilingArStep = 2;
const uint64_t kProfilingFpStartLogid = 1;
const uint64_t kProfilingBpEndLogid = 2;
@@ -57,6 +58,7 @@ const uint64_t kProfilingArEndLogid = 4;
const uint64_t kProfilingIterEndLogid = 65535;
const int64_t kHashFactor = 100000;
const int64_t kInvalidGroupId = -1;
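// Node types treated as forward-pass start candidates when auto-locating the FP profiling point.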
const std::set<std::string> kFpNodeTypes = {ge::DATA, ge::GETNEXT, kIteratorV2};
}  // namespace
namespace ge {
TaskGenerator::TaskGenerator(uint8_t *var_mem_base, uint64_t var_mem_size) {
@@ -621,8 +623,10 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP
    if (op_kernel_lib_name.empty()) {
      continue;
    }
    if (op_desc->GetType() == GETNEXT || op_desc->GetType() == DATA) {
    auto type = op_desc->GetType();
    std::string original_type;
    (void)AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, original_type);
    if (kFpNodeTypes.find(type) != kFpNodeTypes.end() || kFpNodeTypes.find(original_type) != kFpNodeTypes.end()) {
      auto out_anchor = node->GetOutDataAnchor(0);
      for (auto &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
        GE_CHECK_NOTNULL(peer_in_anchor);
@@ -20,9 +20,12 @@
#include <string>
#include "graph/load/model_manager/model_manager.h"
#include "graph/load/model_manager/davinci_model.h"
#include "omm/csa_interact.h"
namespace ge {
using Uint32Pair = pair<uint32_t, uint32_t>;
const uint32_t kInvalidModelId = UINT32_MAX;
GraphExecutor::GraphExecutor()
    : init_flag_(false),
      train_graph_flag_(false),
@@ -358,7 +361,8 @@ Status GraphExecutor::ExecuteGraph(GraphId graph_id, const GeRootModelPtr &ge_ro
}
Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model,
                                        const std::vector<InputTensorInfo> &input_tensor) {
                                        const std::vector<InputTensorInfo> &input_tensor,
                                        const RunAsyncCallback& callback) {
  GELOGI("[GraphExecutor] Start to async execute graph, graph_id=%u", graph_id);
  if (graph_id != last_graph_id_) {
    auto ret = FreeExecuteMemory();
@@ -368,7 +372,7 @@ Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &
  }
  last_graph_id_ = graph_id;
  GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED);
  Status ret = AsyncExecuteModel(ge_root_model->GetModelId(), input_tensor);
  Status ret = AsyncExecuteModel(ge_root_model, input_tensor, callback);
  if (ret != SUCCESS) {
    GELOGE(GE_GRAPH_SYNC_MODEL_FAILED, "[GraphExecutor] AsyncExecuteModel Error!");
    return GE_GRAPH_SYNC_MODEL_FAILED;
@@ -378,11 +382,81 @@ Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &
  return SUCCESS;
}
Status GraphExecutor::AsyncExecuteModel(uint32_t model_id, const std::vector<InputTensorInfo> &inputs) {
bool CompareByLoad(const Uint32Pair &lhs, const Uint32Pair &rhs) {
  return lhs.second < rhs.second;
}
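// Selects which loaded instance of the root model should serve this request: a single instance
// is returned directly; among multiple instances, the one with the lowest load wins.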
uint32_t GraphExecutor::GetExecuteModelId(const GeRootModelPtr &ge_root_model) {
  std::vector<uint32_t> model_ids = ge_root_model->GetAllModelId();
  if (model_ids.empty()) {
    return kInvalidModelId;
  }
  if (model_ids.size() == 1) {
    return ge_root_model->GetModelId();
  }
  std::vector<Uint32Pair> model_id_to_loads;
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  for (auto model_id : model_ids) {
    auto davinci_model = model_manager->GetModel(model_id);
    auto hybrid_model = model_manager->GetHybridModel(model_id);
    if (hybrid_model == nullptr) {
      GE_CHECK_NOTNULL(davinci_model);
    }
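    // Load metric: queued input count plus one if the instance is currently running; an idle
    // instance (load == 0) is picked immediately.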
    uint32_t input_load = hybrid_model != nullptr ? hybrid_model->GetDataInputerSize() :
                                                    davinci_model->GetDataInputerSize();
    uint32_t running_load = hybrid_model != nullptr ? static_cast<uint32_t>(hybrid_model->GetRunningFlag()) :
                                                      static_cast<uint32_t>(davinci_model->GetRunningFlag());
    uint32_t load = input_load + running_load;
    if (load == 0) {
      return model_id;
    }
    model_id_to_loads.emplace_back(model_id, load);
  }
  sort(model_id_to_loads.begin(), model_id_to_loads.end(), CompareByLoad);
  if (model_id_to_loads.empty()) {
    return kInvalidModelId;
  }
  return model_id_to_loads.begin()->first;
}
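// Registers the RunAsync callback on whichever form the model was loaded as (hybrid or davinci).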
Status GraphExecutor::SetCallback(uint32_t model_id, const GeRootModelPtr &ge_root_model,
                                  const RunAsyncCallback &callback) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  if (model_manager->IsNeedHybridLoad(*ge_root_model)) {
    auto model = model_manager->GetHybridModel(model_id);
    GE_CHECK_NOTNULL(model);
    if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) {
      GELOGE(FAILED, "SetRunAsyncListenerCallback failed.");
      return FAILED;
    }
  } else {
    auto model = model_manager->GetModel(model_id);
    GE_CHECK_NOTNULL(model);
    if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) {
      GELOGE(FAILED, "SetRunAsyncListenerCallback failed.");
      return FAILED;
    }
  }
  return SUCCESS;
}
Status GraphExecutor::AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector<InputTensorInfo> &inputs,
                                        const RunAsyncCallback &callback) {
  uint32_t model_id = GetExecuteModelId(ge_root_model);
  if (model_id == kInvalidModelId) {
    GELOGE(INTERNAL_ERROR, "No valid model id.");
    return INTERNAL_ERROR;
  }
  try {
    auto model_manager = ge::ModelManager::GetInstance();
    GE_CHECK_NOTNULL(model_manager);
    GELOGI("RunAsync begin.model_id %u", model_id);
    if (SetCallback(model_id, ge_root_model, callback) != SUCCESS) {
      GELOGE(FAILED, "RunAsync: SetCallBack for model fail");
      return FAILED;
    }
    Status ret = model_manager->DataInputTensor(model_id, inputs);
    if (ret != SUCCESS) {
@@ -50,7 +50,7 @@ class GraphExecutor {
                          std::vector<GeTensor> &output_tensor);
  ge::Status ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model,
                               const std::vector<InputTensorInfo> &input_tensor);
                               const std::vector<InputTensorInfo> &input_tensor, const RunAsyncCallback &callback);
  Status SetCondition(std::mutex *mutex, std::condition_variable *cond, std::shared_ptr<GraphModelListener> listener);
@@ -116,6 +116,8 @@ class GraphExecutor {
  static Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);
  uint32_t GetExecuteModelId(const GeRootModelPtr &ge_root_model);
 private:
  Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data,
                          OutputData &graph_output_data, std::vector<InputOutputDescInfo> &output_desc);
@@ -123,7 +125,8 @@ class GraphExecutor {
  Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &input_tensor,
                          std::vector<GeTensor> &output_tensor);
  Status AsyncExecuteModel(uint32_t model_id, const std::vector<InputTensorInfo> &input_tensor);
  Status AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector<InputTensorInfo> &input_tensor,
                           const RunAsyncCallback &callback);
  void InitModelIdInfo(std::vector<uint32_t> &out_model_id_info, std::vector<SubGraphInfoPtr> &sub_graph_vec,
                       uint32_t output_size);
@@ -132,6 +135,9 @@ class GraphExecutor {
  Status MallocInOutBuffer(const std::vector<uint64_t> &buffer_size, std::vector<void *> &data_addr);
  static Status SetCallback(uint32_t model_id, const GeRootModelPtr &ge_root_model,
                            const RunAsyncCallback &callback);
  bool init_flag_;
  bool train_graph_flag_;
@@ -60,7 +60,6 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge
    GELOGE(GE_GRAPH_PARAM_NULLPTR, "[LoadGraph] GE load graph model_ptr is nullptr.");
    return GE_GRAPH_PARAM_NULLPTR;
  }
  model_id = ge_root_model_ptr->GetModelId();
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
@@ -134,6 +134,8 @@ class DataInputer {
  ///
  void Stop() { queue_.Stop(); }
  uint32_t Size() { return queue_.Size(); }
 private:
  ///
  /// @ingroup domi_ome
@@ -31,6 +31,7 @@
#include "common/scope_guard.h"
#include "common/thread_pool.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/compute_graph.h"
#include "graph/debug/ge_attr_define.h"
@@ -297,6 +298,11 @@ void DavinciModel::ReleaseTask() {
      GE_CHK_STATUS(task->Release(), "Release task failed.");
    }
  }
  for (auto &item : label_goto_args_) {
    GE_FREE_RT_LOG(item.second.first);
  }
  label_goto_args_.clear();
}
Status DavinciModel::Assign(const GeModelPtr &ge_model) {
@@ -1334,6 +1340,39 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info
  }
}
Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type, void *&arg_addr, uint32_t &arg_size) {
  std::lock_guard<std::mutex> lock(label_args_mutex_);
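  // Return the cached device args if this label's goto args were already built.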
  auto it = label_goto_args_.find(label_index);
  if (it != label_goto_args_.end()) {
    arg_addr = it->second.first;
    arg_size = it->second.second;
    return SUCCESS;
  }
  if (label_index >= label_list_.size()) {
    GELOGE(INTERNAL_ERROR, "Invalid label id:%u, label size:%zu", label_index, label_list_.size());
    return INTERNAL_ERROR;
  }
  GE_CHECK_NOTNULL(label_list_[label_index]);
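  // Allocate device memory for the label table and copy the label device info into it; the
  // buffer is cached in label_goto_args_ and freed in ReleaseTask().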
  vector<rtLabel_t> label_used = { label_list_[label_index] };
  arg_size = label_used.size() * sizeof(rtLabelDevInfo);
  rtError_t rt_ret = rtMalloc(&arg_addr, arg_size, mem_type);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  label_goto_args_[label_index] = { arg_addr, arg_size };
  rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), arg_addr, arg_size);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
/// @ingroup ge
/// @brief LabelSet Op Initialize.
/// @param [in] op_desc: LabelSet Op descriptor.
@@ -2547,6 +2586,8 @@ void *DavinciModel::Run(DavinciModel *model) {
  ErrorManager::GetInstance().SetStage(ErrorMessage::kModelExecute, ErrorMessage::kModelExecute);
  while (model->RunFlag()) {
    // The model hasn't truly started running before data is received.
    model->SetRunningFlag(false);
    bool rslt_flg = true;
    if (model->GetDataInputer() == nullptr) {
      GELOGW("Data inputer is nullptr.");
@@ -2556,6 +2597,8 @@ void *DavinciModel::Run(DavinciModel *model) {
    std::shared_ptr<InputDataWrapper> data_wrapper;
    Status ret = model->GetDataInputer()->Pop(data_wrapper);
    // The model truly starts running only after data has been received.
    model->SetRunningFlag(true);
    if (data_wrapper == nullptr || ret != SUCCESS) {
      GELOGI("data_wrapper is null!");
      continue;
@@ -2642,7 +2685,9 @@ void *DavinciModel::Run(DavinciModel *model) {
    model->iterator_count_++;
    model->is_first_execute_ = false;
    GELOGI("run iterator count is %lu", model->iterator_count_);
    // model run finished
    model->SetRunningFlag(false);
    GELOGI("run iterator count is %lu, model_id:%u", model->iterator_count_, model->model_id_);
  }
  CsaInteract::GetInstance().WriteInternalErrorCode();
@@ -2700,7 +2745,7 @@ Status DavinciModel::ModelRunStart() {
  error_context_ = ErrorManager::GetInstance().GetErrorContext();
  CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this);
  GELOGI("model tread create success, model id:%u.", model_id_);
  GELOGI("model thread create success, model id:%u.", model_id_);
  return SUCCESS;
}
@@ -2836,23 +2881,16 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
  GELOGI("DavinciModel::UpdateKnownNodeArgs in");
  GE_CHK_STATUS_RET(CreateKnownZeroCopyMap(inputs, outputs),
                    "DavinciModel::UpdateKnownNodeArgs create map for input/output zero copy.");
  if (!base_addr_not_changed_) {
    total_io_addrs_.clear();
    orig_total_io_addrs_.clear();
    for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
      auto &task = task_list_[task_index];
      if (task != nullptr) {
        Status ret = task->UpdateArgs();
        if (ret != SUCCESS) {
          GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index);
          return FAILED;
        }
  total_io_addrs_.clear();
  for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
    auto &task = task_list_[task_index];
    if (task != nullptr) {
      Status ret = task->UpdateArgs();
      if (ret != SUCCESS) {
        GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index);
        return FAILED;
      }
    }
    // cache latest iterator io addr
    orig_total_io_addrs_ = total_io_addrs_;
  } else {
    total_io_addrs_ = orig_total_io_addrs_;
  }
  GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
@@ -2892,6 +2930,14 @@ Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) {
  return SUCCESS;
}
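// Queries the runtime for a feature capability; is_support becomes true when the runtime
// reports RT_CAPABILITY_SUPPORT for the given feature.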
Status DavinciModel::CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const {
  int64_t value = RT_CAPABILITY_SUPPORT;
  auto rt_ret = rtGetRtCapability(featureType, featureInfo, &value);
  GE_CHK_BOOL_RET_STATUS(rt_ret == RT_ERROR_NONE, FAILED, "call rtGetRtCapability failed!");
  is_support = (value == RT_CAPABILITY_SUPPORT) ? true : false;
  return SUCCESS;
}
Status DavinciModel::MallocKnownArgs() {
  GELOGI("DavinciModel::MallocKnownArgs in");
  const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
@@ -2910,20 +2956,22 @@ Status DavinciModel::MallocKnownArgs() {
      return ret;
    }
  }
  rtError_t rt_ret;
  // malloc args memory
  if (total_args_size_ == 0) {
    GELOGW("DavinciModel::MallocKnownArgs total_args_size_ equals to zero.");
    return SUCCESS;
  }
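  // Prefer TS-accessible 4G-limited memory for the args buffers when the runtime supports it;
  // otherwise fall back to HBM.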
  bool is_support = false;
  GE_CHK_STATUS_RET_NOLOG(CheckCapability(FEATURE_TYPE_MEMORY, MEMORY_INFO_TS_4G_LIMITED, is_support));
  auto mem_type = is_support ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;
  rtError_t rt_ret = rtMalloc(&args_, total_args_size_, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  if (total_args_size_ != 0) {
    rt_ret = rtMalloc(&args_, total_args_size_, mem_type);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
  }
  // malloc dynamic and static hybrid memory
  if (total_hybrid_args_size_ != 0) {
    rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
    rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, mem_type);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -2932,7 +2980,7 @@ Status DavinciModel::MallocKnownArgs() {
  // malloc fixed addr memory, eg: rts op
  if (total_fixed_addr_size_ != 0) {
    GELOGI("Begin to allocate fixed addr.");
    rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, RT_MEMORY_HBM);
    rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, mem_type);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -3025,9 +3073,8 @@ Status DavinciModel::DistributeTask() {
                  task_def.kernel_ex().op_index());
    OpDescPtr op = GetOpByIndex(op_index);
    GE_CHECK_NOTNULL(op);
    if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) {
      bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo();
      bool call_dump = OpNeedDump(op->GetName()) && task->CallSaveDumpInfo();
      if (call_dump || is_op_debug_reg_) {
        SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
      }
@@ -3047,11 +3094,16 @@ Status DavinciModel::DistributeTask() {
  return SUCCESS;
}
void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
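// Dump is required when the dump properties name DUMP_ALL_MODEL, this model's dump name, or
// its om file name.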
bool DavinciModel::ModelNeedDump() {
  auto all_dump_model = GetDumpProperties().GetAllDumpModel();
  bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end();
  bool findByModelName = all_dump_model.find(name_) != all_dump_model.end();
  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) {
  bool ret = all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
             all_dump_model.find(dump_model_name_) != all_dump_model.end() ||
             all_dump_model.find(om_name_) != all_dump_model.end();
  return ret;
}
void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
  if (ModelNeedDump()) {
    GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id);
    data_dumper_.SaveEndGraphId(task_id, stream_id);
  }
@@ -3851,7 +3903,10 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id)
}
void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name) {
  data_dumper_.SetModelName(name_);
  if (dump_model_name_.empty()) {
    dump_model_name_ = name_;
  }
  data_dumper_.SetModelName(dump_model_name_);
  data_dumper_.SetModelId(model_id_);
  data_dumper_.SetOmName(om_name_);
  data_dumper_.SetComputeGraph(graph);
@@ -4040,7 +4095,7 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
Status DavinciModel::InitL1DataDumperArgs() {
  auto all_dump_model = GetDumpProperties().GetAllDumpModel();
  bool find_by_om_name = all_dump_model.find(om_name_) != all_dump_model.end();
  bool find_by_model_name = all_dump_model.find(name_) != all_dump_model.end();
  bool find_by_model_name = all_dump_model.find(dump_model_name_) != all_dump_model.end();
  bool dump_l1fusion_op =
      (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || find_by_om_name || find_by_model_name;
  if (dump_l1fusion_op) {
@@ -4061,4 +4116,10 @@ Status DavinciModel::InitL1DataDumperArgs() {
  return SUCCESS;
}
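// Forwards the RunAsync callback to the model's listener; fails if the listener is not a
// RunAsyncListener.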
Status DavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
  auto listener = dynamic_cast<RunAsyncListener *>(listener_.get());
  GE_CHECK_NOTNULL(listener);
  listener->SetCallback(callback);
  return SUCCESS;
}
}  // namespace ge
@@ -221,6 +221,11 @@ class DavinciModel {
  ///
  DataInputer *const GetDataInputer() const { return data_inputer_; }
  uint32_t GetDataInputerSize() {
    GE_CHECK_NOTNULL(data_inputer_);
    return data_inputer_->Size();
  }
  // get Stream number
  uint32_t StreamNum() const { return runtime_param_.stream_num; }
@@ -248,7 +253,10 @@ class DavinciModel {
  string Name() const { return name_; }
  // om_name
  string OmName() const { return om_name_; }
  const string &OmName() const { return om_name_; }
  // dump_model_name
  const string &DumpModelName() const { return dump_model_name_; }
  // version
  uint32_t Version() const { return version_; }
@@ -273,6 +281,8 @@ class DavinciModel {
  const vector<rtLabel_t> &GetLabelList() const { return label_list_; }
  Status GetLabelGotoAddr(uint32_t label_index, rtMemType_t memory_type, void *&addr, uint32_t &size);
  Status DestroyThread();
  // get Op
@@ -481,6 +491,12 @@ class DavinciModel {
    data_dumper_.DumpShrink();
  }
  bool OpNeedDump(const string &op_name) {
    return GetDumpProperties().IsLayerNeedDump(dump_model_name_, om_name_, op_name);
  }
  bool ModelNeedDump();
  void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
  DavinciModel &operator=(const DavinciModel &model) = delete;
@@ -528,11 +544,11 @@ class DavinciModel {
  }
  void SetKnownNode(bool known_node) { known_node_ = known_node; }
  bool IsKnownNode() { return known_node_; }
  Status CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const;
  Status MallocKnownArgs();
  Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
  Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
  Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true);
  void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }
  Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const;
  Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims,
@@ -540,6 +556,7 @@ class DavinciModel {
  // om file name
  void SetOmName(const string &om_name) { om_name_ = om_name; }
  void SetDumpModelName(const string &dump_model_name) { dump_model_name_ = dump_model_name; }
  void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); }
  const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }
@@ -548,6 +565,10 @@ class DavinciModel {
    return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
  }
  bool GetRunningFlag() const { return running_flg_; }
  void SetRunningFlag(bool flag) { running_flg_ = flag; }
  Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);
 private:
  // memory address of weights
  uint8_t *weights_mem_base_;
@@ -886,6 +907,7 @@ class DavinciModel {
  // used for inference data dump
  string om_name_;
  string dump_model_name_;
  uint32_t version_;
  GeModelPtr ge_model_;  // release after DavinciModel::Init
@@ -911,6 +933,8 @@ class DavinciModel {
  shared_ptr<ModelListener> listener_;
  bool run_flg_;
  // indicates whether the model is currently running with data
  bool running_flg_ = false;
  mutex mux_run_flg_;
@@ -930,6 +954,9 @@ class DavinciModel {
  vector<rtLabel_t> label_list_;
  set<uint32_t> label_id_indication_;
  mutex label_args_mutex_;
  map<uint32_t, pair<void *, uint32_t>> label_goto_args_;
  mutex outside_addrs_mutex_;
  vector<ZeroCopyTask> zero_copy_tasks_;  // Task used Data or NetOutput addr.
  set<const void *> copy_only_addrs_;     // Address need copy to original place.
@@ -1002,8 +1029,6 @@ class DavinciModel {
  map<const void *, void *> known_input_data_info_;
  map<const void *, void *> known_output_data_info_;
  vector<void *> total_io_addrs_;
  vector<void *> orig_total_io_addrs_;
  bool base_addr_not_changed_ = false;
  vector<vector<int64_t>> batch_info_;
  vector<vector<int64_t>> combined_batch_info_;
@@ -271,7 +271,7 @@ ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uin
  return SUCCESS;
}
ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &model_name,
ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &om_name,
                                                 const shared_ptr<ge::GeRootModel> &ge_root_model,
                                                 const shared_ptr<ModelListener> &listener) {
  auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model);
@@ -279,13 +279,24 @@ ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string
  hybrid_model->SetListener(listener);
  hybrid_model->SetModelId(model_id);
  hybrid_model->SetDeviceId(GetContext().DeviceId());
  hybrid_model->SetModelName(model_name);
  hybrid_model->SetOmName(om_name);
  GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. model_id = %u", model_id);
  auto shared_model = std::shared_ptr<hybrid::HybridDavinciModel>(hybrid_model.release());
  InsertModel(model_id, shared_model);
  return SUCCESS;
}
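// Hybrid (dynamic-shape) loading is required for unknown-shape graphs, dynamic-shape
// partitioned graphs, and host execution.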
bool ModelManager::IsNeedHybridLoad(ge::GeRootModel &ge_root_model) {
  auto root_graph = ge_root_model.GetRootGraph();
  if (root_graph == nullptr) {
    GELOGE(FAILED, "no model on root model");
    return false;
  }
  bool is_shape_unknown = root_graph->GetGraphUnknownFlag();
  bool is_dsp_partitioned_graph = false;
  (void)AttrUtils::GetBool(root_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dsp_partitioned_graph);
  return is_shape_unknown || is_dsp_partitioned_graph || GetContext().GetHostExecFlag();
}
///
/// @ingroup domi_ome
/// @brief load model online
@@ -296,13 +307,12 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
  GE_CHK_BOOL_RET_STATUS(listener.get() != nullptr, PARAM_INVALID, "Param incorrect, listener is null");
  if (model_id == INVALID_MODEL_ID) {
    GenModelId(&model_id);
    GELOGD("Generate new model_id:%u", model_id);
  }
  auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel();
  string model_name = "";
  bool is_shape_unknown = ge_root_model->GetRootGraph()->GetGraphUnknownFlag();
  // if multi subgraph is known, do hybrid load process
  if (is_shape_unknown || GetContext().GetHostExecFlag() || (name_to_model.size() > 1)) {
    return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener);
  string om_name;
  if (IsNeedHybridLoad(*ge_root_model)) {
    return DoLoadHybridModelOnline(model_id, om_name, ge_root_model, listener);
  }
  mmTimespec timespec = mmGetTickCount();
@@ -330,7 +340,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
  GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Assign(ge_model)), GELOGW("assign model to modeldef failed.");
                  break;);
  GE_TIMESTAMP_END(Assign, "GraphLoader::ModelAssign");
  /// In multi-threaded inference, threads that share a session_id can fail; models loaded from
  /// the same root model would otherwise all carry the same session_id, so generate a fresh
  /// session_id for inference here during model load.
  if (!ge_root_model->GetTrainFlag()) {
    uint64_t new_session_id;
    ret = GenSessionId(new_session_id);
    GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed.");
    ret = davinci_model->UpdateSessionId(new_session_id);
    GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed.");
    ge_model->InsertSessionMap(model_id, new_session_id);
    GELOGD("Update new session id: %lu.", new_session_id);
  }
  GE_TIMESTAMP_START(Init);
  GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Init()), GELOGW("DavinciInit failed."); break;);
  GE_TIMESTAMP_END(Init, "GraphLoader::ModelInit");
@@ -343,16 +364,16 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
  return ret;
}
void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
  GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
void ModelManager::InsertModel(uint32_t model_id, std::shared_ptr<DavinciModel> &davinci_model) {
  GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", model_id);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  model_map_[id] = davinci_model;
  model_map_[model_id] = davinci_model;
}
void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
  GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
void ModelManager::InsertModel(uint32_t model_id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
  GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", model_id);
  std::lock_guard<std::recursive_mutex> lock(map_mutex_);
  hybrid_model_map_[id] = hybrid_model;
  hybrid_model_map_[model_id] = hybrid_model;
}
Status ModelManager::DeleteModel(uint32_t id) {
@@ -294,6 +294,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
                                std::vector<InputOutputDims> &output_dims);
  bool IsDynamicShape(uint32_t model_id);
  bool IsNeedHybridLoad(ge::GeRootModel &ge_root_model);
  ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);
  ge::Status EnableExceptionDump(const std::map<string, string> &options);
@@ -329,8 +330,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
  /// @ingroup domi_ome
  /// @brief insert new model into model manager set
  ///
  void InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model);
  void InsertModel(uint32_t id, std::shared_ptr<hybrid::HybridDavinciModel> &hybrid_model);
  void InsertModel(uint32_t model_id, std::shared_ptr<DavinciModel> &davinci_model);
  void InsertModel(uint32_t model_id, std::shared_ptr<hybrid::HybridDavinciModel> &hybrid_model);
  ///
  /// @ingroup domi_ome
@@ -384,7 +384,8 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc
  switch (mem_type) {
    case RT_MEMORY_RDMA_HBM:
      if (offset < 0) {
        GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast<uint8_t *>(offset));
        GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p",
               reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset)));
        return PARAM_INVALID;
      }
      var_addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset));
@@ -45,10 +45,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
Status EndGraphTaskInfo::Distribute() {
  GELOGI("EndGraphTaskInfo Distribute Start.");
  GE_CHECK_NOTNULL(davinci_model_);
  auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
      all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
      all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
  if (davinci_model_->ModelNeedDump()) {
    GELOGI("Start to call rtEndGraphEx");
    rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag);
    if (rt_ret != RT_ERROR_NONE) {
@@ -238,8 +238,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
}
void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) {
  if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
                                                          op_desc->GetName())) {
  if (davinci_model_->OpNeedDump(op_desc->GetName())) {
    dump_flag_ = RT_KERNEL_DUMPFLAG;
    dump_args_ = addr;
  }
@@ -124,7 +124,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
      return FAILED;
    }
    ret = InitTVMTask(args_offset_tmp[0], kernel_def);
    io_addr_offset_ = args_offset_tmp[0];
    ret = InitTVMTask(io_addr_offset_, kernel_def);
  } else if (kernel_type_ == ccKernelType::CUSTOMIZED) {
    ret = InitAICPUCustomTask(context.op_index(), kernel_def);
  } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
@@ -380,7 +381,8 @@ Status KernelTaskInfo::Distribute() {
  GELOGD("KernelTaskInfo Distribute Start.");
  if (davinci_model_->IsKnownNode()) {
    if (kernel_type_ == ccKernelType::TE) {
      args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
      args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_)
                            : davinci_model_->GetCurrentArgsAddr(args_offset_);
    } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
      args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
    }
@@ -407,10 +409,7 @@ Status KernelTaskInfo::Distribute() {
          call_skt, task_id_, skt_id_, skt_info.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_);
  // l1 fusion enable and env flag open (kCloseSkt for skt debug)
  bool open_dump = false;
  auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
      all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
      all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
  if (davinci_model_->ModelNeedDump()) {
    open_dump = true;
  }
  if (call_skt && (env_flag != kCloseSkt) && !open_dump) {
@@ -449,29 +448,41 @@ void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) {
  }
}
| Status KernelTaskInfo::CopyNoncontinuousArgs(uint16_t offset) { | |||
| GE_CHECK_NOTNULL(davinci_model_); | |||
| // copy new io addrs | |||
| vector<void *> io_addrs = io_addrs_; | |||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||
| auto addr_size = kAddrLen * io_addrs.size(); | |||
| // copy io addr | |||
| errno_t sec_ret = memcpy_s(args_addr.get() + offset, addr_size, io_addrs.data(), addr_size); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| // copy args to device | |||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGD("Copy noncontinuous args success, kernel type %d.", kernel_type_); | |||
| return SUCCESS; | |||
| } | |||
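| The routine above stages the refreshed I/O pointers in a host-side mirror of the args buffer, then pushes the whole mirror to the device in one copy. A minimal standalone sketch of that staging pattern, with plain memcpy standing in for memcpy_s/rtMemcpy and all names assumed for illustration: | |||
| #include <cstring> | |||
| #include <vector> | |||
| // Patch new device pointers into a host mirror at `offset`, then push the | |||
| // whole mirror to the device buffer (the real code bounds-checks and uses | |||
| // rtMemcpy with RT_MEMCPY_HOST_TO_DEVICE for the second copy). | |||
| void UpdateArgsAt(std::vector<unsigned char> &host_mirror, size_t offset, | |||
|                   const std::vector<void *> &io_addrs, unsigned char *device_args) { | |||
|   const size_t bytes = io_addrs.size() * sizeof(void *); | |||
|   std::memcpy(host_mirror.data() + offset, io_addrs.data(), bytes); | |||
|   std::memcpy(device_args, host_mirror.data(), host_mirror.size()); | |||
| } | |||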
| Status KernelTaskInfo::UpdateArgs() { | |||
| GELOGI("KernelTaskInfo::UpdateArgs in."); | |||
| GE_CHECK_NOTNULL(davinci_model_); | |||
| if (kernel_type_ == ccKernelType::TE) { | |||
| if (l2_buffer_on_) { | |||
| return CopyNoncontinuousArgs(io_addr_offset_); | |||
| } | |||
| davinci_model_->SetTotalIOAddrs(io_addrs_); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| vector<void *> io_addrs = io_addrs_; | |||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||
| uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead); | |||
| auto addrs_size = sizeof(uint64_t) * io_addrs.size(); | |||
| errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| // copy args to device | |||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| return CopyNoncontinuousArgs(sizeof(aicpu::AicpuParamHead)); | |||
| } | |||
| GELOGI("KernelTaskInfo::UpdateArgs success."); | |||
| return SUCCESS; | |||
| } | |||
| @@ -516,8 +527,8 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||
| return SUCCESS; | |||
| } | |||
| char *sm_contrl = const_cast<char *>(sm_desc.data()); | |||
| rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_contrl); | |||
| char *sm_control = const_cast<char *>(sm_desc.data()); | |||
| rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_control); | |||
| uint64_t gen_base_addr = davinci_model_->GetRtBaseAddr(); | |||
| // There is no weight for te op now. Update L2_mirror_addr by data memory base. | |||
| @@ -545,19 +556,31 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||
| return SUCCESS; | |||
| } | |||
| void KernelTaskInfo::SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model) { | |||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||
| davinci_model->SetTotalArgsSize(args_size); | |||
| } | |||
| void KernelTaskInfo::SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model) { | |||
| hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||
| davinci_model->SetHybridArgsSize(args_size); | |||
| } | |||
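| Both setters share one bump-allocation scheme: a task records the current total as its own offset and then grows the total, so the offsets partition a single contiguous args buffer among all tasks. A self-contained sketch of the idea (type and member names are assumptions): | |||
| #include <cstdint> | |||
| struct ArgsPool { | |||
|   uint32_t total = 0; | |||
|   // Reserve `size` bytes; the returned value is where this task's slice starts. | |||
|   uint32_t Reserve(uint32_t size) { | |||
|     const uint32_t offset = total; | |||
|     total += size; | |||
|     return offset; | |||
|   } | |||
| }; | |||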
| Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| GE_CHECK_NOTNULL(davinci_model); | |||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||
| const domi::KernelContext &context = kernel_def.context(); | |||
| kernel_type_ = static_cast<ccKernelType>(context.kernel_type()); | |||
| uint32_t args_size = kernel_def.args_size(); | |||
| if (kernel_type_ == ccKernelType::TE) { | |||
| uint32_t args_size = kernel_def.args_size(); | |||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||
| davinci_model->SetTotalArgsSize(args_size); | |||
| GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||
| if (kernel_def.sm_desc().empty()) { | |||
| SetContinuousArgs(args_size, davinci_model); | |||
| return SUCCESS; | |||
| } | |||
| l2_buffer_on_ = true; | |||
| SetNoncontinuousArgs(args_size, davinci_model); | |||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||
| hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||
| davinci_model->SetHybridArgsSize(kernel_def.args_size()); | |||
| GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); | |||
| SetNoncontinuousArgs(args_size, davinci_model); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -568,8 +591,23 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| // get tvm op desc | |||
| OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| Status ge_ret = UpdateL2Data(kernel_def); | |||
| // update origin l2 data | |||
| if (ge_ret != SUCCESS) { | |||
| return ge_ret; | |||
| } | |||
| if (davinci_model_->IsKnownNode()) { | |||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
| args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_) | |||
| : davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
| InitDumpTask(offset); | |||
| return SUCCESS; | |||
| } | |||
| @@ -609,12 +647,6 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| vector<uint8_t> args_info(args_size_); | |||
| errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| if ((args_size_ <= offset) || (args_size_ - offset < kAddrLen * tensor_device_addrs.size())) { | |||
| GELOGE(FAILED, "offset >= kernelInfo.argsSize or copy content beyond applied memory."); | |||
| @@ -628,7 +660,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(), | |||
| sec_ret = memcpy_s(args_addr.get() + offset, args_size_ - offset, tensor_device_addrs.data(), | |||
| kAddrLen * tensor_device_addrs.size()); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| @@ -640,19 +672,13 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
| GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast<char *>(args_) + offset, | |||
| "Op debug is open in TVM task info"); | |||
| Status ge_ret = UpdateL2Data(kernel_def); | |||
| // update origin l2 data | |||
| if (ge_ret != SUCCESS) { | |||
| return ge_ret; | |||
| } | |||
| vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | |||
| virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
| virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||
| if (op_desc->GetType() == ATOMICADDRCLEAN) { | |||
| virtual_io_addrs.insert(virtual_io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||
| } | |||
| davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset); | |||
| davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_addr.get(), args_, args_size_, offset); | |||
| GELOGD("Do InitTVMTask end"); | |||
| return SUCCESS; | |||
| @@ -951,8 +977,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
| } | |||
| void KernelTaskInfo::InitDumpTask(uint32_t offset) { | |||
| if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
| op_desc_->GetName())) { | |||
| if (davinci_model_->OpNeedDump(op_desc_->GetName())) { | |||
| if (IsL1FusionOp(op_desc_)) { | |||
| dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | |||
| } else { | |||
| @@ -129,6 +129,9 @@ class KernelTaskInfo : public TaskInfo { | |||
| bool IsL1FusionOp(const OpDescPtr &op_desc); | |||
| void SetIoAddrs(const OpDescPtr &op_desc); | |||
| void InitDumpTask(uint32_t offset); | |||
| void SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model); | |||
| void SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model); | |||
| Status CopyNoncontinuousArgs(uint16_t offset); | |||
| // For super kernel | |||
| Status SaveSKTDumpInfo(); | |||
| @@ -163,6 +166,8 @@ class KernelTaskInfo : public TaskInfo { | |||
| uint32_t hybrid_args_offset_ = 0; | |||
| int64_t fixed_addr_offset_ = 0; | |||
| std::unique_ptr<uint8_t[]> args_addr = nullptr; | |||
| uint16_t io_addr_offset_ = 0; | |||
| bool l2_buffer_on_ = false; | |||
| bool call_save_dump_ = false; | |||
| // aicpu ext_info device mem | |||
| @@ -22,7 +22,7 @@ namespace ge { | |||
| constexpr uint8_t kGotoBranchMax = 1; | |||
| LabelGotoExTaskInfo::~LabelGotoExTaskInfo() { | |||
| GE_FREE_RT_LOG(args_); | |||
| args_ = nullptr; | |||
| GE_FREE_RT_LOG(index_value_); | |||
| } | |||
| @@ -49,30 +49,12 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
| return INTERNAL_ERROR; | |||
| } | |||
| const vector<rtLabel_t> &label_list = davinci_model->GetLabelList(); | |||
| if (label_index >= label_list.size()) { | |||
| GELOGE(PARAM_INVALID, "LabelGotoExTaskInfo: Invalid label id:%u, label size:%zu", label_index, label_list.size()); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GE_CHECK_NOTNULL(label_list[label_index]); | |||
| vector<rtLabel_t> label_used = { label_list[label_index] }; | |||
| rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM; | |||
| GELOGI("memory_type: %u", memory_type); | |||
| args_size_ = kGotoBranchMax * sizeof(rtLabelDevInfo); | |||
| rtError_t rt_ret = rtMalloc(&args_, args_size_, memory_type); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), args_, args_size_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GE_CHK_STATUS_RET_NOLOG(davinci_model->GetLabelGotoAddr(label_index, memory_type, args_, args_size_)); | |||
| rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type); | |||
| rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| @@ -85,7 +67,7 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| GELOGI("LabelGotoExTaskInfo Init Success, label id:%u, label:%p.", label_index, label_list[label_index]); | |||
| GELOGI("LabelGotoExTaskInfo Init Success, label id:%u", label_index); | |||
| return SUCCESS; | |||
| } | |||
| @@ -356,6 +356,14 @@ void CachingAllocator::FreeBlocks() { | |||
| (void) FreeCachedBlocks(); | |||
| } | |||
| void CachingAllocator::TryFreeBlocks() { | |||
| GELOGI("Try free blocks."); | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| if (allocated_blocks_.empty()) { | |||
| (void) FreeCachedBlocks(); | |||
| } | |||
| } | |||
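| TryFreeBlocks returns cached device memory only when no allocation is still handed out, under the same recursive mutex that guards the rest of the allocator state. A minimal standalone sketch of that guard, with illustrative names rather than the real CachingAllocator API: | |||
| #include <mutex> | |||
| #include <unordered_set> | |||
| #include <vector> | |||
| class TinyCachingAllocator { | |||
|  public: | |||
|   void TryFreeBlocks() { | |||
|     std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
|     if (allocated_.empty()) { | |||
|       cached_.clear();  // stands in for FreeCachedBlocks() | |||
|     } | |||
|   } | |||
|  private: | |||
|   std::recursive_mutex mutex_; | |||
|   std::unordered_set<void *> allocated_;  // blocks currently handed out | |||
|   std::vector<void *> cached_;            // blocks kept for reuse | |||
| }; | |||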
| void CachingAllocator::FreeBlockBins() { | |||
| GELOGI("Free block bins."); | |||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
| @@ -94,6 +94,13 @@ class CachingAllocator { | |||
| /// | |||
| Status Free(uint8_t *memory_addr, uint32_t device_id = 0); | |||
| /// | |||
| /// @ingroup ge_graph | |||
| /// @brief try to free memory when no memory is referenced | |||
| /// @return void | |||
| /// | |||
| void TryFreeBlocks(); | |||
| private: | |||
| /// | |||
| @@ -117,6 +117,10 @@ const char *const kAIcoreEngine = "AIcoreEngine"; | |||
| const int32_t kDynamicDimsTypeIsGetNext = 0; | |||
| const int32_t kDynamicDimsTypeIsData = 1; | |||
| const char *const kGetNextName = "IteratorV2"; | |||
| const uint32_t kInitGraphCount = 1; | |||
| const uint32_t kNotAdded = 0; | |||
| const uint32_t kStartAdd = 1; | |||
| const uint32_t kDoneAdded = 2; | |||
| bool IsTailingOptimization() { | |||
| string is_tailing_optimization_option; | |||
| @@ -195,6 +199,8 @@ Status GraphManager::Initialize(const std::map<string, string> &options) { | |||
| graph_map_.clear(); | |||
| cache_helper_map_.clear(); | |||
| graph_id_to_add_graph_cond_.clear(); | |||
| graph_count_.clear(); | |||
| init_flag_ = true; | |||
| thread_run_flag_ = true; | |||
| @@ -204,6 +210,20 @@ Status GraphManager::Initialize(const std::map<string, string> &options) { | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::UnloadModel(GeRootModelPtr ge_root_model, uint32_t graph_id) { | |||
| Status ret = SUCCESS; | |||
| for (size_t i = 0; i < ge_root_model->GetAllModelId().size(); ++i) { | |||
| uint32_t model_id = ge_root_model->GetAllModelId()[i]; | |||
| GELOGI("Unload model %u.", model_id); | |||
| ret = GraphLoader::UnloadModel(model_id); | |||
| if (ret != SUCCESS) { | |||
| GELOGW("[GraphManager] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
| return ret; | |||
| } | |||
| } | |||
| return ret; | |||
| } | |||
| Status GraphManager::Finalize() { | |||
| if (!init_flag_) { | |||
| GELOGW("GraphManager has not been initialized."); | |||
| @@ -234,7 +254,6 @@ Status GraphManager::Finalize() { | |||
| unload_model_ret = GE_GRAPH_GRAPH_IS_RUNNING; | |||
| continue; | |||
| } | |||
| // unload model | |||
| auto ge_root_model = graph_node->GetGeRootModel(); | |||
| if (ge_root_model != nullptr && ge_root_model->GetModelId() != INVALID_MODEL_ID && graph_node->GetLoadFlag()) { | |||
| @@ -244,15 +263,14 @@ Status GraphManager::Finalize() { | |||
| unload_model_ret = FAILED; | |||
| continue; | |||
| } | |||
| ret = GraphLoader::UnloadModel(ge_root_model->GetModelId()); | |||
| ret = UnloadModel(ge_root_model, iter->first); | |||
| if (ret != SUCCESS) { | |||
| GELOGW("[GraphManager] unload model failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), iter->first); | |||
| GELOGW("[GraphManager] unload model failed, graph_id=%u.", iter->first); | |||
| unload_model_ret = ret; | |||
| } | |||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGW("[GraphManager] rtDeviceReset failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), | |||
| iter->first); | |||
| GELOGW("[GraphManager] rtDeviceReset failed, graphId=%u.", iter->first); | |||
| unload_model_ret = FAILED; | |||
| continue; | |||
| } | |||
| @@ -267,6 +285,7 @@ Status GraphManager::Finalize() { | |||
| } | |||
| graph_map_.clear(); | |||
| cache_helper_map_.clear(); | |||
| graph_count_.clear(); | |||
| // graph context | |||
| if (graph_context_ != nullptr) { | |||
| @@ -317,30 +336,59 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) { | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| const std::map<std::string, std::string> &options, | |||
| const OmgContext &omg_context) { | |||
| if (HasGraphNode(graph_id)) { | |||
| GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u.", graph_id); | |||
| return GE_GRAPH_GRAPH_ALREADY_EXIST; | |||
| void GraphManager::SetAddGraphCondition(GraphId graph_id, uint32_t cond) { | |||
| std::lock_guard<std::mutex> lock(add_graph_cond_mutex_); | |||
| graph_id_to_add_graph_cond_[graph_id] = cond; | |||
| GELOGD("Graph [id:%u] has been added.", graph_id); | |||
| } | |||
| uint32_t GraphManager::GetAddGraphCondition(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(add_graph_cond_mutex_); | |||
| auto it = graph_id_to_add_graph_cond_.find(graph_id); | |||
| if (it != graph_id_to_add_graph_cond_.end()) { | |||
| return it->second; | |||
| } else { | |||
| GELOGD("Graph [id:%u] has not been added.", graph_id); | |||
| return kNotAdded; | |||
| } | |||
| } | |||
| auto compute_graph = GraphUtils::GetComputeGraph(graph); | |||
| if (compute_graph != nullptr) { | |||
| compute_graph->SetGraphID(graph_id); | |||
| bool graph_has_been_added = false; | |||
| if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added) | |||
| && graph_has_been_added) { | |||
| GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, | |||
| "[GraphManager] same graph object can not be added again, graph_id = %u.", graph_id); | |||
| return GE_GRAPH_GRAPH_ALREADY_EXIST; | |||
| } | |||
| (void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true); | |||
| compute_graph_ = compute_graph; | |||
| void GraphManager::RemoveAddGraphCondition(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(add_graph_cond_mutex_); | |||
| auto it = graph_id_to_add_graph_cond_.find(graph_id); | |||
| if (it != graph_id_to_add_graph_cond_.end()) { | |||
| graph_id_to_add_graph_cond_.erase(it); | |||
| GELOGD("Successfully removed add_graph_cond of graph [id:%u].", graph_id); | |||
| } else { | |||
| GELOGD("Graph [id:%u] has not been added, no need to remove.", graph_id); | |||
| } | |||
| Status GraphManager::CheckRepeatAdd(uint32_t graph_id, bool &is_added) { | |||
| uint32_t count = 0; | |||
| if (GetGraphCount(graph_id, count) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed, graph might have not been added.", graph_id); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| // a previous thread owning the same graph_id is still in the middle of the AddGraph process | |||
| if (count > 1 && GetAddGraphCondition(graph_id) == kStartAdd) { | |||
| std::unique_lock<std::mutex> lock(add_graph_mutex_); | |||
| GELOGD("Waitting for build end of previous thread."); | |||
| while (GetAddGraphCondition(graph_id) != kDoneAdded) { | |||
| add_graph_cv_.wait(lock); | |||
| } | |||
| GraphNodePtr graph_node; | |||
| Status ret = GetGraphNode(graph_id, graph_node); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[AddGraph] GetGraphNode failed, graph_id = %u.", graph_id); | |||
| return ret; | |||
| } | |||
| is_added = true; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
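| The kNotAdded/kStartAdd/kDoneAdded states plus the condition variable form a publish-and-wait handshake: the first thread marks kStartAdd, builds, then publishes kDoneAdded and wakes the waiters. A self-contained sketch of the handshake using only standard-library types (names are illustrative, not GE's): | |||
| #include <condition_variable> | |||
| #include <cstdint> | |||
| #include <map> | |||
| #include <mutex> | |||
| enum AddCond : uint32_t { kCondNotAdded = 0, kCondStartAdd = 1, kCondDoneAdded = 2 }; | |||
| std::mutex g_cond_mutex; | |||
| std::condition_variable g_cond_cv; | |||
| std::map<uint32_t, uint32_t> g_add_cond;  // graph_id -> AddCond | |||
| void WaitUntilAdded(uint32_t graph_id) { | |||
|   std::unique_lock<std::mutex> lock(g_cond_mutex); | |||
|   g_cond_cv.wait(lock, [&] { return g_add_cond[graph_id] == kCondDoneAdded; }); | |||
| } | |||
| void PublishAdded(uint32_t graph_id) { | |||
|   { | |||
|     std::lock_guard<std::mutex> lock(g_cond_mutex); | |||
|     g_add_cond[graph_id] = kCondDoneAdded; | |||
|   } | |||
|   g_cond_cv.notify_all();  // wake every thread waiting on this graph_id | |||
| } | |||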
| void GraphManager::SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t graph_id) { | |||
| std::string session_graph_id; | |||
| if (!AttrUtils::GetStr(*compute_graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id) || session_graph_id.empty()) { | |||
| session_graph_id = "-1_" + to_string(graph_id); | |||
| @@ -352,7 +400,24 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| } | |||
| GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||
| } | |||
| } | |||
| Status GraphManager::NotifyWaittingGraph(uint32_t graph_id) { | |||
| uint32_t count = 0; | |||
| if (GetGraphCount(graph_id, count) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed, graph might have not been added.", graph_id); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| GELOGD("Add graph finished, graph_id:%u", graph_id); | |||
| if (count > 1) { | |||
| GELOGD("Finish addgraph, graph_id:%u, graph_count:%u, start to notify.", graph_id, count); | |||
| add_graph_cv_.notify_all(); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::CreateGraphNode(uint32_t graph_id, const Graph &graph, | |||
| const std::map<std::string, std::string> &options) { | |||
| GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | |||
| GE_IF_BOOL_EXEC(graph_node == nullptr, GELOGE(FAILED, "GraphNode make shared failed"); | |||
| return FAILED); | |||
| @@ -365,7 +430,62 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| ParseOption(options, TUNING_PATH, options_.tuning_path); | |||
| graph_node->SetGraph(graph_ptr); | |||
| graph_node->SetOptions(options); | |||
| graph_node->IncreaseLoadCount(); | |||
| AddGraphNode(graph_id, graph_node); | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::SetStagesOptions(uint32_t graph_id, const GraphManagerOptions &options) { | |||
| CompilerStages &stages = GetCompilerStages(graph_id); | |||
| stages.preparer.SetOptions(options_); | |||
| Status status = stages.optimizer.SetOptions(options_); | |||
| if (status != SUCCESS) { | |||
| GELOGE(status, "Graph optimizer set options failed."); | |||
| return status; | |||
| } | |||
| stages.builder.SetOptions(options_); | |||
| return SUCCESS; | |||
| } | |||
| Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| const std::map<std::string, std::string> &options, | |||
| const OmgContext &omg_context) { | |||
| IncreaseGraphCount(graph_id); | |||
| // validation for adding graphs with the same graph_id in a multi-thread scenario | |||
| // 1. a previous thread owning the same graph_id has finished the AddGraph process | |||
| if (GetAddGraphCondition(graph_id) == kDoneAdded) { | |||
| GraphNodePtr graph_node; | |||
| if (GetGraphNode(graph_id, graph_node) != SUCCESS) { | |||
| GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "Graph not exist while done adding previously, graph_id = %u.", graph_id); | |||
| return GE_GRAPH_GRAPH_NOT_EXIST; | |||
| } | |||
| graph_node->IncreaseLoadCount(); | |||
| return SUCCESS; | |||
| } | |||
| // In a multi-thread scenario, a former thread owning the same graph_id may be | |||
| // in the middle of the AddGraph process; following threads have to wait until | |||
| // it finishes adding the graph, which avoids repeatedly adding the same graph. | |||
| bool is_added = false; | |||
| if (CheckRepeatAdd(graph_id, is_added) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "CheckRepeatAdd for graph[id:%u] failed.", graph_id); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| // The former graph (from a different thread) owning the same graph id has been successfully added. | |||
| if (is_added) { | |||
| return SUCCESS; | |||
| } | |||
| // Do add graph | |||
| SetAddGraphCondition(graph_id, kStartAdd); | |||
| auto compute_graph = GraphUtils::GetComputeGraph(graph); | |||
| GE_CHECK_NOTNULL(compute_graph); | |||
| compute_graph->SetGraphID(graph_id); | |||
| SetSessionGraphId(compute_graph, graph_id); | |||
| if (CreateGraphNode(graph_id, graph, options) != SUCCESS) { | |||
| GELOGE(FAILED, "Failed to create graph_node."); | |||
| return FAILED; | |||
| } | |||
| AddLocalOmgContext(graph_id, omg_context); | |||
| if (!options_.output_datatype.empty()) { | |||
| @@ -376,16 +496,18 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
| return GRAPH_PARAM_INVALID; | |||
| } | |||
| CompilerStages &stages = GetCompilerStages(graph_id); | |||
| stages.preparer.SetOptions(options_); | |||
| Status status = stages.optimizer.SetOptions(options_); | |||
| if (status != SUCCESS) { | |||
| GELOGE(status, "Graph optimizer set options failed."); | |||
| return status; | |||
| if (SetStagesOptions(graph_id, options_) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Set stage options failed."); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| stages.builder.SetOptions(options_); | |||
| var_acc_ctrl_.AddGraph(graph_id, compute_graph); | |||
| SetAddGraphCondition(graph_id, kDoneAdded); | |||
| // Notify any threads waiting on adding the same graph | |||
| if (NotifyWaittingGraph(graph_id) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "NotifyWaittingGraph failed."); | |||
| return INTERNAL_ERROR; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -895,6 +1017,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||
| if (!graph_node->IsAsync()) { | |||
| ret = LoadGraph(ge_root_model, graph_node); | |||
| } else { | |||
| GE_CHECK_NOTNULL(ge_root_model); | |||
| ret = LoadGraphAsync(ge_root_model, graph_node); | |||
| } | |||
| if (ret != SUCCESS) { | |||
| @@ -909,6 +1032,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||
| if (!graph_node->IsAsync()) { | |||
| ret = LoadGraph(ge_root_model_ptr, graph_node); | |||
| } else { | |||
| GE_CHECK_NOTNULL(ge_root_model); | |||
| ret = LoadGraphAsync(ge_root_model_ptr, graph_node); | |||
| } | |||
| if (ret != SUCCESS) { | |||
| @@ -921,6 +1045,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||
| Status GraphManager::LoadGraph(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { | |||
| GELOGI("[LoadGraph] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId()); | |||
| if (options_.run_graph_flag && ge_root_model != nullptr) { | |||
| ge_root_model->SetTrainFlag(GetTrainFlag()); | |||
| // synchronization run graph with model | |||
| std::shared_ptr<GraphModelListener> model_listener = GetModelListener(); | |||
| ModelIdInfo model_id_info; | |||
| @@ -1315,54 +1440,29 @@ bool GraphManager::CheckModelLoad(const GeRootModelPtr &ge_root_model, bool load | |||
| } | |||
| Status GraphManager::RemoveGraph(const GraphId &graph_id) { | |||
| auto it = to_be_deleted_graphs_.find(graph_id); | |||
| if (it != to_be_deleted_graphs_.end()) { | |||
| to_be_deleted_graphs_.erase(it); | |||
| } | |||
| GraphNodePtr graph_node = nullptr; | |||
| Status ret = GetGraphNode(graph_id, graph_node); | |||
| if (ret != SUCCESS) { | |||
| if (ret != SUCCESS || graph_node == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s", | |||
| graph_id, __FUNCTION__); | |||
| GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[GraphManager] Id %u does not exists.", graph_id); | |||
| return GE_GRAPH_GRAPH_NOT_EXIST; | |||
| } | |||
| if ((graph_node == nullptr) || (graph_node->GetRunFlag())) { | |||
| GELOGE(GE_GRAPH_GRAPH_IS_RUNNING, "[GraphManager] Id %u is running, can't be deleted.", graph_id); | |||
| return GE_GRAPH_GRAPH_IS_RUNNING; | |||
| if (graph_node->GetRunFlag()) { | |||
| // only put the graph into the to-be-deleted list in exceptional scenarios | |||
| to_be_deleted_graphs_.insert(graph_id); | |||
| GELOGI("[GraphManager] Trying to remove running graph[Id:%u], added into to_be_deleted_graphs_.", graph_id); | |||
| return SUCCESS; | |||
| } | |||
| std::lock_guard<std::mutex> lock(unload_model_mutex_); | |||
| Status middle_ret; | |||
| rtError_t rt_ret; | |||
| const std::vector<SubGraphInfoPtr> &all_sub_graph = graph_node->GetAllSubGraph(); | |||
| for (size_t i = 0; i < all_sub_graph.size(); ++i) { | |||
| // must free buffer firstly | |||
| middle_ret = all_sub_graph[i]->FreeInOutBuffer(); | |||
| if (middle_ret != SUCCESS) { | |||
| GELOGE(middle_ret, "[GraphManager] RemoveGraph free mem failed, graph_id=%u.", graph_id); | |||
| ret = middle_ret; | |||
| } | |||
| if (all_sub_graph[i]->GeModelIsValid() && all_sub_graph[i]->GetModelIdInfo().model_id != INVALID_MODEL_ID) { | |||
| // unload model | |||
| GELOGI("UnloadModel via new ome."); | |||
| rt_ret = rtSetDevice(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", | |||
| all_sub_graph[i]->GetModelIdInfo().model_id, graph_id); | |||
| ret = FAILED; | |||
| continue; | |||
| } | |||
| middle_ret = GraphLoader::UnloadModel(all_sub_graph[i]->GetModelIdInfo().model_id); | |||
| if (middle_ret != SUCCESS) { | |||
| GELOGE(middle_ret, "[GraphManager:] unload model failed, modelId=%u, graph_id=%u.", | |||
| all_sub_graph[i]->GetModelIdInfo().model_id, graph_id); | |||
| ret = middle_ret; | |||
| } | |||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] unload model failed, modelId=%u, graphId=%u.", | |||
| all_sub_graph[i]->GetModelIdInfo().model_id, graph_id); | |||
| ret = FAILED; | |||
| } | |||
| } | |||
| } | |||
| var_acc_ctrl_.RemoveGraph(graph_id); | |||
| RemoveGraphNode(graph_id); | |||
| @@ -1370,28 +1470,33 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) { | |||
| auto ge_root_model = graph_node->GetGeRootModel(); | |||
| if (CheckModelLoad(ge_root_model, graph_node->GetLoadFlag())) { | |||
| GELOGI("Unload model %u.", ge_root_model->GetModelId()); | |||
| rt_ret = rtSetDevice(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), | |||
| graph_id); | |||
| return FAILED; | |||
| } | |||
| middle_ret = GraphLoader::UnloadModel(ge_root_model->GetModelId()); | |||
| // the same graph may be added several times and different models were created separately; | |||
| // unload them respectively. | |||
| middle_ret = UnloadModel(ge_root_model, graph_id); | |||
| if (middle_ret != SUCCESS) { | |||
| GELOGE(middle_ret, "[GraphManager:] unload model failed, modelId=%u, graph_id=%u.", ge_root_model->GetModelId(), | |||
| graph_id); | |||
| REPORT_INNER_ERROR("E19999", "UnloadModel for graph:%u failed, check unload detail in GraphLoader %s", | |||
| graph_id, __FUNCTION__); | |||
| GELOGE(middle_ret, "[GraphManager:] unload model failed, graph_id=%u.", graph_id); | |||
| ret = middle_ret; | |||
| } | |||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), | |||
| graph_id); | |||
| REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, graph_id:%u, when GraphManager %s", | |||
| GetContext().DeviceId(), graph_id, __FUNCTION__); | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, graphId=%u.", graph_id); | |||
| ret = FAILED; | |||
| } | |||
| } | |||
| RemoveCompilerStages(graph_id); | |||
| RemoveGraphCount(graph_id); | |||
| RemoveAddGraphCondition(graph_id); | |||
| GE_CHK_STATUS_RET(ret, "[GraphManager:] Remove graph failed, graph_id=%u.", graph_id); | |||
| GELOGI("[GraphManager] remove graph success, graph_id=%u.", graph_id); | |||
| @@ -2409,6 +2514,7 @@ void GraphManager::ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_gr | |||
| Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { | |||
| GELOGI("[LoadGraphAsync] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId()); | |||
| if (options_.run_graph_flag && ge_root_model != nullptr) { | |||
| ge_root_model->SetTrainFlag(GetTrainFlag()); | |||
| // synchronization run graph with model | |||
| ModelIdInfo model_id_info; | |||
| bool is_unknown_shape = false; | |||
| @@ -2425,9 +2531,9 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G | |||
| } | |||
| } | |||
| GE_TIMESTAMP_START(LoadGraph); | |||
| GE_CHECK_NOTNULL(graph_node->graph_run_async_listener_); | |||
| Status ret = | |||
| GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_); | |||
| auto listener = MakeShared<RunAsyncListener>(); | |||
| GE_CHECK_NOTNULL(listener); | |||
| Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, listener); | |||
| GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraphAsync"); | |||
| if (ret != SUCCESS) { | |||
| GELOGE(ret, "[LoadGraphAsync] LoadGraphAsync Failed"); | |||
| @@ -2441,6 +2547,52 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G | |||
| return SUCCESS; | |||
| } | |||
| void GraphManager::ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph_node, | |||
| const std::vector<uint32_t> &model_ids, uint32_t graph_id, uint64_t session_id) { | |||
| rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, when GraphManager %s", | |||
| GetContext().DeviceId(), __FUNCTION__); | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, graphId=%u.", graph_id); | |||
| return; | |||
| } | |||
| for (auto model_id : model_ids) { | |||
| uint64_t max_memory_size = 0; | |||
| Status result = GraphLoader::GetMaxUsedMemory(model_id, max_memory_size); | |||
| if (result != SUCCESS) { | |||
| continue; | |||
| } | |||
| GELOGI("CheckAndReleaseMemory try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id, | |||
| max_memory_size); | |||
| if (model_ids.size() > 1) { | |||
| result = ge_model->GetSessionId(model_id, session_id); | |||
| if (result != SUCCESS) { | |||
| GELOGW("[GraphManager:] get session failed when dynamic memory, modelId=%u, graphId=%u.", model_id, | |||
| graph_id); | |||
| continue; | |||
| } | |||
| } | |||
| result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0); | |||
| if (result != SUCCESS) { | |||
| GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id, | |||
| graph_id); | |||
| } | |||
| result = GraphLoader::UnloadModel(model_id); | |||
| if (result != SUCCESS) { | |||
| GELOGW("[GraphManager:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
| } | |||
| GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success.", graph_id, model_id); | |||
| } | |||
| graph_node->SetLoadFlag(false); | |||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, when GraphManager %s", | |||
| GetContext().DeviceId(), __FUNCTION__); | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, graphId=%u.", graph_id); | |||
| return; | |||
| } | |||
| } | |||
| Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node) { | |||
| GELOGI("CheckAndReleaseMemory graph_id[%u]", graph_node->GetGraphId()); | |||
| int64_t value = 0; | |||
| @@ -2484,6 +2636,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra | |||
| continue; | |||
| } | |||
| auto model_id = model->GetModelId(); | |||
| auto model_ids = model->GetAllModelId(); | |||
| // unload model not release | |||
| bool is_unknown_shape = false; | |||
| GE_CHK_STATUS_RET(model->CheckIsUnknownShape(is_unknown_shape)); | |||
| @@ -2496,34 +2649,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra | |||
| GELOGI("CheckAndReleaseMemory graph[%u] has not been loaded.", graph_id); | |||
| continue; | |||
| } | |||
| uint64_t max_memory_size = 0; | |||
| result = GraphLoader::GetMaxUsedMemory(model_id, max_memory_size); | |||
| if (result != SUCCESS) { | |||
| continue; | |||
| } | |||
| GELOGI("CheckAndReleaseMemory try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id, | |||
| max_memory_size); | |||
| rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
| continue; | |||
| } | |||
| result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0); | |||
| if (result != SUCCESS) { | |||
| GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id, | |||
| graph_id); | |||
| } | |||
| result = GraphLoader::UnloadModel(model_id); | |||
| if (result != SUCCESS) { | |||
| GELOGW("[GraphManager:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
| } | |||
| rt_ret = rtDeviceReset(GetContext().DeviceId()); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", model_id, graph_id); | |||
| continue; | |||
| } | |||
| it.second->SetLoadFlag(false); | |||
| GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success and set LoadFlag to false.", graph_id, model_id); | |||
| ReleaseMemory(ge_model, it.second, model_ids, graph_id, session_id); | |||
| } | |||
| return SUCCESS; | |||
| @@ -2659,6 +2785,38 @@ void GraphManager::ConstructGeInput(const vector<InputTensorInfo> &inputs, vecto | |||
| } | |||
| } | |||
| Status GraphManager::CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args, | |||
| GraphNodePtr &graph_node, GeRootModelPtr &ge_root_model) { | |||
| if (!graph_manager->IsGraphNeedBuild(graph_node)) { | |||
| ge_root_model = graph_node->GetGeRootModel(); | |||
| return SUCCESS; | |||
| } | |||
| if (graph_node->GetBuildFlag()) { | |||
| ReturnError(graph_manager, args.callback, PARAM_INVALID, | |||
| "The graph " + std::to_string(graph_node->GetGraphId()) + | |||
| " need to re-build, you should remove it" | |||
| " from GE first, then AddGraph again and rebuild it."); | |||
| graph_node->Unlock(); | |||
| return PARAM_INVALID; | |||
| } | |||
| // check need incre build. | |||
| GeModelPtr ge_model = nullptr; | |||
| if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { | |||
| std::vector<GeTensor> ge_inputs; | |||
| ConstructGeInput(args.input_tensor, ge_inputs); | |||
| Status ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); | |||
| // release rts generate context | |||
| RtContextUtil::GetInstance().DestroyRtContexts(args.session_id, graph_node->GetGraphId()); | |||
| if (ret != SUCCESS) { | |||
| ReturnError(graph_manager, args.callback, ret, "PreRun Failed."); | |||
| return ret; | |||
| } | |||
| } | |||
| graph_node->SetBuildFlag(true); | |||
| graph_manager->var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId()); | |||
| return SUCCESS; | |||
| } | |||
| void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| if (prctl(PR_SET_NAME, ("GE_PreRun")) != 0) { | |||
| GELOGW("Set thread name failed."); | |||
| @@ -2671,7 +2829,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| continue; | |||
| } | |||
| GELOGI("A new loop start."); | |||
| GELOGI("[PreRunThread] A new loop start, graph_id:%u.", args.graph_id); | |||
| ErrorManager::GetInstance().SetErrorContext(args.error_context); | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||
| @@ -2687,7 +2845,24 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| "[RunGraph] graph not exist, graph_id=" + std::to_string(args.graph_id)); | |||
| return; | |||
| } | |||
| // more than one graph owns same graph_id | |||
| uint32_t count = 0; | |||
| if (graph_manager->GetGraphCount(args.graph_id, count) != SUCCESS) { | |||
| GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed.", args.graph_id); | |||
| return; | |||
| } | |||
| // Avoid repeated prerun for graphs owning the same graph_id under online-inference concurrency | |||
| if (count > 1 && graph_node->GetBuildFlag()) { | |||
| graph_node->Lock(); | |||
| GELOGD("Avoid repeatively prerun, graph_id:%u.", args.graph_id); | |||
| // In the online-inference concurrency scenario, graph_node is allowed to be locked 'count' times | |||
| graph_node->SetSemSize(count); | |||
| graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context, | |||
| args.input_tensor, graph_node->GetGeRootModel(), GetThreadLocalContext(), args.callback })); | |||
| GELOGI("[PreRunThread] Loop end. Start to run with cached build model."); | |||
| continue; | |||
| } | |||
| // Cannot be moved ahead of the repeated-prerun judgement | |||
| graph_node->Lock(); | |||
| if (graph_node->GetRunFlag()) { | |||
| @@ -2719,46 +2894,24 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
| // it will not execute graph preprocess, optimize, partition or build if the graph has been built successfully. | |||
| GELOGI("Start for run graph async."); | |||
| GeRootModelPtr ge_root_model = nullptr; | |||
| if (graph_manager->IsGraphNeedBuild(graph_node)) { | |||
| if (graph_node->GetBuildFlag()) { | |||
| ReturnError(graph_manager, args.callback, PARAM_INVALID, | |||
| "The graph " + std::to_string(graph_node->GetGraphId()) + | |||
| " need to re-build, you should remove it" | |||
| " from GE first, then AddGraph again and rebuild it."); | |||
| ret = CheckIncreBuildAndPreRun(graph_manager, args, graph_node, ge_root_model); | |||
| if (ret != SUCCESS) { | |||
| graph_node->SetRunFlag(false); | |||
| if (!ge::Analyzer::GetInstance()->IsEnableNetAnalyzeDebug()) { | |||
| ReturnError(graph_manager, args.callback, ret, "CheckIncreBuildAndPreRun Failed, thread exit.."); | |||
| graph_node->Unlock(); | |||
| return; | |||
| } else { | |||
| ReturnError(graph_manager, graph_node, args.callback, ret, | |||
| "CheckIncreBuildAndPreRun Failed, keep geop continue!"); | |||
| graph_node->Unlock(); | |||
| continue; | |||
| } | |||
| // check need incre build. | |||
| GeModelPtr ge_model = nullptr; | |||
| if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { | |||
| std::vector<GeTensor> ge_inputs; | |||
| ConstructGeInput(args.input_tensor, ge_inputs); | |||
| ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); | |||
| // release rts generate context | |||
| RtContextUtil::GetInstance().DestroyRtContexts(args.session_id, graph_node->GetGraphId()); | |||
| if (ret != SUCCESS) { | |||
| graph_node->SetRunFlag(false); | |||
| if (!ge::Analyzer::GetInstance()->IsEnableNetAnalyzeDebug()) { | |||
| ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit.."); | |||
| graph_node->Unlock(); | |||
| return; | |||
| } else { | |||
| ReturnError(graph_manager, graph_node, args.callback, ret, "PreRun Failed, keep geop continue!"); | |||
| graph_node->Unlock(); | |||
| continue; | |||
| } | |||
| } | |||
| } | |||
| graph_node->SetBuildFlag(true); | |||
| graph_manager->var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId()); | |||
| } else { | |||
| ge_root_model = graph_node->GetGeRootModel(); | |||
| } | |||
| graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context, | |||
| args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback })); | |||
| GELOGI("Loop end."); | |||
| GELOGI("[PreRunThread] Loop end."); | |||
| } | |||
| } | |||
| @@ -2855,16 +3008,13 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
| continue; | |||
| } | |||
| GELOGI("A new loop start."); | |||
| GELOGI("[RunThread] A new loop start, graph_id:%u.", args.graph_id); | |||
| ErrorManager::GetInstance().SetErrorContext(args.error_context); | |||
| GetContext().SetSessionId(args.session_id); | |||
| GetThreadLocalContext() = args.context; | |||
| graph_manager->UpdateLocalOmgContext(args.graph_id); | |||
| if (args.graph_node->graph_run_async_listener_ != nullptr) { | |||
| args.graph_node->graph_run_async_listener_->SetCallback(args.callback); | |||
| } | |||
| Status ret; | |||
| // parse inputs.dims to vector<vector<uint64_t>> dynamic_dims | |||
| ret = graph_manager->ParseInputsDims(args.input_tensor); | |||
| @@ -2874,8 +3024,10 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
| return; | |||
| } | |||
| args.graph_node->UpdateLoadFlag(); | |||
| if (!args.graph_node->GetLoadFlag()) { | |||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelLoad, ErrorMessage::kModelLoad); | |||
| args.ge_root_model->SetTrainFlag(graph_manager->GetTrainFlag()); | |||
| ret = graph_manager->LoadGraphAsync(args.ge_root_model, args.graph_node); | |||
| if (ret != SUCCESS || args.ge_root_model == nullptr) { | |||
| StopQueue(graph_manager); | |||
| @@ -2883,6 +3035,10 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
| args.graph_node->Unlock(); | |||
| return; | |||
| } | |||
| // control the number of graph loads in the multi-thread scenario | |||
| args.graph_node->DecreaseLoadCount(); | |||
| args.graph_node->IncreaseLoadRecord(); | |||
| args.graph_node->SetLoadFlag(true); | |||
| GELOGI("LoadGraph[%u], model[%u] success and set LoadFlag to true.", args.graph_node->GetGraphId(), | |||
| args.ge_root_model->GetModelId()); | |||
| @@ -2898,7 +3054,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
| } | |||
| ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(), | |||
| args.input_tensor); | |||
| args.input_tensor, args.callback); | |||
| args.graph_node->SetRunFlag(false); | |||
| if (ret != SUCCESS) { | |||
| ReturnError(graph_manager, args.callback, ret, "ExecuteGraphAsync failed, thread exit."); | |||
| @@ -3314,4 +3470,49 @@ void GraphManager::RemoveCompilerStages(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(member_mutex_); | |||
| compiler_stages_.erase(graph_id); | |||
| } | |||
| void GraphManager::IncreaseGraphCount(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(graph_count_mutex_); | |||
| auto it = graph_count_.find(graph_id); | |||
| if (it == graph_count_.end()) { | |||
| graph_count_.insert({graph_id, kInitGraphCount}); | |||
| } else { | |||
| ++graph_count_[graph_id]; | |||
| } | |||
| GELOGD("After IncreaseGraphCount, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]); | |||
| } | |||
| void GraphManager::RemoveGraphCount(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(graph_count_mutex_); | |||
| auto it = graph_count_.find(graph_id); | |||
| if (it == graph_count_.end()) { | |||
| GELOGW("Graph of id: %u has not been added, count cannot be decreased.", graph_id); | |||
| } else { | |||
| GELOGD("RemoveGraphCount success, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]); | |||
| graph_count_.erase(it); | |||
| } | |||
| } | |||
| void GraphManager::DecreaseGraphCount(GraphId graph_id) { | |||
| std::lock_guard<std::mutex> lock(graph_count_mutex_); | |||
| auto it = graph_count_.find(graph_id); | |||
| if (it == graph_count_.end()) { | |||
| GELOGW("Graph of id: %u has not been added, count cannot be decreased.", graph_id); | |||
| } else { | |||
| --it->second; | |||
| GELOGD("After DecreaseGraphCount, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]); | |||
| } | |||
| } | |||
| Status GraphManager::GetGraphCount(GraphId graph_id, uint32_t &count) { | |||
| std::lock_guard<std::mutex> lock(graph_count_mutex_); | |||
| auto it = graph_count_.find(graph_id); | |||
| if (it == graph_count_.end()) { | |||
| GELOGW("Graph [id:%u] has not been added.", graph_id); | |||
| return FAILED; | |||
| } | |||
| count = it->second; | |||
| return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -184,6 +184,20 @@ class GraphManager { | |||
| Status SaveCheckPointResult(const Graph &graph, const std::vector<Tensor> &outputs, map<string, Tensor> &var_results); | |||
| void RemoveGraphCount(GraphId graph_id); | |||
| void IncreaseGraphCount(GraphId graph_id); | |||
| void DecreaseGraphCount(GraphId graph_id); | |||
| Status GetGraphCount(GraphId graph_id, uint32_t &count); | |||
| void SetAddGraphCondition(GraphId graph_id, uint32_t cond); | |||
| uint32_t GetAddGraphCondition(GraphId graph_id); | |||
| void RemoveAddGraphCondition(GraphId graph_id); | |||
| private: | |||
| struct CompilerStages { | |||
| GraphPrepare preparer; | |||
| @@ -380,6 +394,24 @@ class GraphManager { | |||
| CompilerStages &GetCompilerStages(GraphId graph_id); | |||
| void RemoveCompilerStages(GraphId graph_id); | |||
| static Status CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args, GraphNodePtr &graph_node, | |||
| GeRootModelPtr &ge_root_model); | |||
| void ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph_node, const std::vector<uint32_t> &model_ids, | |||
| uint32_t graph_id, uint64_t session_id); | |||
| Status CheckRepeatAdd(uint32_t graph_id, bool &is_added); | |||
| Status NotifyWaittingGraph(uint32_t graph_id); | |||
| Status CreateGraphNode(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options); | |||
| Status SetStagesOptions(uint32_t graph_id, const GraphManagerOptions &options); | |||
| Status UnloadModel(GeRootModelPtr ge_root_model, uint32_t graph_id); | |||
| void SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t graph_id); | |||
| std::atomic_bool thread_run_flag_; | |||
| BlockingQueue<PreRunArgs> prerun_args_q_{}; | |||
| BlockingQueue<RunArgs> run_args_q_{}; | |||
| @@ -415,6 +447,16 @@ class GraphManager { | |||
| std::mutex member_mutex_; | |||
| std::mutex unload_model_mutex_; | |||
| // avoid repeatedly adding the same graph (owning the same graph id) | |||
| std::mutex add_graph_mutex_; | |||
| std::mutex add_graph_cond_mutex_; | |||
| std::condition_variable add_graph_cv_; | |||
| std::map<GraphId, uint32_t> graph_id_to_add_graph_cond_; | |||
| // used in the multi-thread online-infer scenario | |||
| std::set<GraphId> to_be_deleted_graphs_; | |||
| std::map<GraphId, uint32_t> graph_count_; | |||
| std::mutex graph_count_mutex_; | |||
| }; | |||
| } // namespace ge | |||
| @@ -60,6 +60,15 @@ void GraphNode::Unlock() { | |||
| sem_.Pop(unused); | |||
| } | |||
| void GraphNode::IncreaseLoadCount() { | |||
| std::unique_lock<std::mutex> lock(load_count_mu_); | |||
| if (load_record_ == kMaxLoadNum) { | |||
| GELOGW("Reach the maximum of load_count:%u", kMaxLoadNum); | |||
| return; | |||
| } | |||
| ++load_count_; | |||
| } | |||
| SubGraphInfo::SubGraphInfo() : subgraph_ptr_(nullptr), ge_model_ptr_(nullptr), malloc_flag_(false) {} | |||
| SubGraphInfo::~SubGraphInfo() { | |||
| @@ -55,6 +55,7 @@ using ConstGraphPtr = std::shared_ptr<const ge::Graph>; | |||
| using GraphPtr = std::shared_ptr<ge::Graph>; | |||
| const uint64_t INVALID_SESSION_ID = 0xffffffffffffffffULL; | |||
| const uint32_t kMaxLoadNum = 8; | |||
| struct ModelIdInfo { | |||
| uint32_t model_id{INVALID_MODEL_ID}; | |||
| @@ -162,6 +163,8 @@ class GraphNode { | |||
| bool GetBuildFlag() const { return build_flag_; } | |||
| void SetBuildFlag(bool buildFlag) { build_flag_ = buildFlag; } | |||
| bool GetLoadFlag() const { return load_flag_; } | |||
| // allow repeatedly loading a graph that owns the same graph id | |||
| void UpdateLoadFlag() { load_flag_ = load_count_ == 0 || load_record_ >= kMaxLoadNum; } | |||
| void SetLoadFlag(bool load_flag) { load_flag_ = load_flag; } | |||
| void SetGeModel(const GeModelPtr &ge_model) { ge_model_ = ge_model; } | |||
| GeModelPtr GetGeModel() const { return ge_model_; } | |||
| @@ -172,6 +175,13 @@ class GraphNode { | |||
| void Lock(); | |||
| void Unlock(); | |||
| void SetSemSize(uint32_t size) { sem_.SetMaxSize(size); } | |||
| uint32_t GetLoadCount() const { return load_count_; } | |||
| void IncreaseLoadCount(); | |||
| void DecreaseLoadCount() { --load_count_; } | |||
| void IncreaseLoadRecord() { ++load_record_; } | |||
| // run graph asynchronous listener | |||
| std::shared_ptr<RunAsyncListener> graph_run_async_listener_; | |||
| @@ -184,11 +194,17 @@ class GraphNode { | |||
| GraphPtr graph_; | |||
| ComputeGraphPtr compute_graph_; | |||
| bool build_flag_; | |||
| // load_flag_ is true if more than one model was loaded | |||
| bool load_flag_; | |||
| bool async_; | |||
| GeModelPtr ge_model_; | |||
| GeRootModelPtr ge_root_model_; | |||
| BlockingQueue<uint8_t> sem_; | |||
| // consistent with the graph_count of the same graph_id in graph_manager | |||
| uint32_t load_count_ = 0; | |||
| // total number of times a graph with the same graph_id has been loaded. | |||
| uint32_t load_record_ = 0; | |||
| std::mutex load_count_mu_; | |||
| }; | |||
| using GraphNodePtr = std::shared_ptr<GraphNode>; | |||
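| load_count_ and load_record_ drive UpdateLoadFlag() above: loading is skipped once no load is outstanding or the record cap is reached. A standalone sketch of that flag logic under the same assumptions: | |||
| #include <cstdint> | |||
| struct LoadState { | |||
|   static constexpr uint32_t kMaxLoadNum = 8; | |||
|   uint32_t load_count = 0;   // outstanding loads requested via AddGraph | |||
|   uint32_t load_record = 0;  // total loads performed so far | |||
|   // Mirrors GraphNode::UpdateLoadFlag(): true means no further load is needed. | |||
|   bool LoadFlag() const { return load_count == 0 || load_record >= kMaxLoadNum; } | |||
| }; | |||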
| @@ -126,11 +126,11 @@ bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int6 | |||
| bool AtomicAddrCleanPass::CheckSkipInsertInLoopGraph(const NodePtr &node) { | |||
| OpDescPtr op_desc = node->GetOpDesc(); | |||
| std::map<string, std::map<int, int>> node_workspace_offset; | |||
| std::map<string, std::map<int64_t, int64_t>> atomic_workspace_index_size; | |||
| bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX); | |||
| bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX); | |||
| node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset); | |||
| if (!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) { | |||
| atomic_workspace_index_size = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_index_size); | |||
| if (!has_atomic_input && has_atomic_output && atomic_workspace_index_size.empty()) { | |||
| std::vector<int64_t> atomic_output_index; | |||
| (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); | |||
| bool is_all_output_peer_also_atomic = true; | |||
| @@ -222,6 +222,39 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect | |||
| } | |||
| } | |||
| } | |||
| return LinkToPotentialPrecedenceNode(graph, clean_addr_node); | |||
| } | |||
| // Add control edges from the atomic clean node to all potential precedence nodes which may execute before the | |||
| // atomic clean node. We hope the atomic clean node executes with the highest priority in the entire graph. Because | |||
| // of the stream concurrency mechanism, merely placing it at the head cannot guarantee that priority. Therefore, we | |||
| // add control edges from the atomic clean node to the nodes that may be the first node on each stream. Generally, | |||
| // the first nodes on each stream are successors of Data/Variable, and Data/Variable won't generate tasks or | |||
| // execute, so we link to the successors of Data/Variable. | |||
| Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node) { | |||
| GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.", | |||
| atomic_clean_node->GetName().c_str()); | |||
| auto out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor(); | |||
| GE_CHECK_NOTNULL(out_ctrl_anchor); | |||
| for (const auto &node : graph->GetDirectNode()) { | |||
| GE_CHECK_NOTNULL(node); | |||
| bool need_handle = (node->GetType() == DATA || node->GetType() == VARIABLE) && node->GetInAllNodes().empty(); | |||
| if (!need_handle) { | |||
| continue; | |||
| } | |||
| auto second_nodes = node->GetOutAllNodes(); | |||
| for (const auto &second_node : second_nodes) { | |||
| GE_CHECK_NOTNULL(second_node); | |||
| auto in_ctrl_anchor = second_node->GetInControlAnchor(); | |||
| GE_CHECK_NOTNULL(in_ctrl_anchor); | |||
| if (!out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor)) { | |||
| GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(in_ctrl_anchor)); | |||
| GELOGD("Add control edge from %s to %s.", atomic_clean_node->GetName().c_str(), second_node->GetName().c_str()); | |||
| } | |||
| } | |||
| } | |||
| return SUCCESS; | |||
| } | |||
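| The traversal above is a two-hop walk: find every input-less source node, then control-link the clean node to each direct successor. The same walk over a toy adjacency-list graph (standalone and illustrative; the Data/Variable type check is elided): | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| using Adjacency = std::unordered_map<std::string, std::vector<std::string>>; | |||
| // Collect the "second nodes": direct successors of nodes that have no inputs. | |||
| std::vector<std::string> PotentialFirstTasks(const Adjacency &out_edges, | |||
|                                              const Adjacency &in_edges) { | |||
|   std::vector<std::string> result; | |||
|   for (const auto &entry : out_edges) { | |||
|     if (in_edges.count(entry.first) != 0) { | |||
|       continue;  // not a source; only Data/Variable-like sources qualify | |||
|     } | |||
|     for (const auto &succ : entry.second) { | |||
|       result.push_back(succ);  // the atomic clean node would control-link here | |||
|     } | |||
|   } | |||
|   return result; | |||
| } | |||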
@@ -332,11 +365,11 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) {
  }
  // 2.Check atomic attr in node
- std::map<string, std::map<int, int>> node_workspace_offset;
+ std::map<string, std::map<int64_t, int64_t>> atomic_workspace_index_size;
  bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
  bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX);
- node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset);
- if (!has_atomic_input && !has_atomic_output && node_workspace_offset.empty()) {
+ atomic_workspace_index_size = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_index_size);
+ if (!has_atomic_input && !has_atomic_output && atomic_workspace_index_size.empty()) {
    return false;
  }
@@ -67,6 +67,14 @@ class AtomicAddrCleanPass : public GraphPass {
   */
  Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node);
+ /**
+  * Link the atomic clean node to all potential precedence nodes that may execute before it.
+  * @param graph
+  * @param atomic_clean_node
+  * @return
+  */
+ Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node);
  /**
   * Check if this node is atomic op.
   * @param node
@@ -137,7 +137,6 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea
    return INTERNAL_ERROR;
  }
  stream_label = node->GetInDataNodes().at(0)->GetName();
- GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
  bool value = false;
  OpDescPtr op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
@@ -30,8 +30,15 @@ constexpr int kMaxRePassTimes = 10000;
constexpr size_t kMaxOneInNodes = 1000;
// Each iteration takes about 0.3 KB of stack memory; we should change the recursion to a loop later.
constexpr int kMaxRecursiveDepth = 20;
+ struct DuringPassNodeSets {
+   std::unordered_set<Node *> nodes_seen;
+   std::unordered_set<NodePtr> nodes_deleted;
+   std::unordered_set<NodePtr> nodes_re_pass;
+   std::unordered_set<NodePtr> nodes_re_pass_immediately;
+   std::unordered_set<NodePtr> nodes_last;
+ };
- void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &input_edge_nodes,
+ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::deque<NodePtr> &input_edge_nodes,
                              std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) {
  nodes_last.clear();
  for (auto &node : graph->GetDirectNode()) {
@@ -40,7 +47,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i
    }
    size_t in_nums = node->GetInNodes().size();
    if (in_nums == 0) {
-     input_edge_nodes.push(node);
+     input_edge_nodes.push_back(node);
      nodes_seen.insert(node.get());
    } else if (in_nums > kMaxOneInNodes) {
      nodes_last.insert(node);
@@ -48,7 +55,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i
    }
  }
- void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &nodes_to_pass,
+ void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::deque<NodePtr> &nodes_to_pass,
                        std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) {
  for (auto &node : nodes) {
    if (node == nullptr) {
@@ -60,13 +67,30 @@ void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &n
    bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen);
    if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) {
-     nodes_to_pass.push(node);
+     nodes_to_pass.push_back(node);
    }
  }
}
- Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unordered_set<NodePtr> &nodes_re_pass,
-                  std::unordered_set<NodePtr> &nodes_deleted, std::unordered_set<Node *> &nodes_seen) {
+ void PushToRePassIfSeen(NodePtr &node, const std::pair<std::string, BaseNodePass *> &name_to_pass,
+                         std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_to_re_pass,
+                         std::unordered_set<NodePtr> &nodes_re_pass) {
+   for (const auto &node_to_re_pass : nodes_to_re_pass) {
+     if (node_to_re_pass == nullptr) {
+       GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(),
+              node->GetName().c_str(), node->GetType().c_str());
+       continue;
+     }
+     if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
+       GELOGD("The node %s will be re-pass.", node_to_re_pass->GetName().c_str());
+       nodes_re_pass.insert(node_to_re_pass);
+     } else {
+       GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str());
+     }
+   }
+ }
+ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, DuringPassNodeSets &during_pass_node_set) {
  if (node == nullptr) {
    GELOGE(FAILED, "parameter is null.");
    return FAILED;
@@ -90,22 +114,15 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder
    }
    auto nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass();
-   for (const auto &node_to_re_pass : nodes_to_re_pass) {
-     if (node_to_re_pass == nullptr) {
-       GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(),
-              node->GetName().c_str(), node->GetType().c_str());
-       continue;
-     }
-     if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
-       GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str());
-       nodes_re_pass.insert(node_to_re_pass);
-     } else {
-       GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str());
-     }
-   }
+   PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass,
+                      during_pass_node_set.nodes_re_pass);
+   auto nodes_to_re_pass_immediately = name_to_pass.second->GetNodesNeedRePassImmediately();
+   PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass_immediately,
+                      during_pass_node_set.nodes_re_pass_immediately);
    auto nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted();
-   nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end());
+   during_pass_node_set.nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end());
    if (nodes_deleted_by_pass.count(node) > 0) {
      GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(),
             name_to_pass.first.c_str());
@@ -181,36 +198,33 @@ Status GEPass::Run(const NamesToPass &names_to_passes) {
Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
  GELOGD("Begin to run pass on graph, passes count %zu", names_to_passes.size());
- std::queue<NodePtr> nodes;
- std::unordered_set<Node *> nodes_seen;
- std::unordered_set<NodePtr> nodes_deleted;
- std::unordered_set<NodePtr> nodes_re_pass;
- std::unordered_set<NodePtr> nodes_last;
- GetAllNodesNoInputEdge(graph_, nodes, nodes_seen, nodes_last);
+ std::deque<NodePtr> nodes;
+ DuringPassNodeSets during_pass_node_set;
+ GetAllNodesNoInputEdge(graph_, nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last);
  GELOGD("Start points count %zu", nodes.size());
  int re_pass_times = 0;
  do {
-   for (auto &node : nodes_re_pass) {
-     nodes.push(node);
-     nodes_seen.insert(node.get());
+   for (auto &node : during_pass_node_set.nodes_re_pass) {
+     nodes.push_back(node);
+     during_pass_node_set.nodes_seen.insert(node.get());
    }
-   nodes_re_pass.clear();
+   during_pass_node_set.nodes_re_pass.clear();
    while (!nodes.empty()) {
      NodePtr node = nodes.front();
-     nodes.pop();
+     nodes.pop_front();
-     (void)nodes_re_pass.erase(node);
+     (void)during_pass_node_set.nodes_re_pass.erase(node);
      GE_IF_BOOL_EXEC(node == nullptr, GELOGW("node is null"); continue);
-     if (nodes_deleted.count(node) > 0) {
+     if (during_pass_node_set.nodes_deleted.count(node) > 0) {
        GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str());
        continue;
      }
-     AddNextIterNodes(node->GetOutNodes(), nodes, nodes_seen, nodes_last);
+     AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last);
-     auto ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen);
+     auto ret = RunPasses(node, names_to_passes, during_pass_node_set);
      if (ret != SUCCESS) {
        GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u",
               node->GetName().c_str(), node->GetType().c_str(), ret);
@@ -227,7 +241,7 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
      if (has_sub_graph) {
        GELOGD("There are subgraphs on node %s, run passes for for the second time", node->GetName().c_str());
        SetFlagOption(kOptimizeAfterSubGraph, names_to_passes);
-       ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen);
+       ret = RunPasses(node, names_to_passes, during_pass_node_set);
        if (ret != SUCCESS) {
          GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u",
                 node->GetName().c_str(), node->GetType().c_str(), ret);
@@ -239,16 +253,21 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
      // should be called each time at the begin of the iteration
      ClearOption(names_to_passes);
    }
+   for (const auto &node : during_pass_node_set.nodes_re_pass_immediately) {
+     GELOGD("The node %s will be re-pass immediately.", node->GetName().c_str());
+     nodes.push_front(node);
+   }
+   during_pass_node_set.nodes_re_pass_immediately.clear();
  }
- for (auto &node : nodes_last) {
-   bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen);
-   if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) {
-     nodes.push(node);
+ for (auto &node : during_pass_node_set.nodes_last) {
+   bool all_in_nodes_seen = node->IsAllInNodesSeen(during_pass_node_set.nodes_seen);
+   if (all_in_nodes_seen && during_pass_node_set.nodes_seen.insert(node.get()).second) {
+     nodes.push_back(node);
    }
  }
- nodes_last.clear();
- } while ((!nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes);
+ during_pass_node_set.nodes_last.clear();
+ } while ((!during_pass_node_set.nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes);
  if (re_pass_times == kMaxRePassTimes) {
    GELOGW("re_pass_times should not come to %d", kMaxRePassTimes);
@@ -53,6 +53,8 @@ class BaseNodePass {
  std::unordered_set<NodePtr> GetNodesNeedRePass() { return nodes_need_re_pass_; }
+ std::unordered_set<NodePtr> GetNodesNeedRePassImmediately() { return nodes_need_re_pass_immediately_; }
  std::unordered_set<NodePtr> GetNodesDeleted() { return nodes_deleted_; }
  void SetOption(NodePassOption option, const std::string &value) { options_[option] = value; }
@@ -62,6 +64,7 @@ class BaseNodePass {
  void init() {
    nodes_need_re_pass_.clear();
    nodes_deleted_.clear();
+   nodes_need_re_pass_immediately_.clear();
  }
 protected:
@@ -79,6 +82,14 @@ class BaseNodePass {
  ///
  void AddRePassNode(NodePtr &node) { nodes_need_re_pass_.insert(node); }
+ ///
+ /// Add a node to be optimized again immediately. If you add a new node to the graph, or
+ /// change a node's connections, and you want to make sure the node will be
+ /// optimized by other passes, call this function.
+ /// @param node
+ ///
+ void AddImmediateRePassNode(NodePtr &node) { nodes_need_re_pass_immediately_.insert(node); }
  ///
  /// Add a node and its input/output data nodes to be optimized again.
  /// @param node
@@ -109,6 +120,7 @@ class BaseNodePass {
 private:
  std::unordered_set<NodePtr> nodes_need_re_pass_;
+ std::unordered_set<NodePtr> nodes_need_re_pass_immediately_;
  std::unordered_set<NodePtr> nodes_deleted_;
  std::map<NodePassOption, std::string> options_;
};
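A sketch of how a node pass might use the new hook (the pass and its internals are hypothetical; only BaseNodePass, Run, and AddImmediateRePassNode come from the header above, and the include path is an assumption):

```cpp
#include "graph/passes/base_pass.h"  // assumed header path

namespace ge {
class ExampleRewirePass : public BaseNodePass {
 public:
  Status Run(NodePtr &node) override {
    bool rewired = false;
    // ... a real pass would rewrite this node's edges here and set `rewired` ...
    if (rewired) {
      // Ask the framework to revisit this node in the current round,
      // ahead of the remaining ready nodes, rather than in the next round.
      AddImmediateRePassNode(node);
    }
    return SUCCESS;
  }
};
}  // namespace ge
```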
@@ -25,6 +25,7 @@
namespace ge {
Status InferShapePass::Run(NodePtr &node) {
  // kOptimizeAfterSubGraph exist means after subgraph
  auto ret = ShapeRefiner::InferShapeAndType(node, !OptionExists(kOptimizeAfterSubGraph));
  if (ret != GRAPH_SUCCESS) {
    // select INFERSHAPE failed info
@@ -41,6 +42,20 @@ Status InferShapePass::Run(NodePtr &node) {
    GELOGE(GE_GRAPH_INFERSHAPE_FAILED, "infershape failed. node: %s", node->GetName().c_str());
    return GE_GRAPH_INFERSHAPE_FAILED;
  }
+ bool need_repass = false;
+ auto has_attr = AttrUtils::GetBool(node->GetOpDesc(), "need_infer_again_", need_repass);
+ if (has_attr) {
+   if (!OptionExists(kOptimizeAfterSubGraph)) {
+     return SUCCESS;
+   }
+   if (need_repass) {
+     AddImmediateRePassNode(node);
+     GELOGD("Node %s need repass immediately.", node->GetName().c_str());
+   } else {
+     // clear attr on while
+     node->GetOpDesc()->DelAttr("need_infer_again_");
+   }
+ }
  return SUCCESS;
}
} // namespace ge
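For context, "need_infer_again_" is an ordinary bool attribute on the OpDesc; the producer side (in GE this comes from while-loop shape inference, though take the snippet as an illustrative sketch rather than the exact call site) marks a node roughly like:

```cpp
// Mark the node so InferShapePass schedules an immediate re-pass for it.
(void) ge::AttrUtils::SetBool(node->GetOpDesc(), "need_infer_again_", true);
```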
@@ -23,7 +23,9 @@ namespace ge {
Status MergeInputMemcpyPass::Run(ComputeGraphPtr graph) {
  GELOGD("MergeInputMemcpyPass Enter");
  for (const auto &node : graph->GetDirectNode()) {
-   if ((node->GetType() != MERGE) && (node->GetType() != REFMERGE)) {
+   std::string type;
+   GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed.");
+   if ((type != MERGE) && (type != REFMERGE)) {
      continue;
    }
    GE_CHECK_NOTNULL(node->GetOpDesc());
@@ -95,4 +97,3 @@ NodePtr MergeInputMemcpyPass::CreateMemcpyAsyncNode(const ComputeGraphPtr &graph
  return graph->AddNode(op_desc);
}
} // namespace ge
@@ -25,7 +25,9 @@ Status MergeToStreamMergePass::Run(ComputeGraphPtr graph) {
  bypass_nodes_.clear();
  for (const auto &node : graph->GetDirectNode()) {
-   if ((node->GetType() != MERGE) && (node->GetType() != REFMERGE)) {
+   std::string type;
+   GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed.");
+   if ((type != MERGE) && (type != REFMERGE)) {
      continue;
    }
@@ -101,7 +101,8 @@ Status NextIterationPass::FindWhileGroups() {
  const std::string &frame_name = loop_group_iter.first;
  for (const auto &enter_node : loop_group_iter.second->enter_nodes) {
    for (const auto &out_node : enter_node->GetOutAllNodes()) {
-     const string &type = out_node->GetType();
+     std::string type;
+     GE_CHK_STATUS_RET(GetOriginalType(out_node, type), "Get node type failed.");
      if ((type != MERGE) && (type != REFMERGE)) {
        continue;
      }
@@ -310,7 +311,8 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string
  }
  for (const auto &tmp_node : nodes) {
-   const std::string type = tmp_node->GetType();
+   std::string type;
+   GE_CHK_STATUS_RET(GetOriginalType(tmp_node, type), "Get node type failed.");
    if ((target_type == LOOPCOND) && (type == target_type)) {
      target_node = tmp_node;
      break;
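The recurring GetType() → GetOriginalType() change in these passes matters for wrapped framework ops: such a node reports a generic type (FRAMEWORKOP) and carries its real type in an attribute, so a direct comparison against MERGE/REFMERGE would miss it. A sketch of the behavior being relied on (not the exact implementation):

```cpp
// What GetOriginalType provides, roughly: recover the wrapped type for FrameworkOp nodes.
std::string type = node->GetType();
if (type == FRAMEWORKOP) {
  (void) ge::AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type);
}
```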
@@ -35,9 +35,9 @@
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
+ #include "utils/node_utils.h"
namespace ge {
Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector<int64_t> &data,
                                              std::vector<GeTensorPtr> &v_output, const bool scalar_output) {
  Status ret = SUCCESS;
@@ -246,6 +246,12 @@ NodePtr PassUtils::GetInDataNode(const ConstNodePtr &node, int index) {
  return src_node;
}
+ NodePtr PassUtils::GetInNodeCrossSubgraphByIndex(const ConstNodePtr &node, int index) {
+   auto src_node = GetInDataNode(node, index);
+   return NodeUtils::GetInNodeCrossSubgraph(src_node);
+ }
bool PassUtils::IsNeedTrainIteFlowCtrl(const ComputeGraphPtr &compute_graph) {
  if (compute_graph == nullptr) {
    return false;
@@ -30,6 +30,8 @@ class PassUtils {
  static NodePtr GetInDataNode(const ConstNodePtr &node, int index);
+ static NodePtr GetInNodeCrossSubgraphByIndex(const ConstNodePtr &node, int index);
  static bool IsConstant(const ConstNodePtr &node);
  static Status SetOutNodeWeight(const OutDataAnchorPtr &out_data_anchor, const NodePtr &src_node);
@@ -279,7 +279,7 @@ Status SubexpressionMigrationPass::GraphNodeMigration(const ComputeGraphPtr &gra
  const auto &in_anchor = in_anchors.at(i);
  const auto &base_node = in_anchor->GetOwnerNode();
  GELOGD("Get Data direct node: %s", base_node->GetName().c_str());
- if (!base_node->GetHostNode()) {
+ if (!base_node->GetHostNode() || base_node->GetType() == SWITCH) {
    continue;
  }
@@ -94,6 +94,12 @@ Status SwitchDeadBranchElimination::DeleteSwitchNode(NodePtr &node, NodePtr &pre
    GELOGE(FAILED, "parameter is null.");
    return FAILED;
  }
+ // If the two nodes are not in the same graph, use the node's direct in-node instead of pred_node.
+ if (node->GetOwnerComputeGraph() != pred_node->GetOwnerComputeGraph()) {
+   pred_node = PassUtils::GetInDataNode(node, kPredInputIndex);
+ }
  // link pred's in control nodes to switch
  if (GraphUtils::CopyInCtrlEdges(pred_node, node) != GRAPH_SUCCESS) {
    return FAILED;
@@ -131,7 +137,7 @@ Status SwitchDeadBranchElimination::Run(NodePtr &node) {
    return SUCCESS;
  }
- auto pred_node = PassUtils::GetInDataNode(node, kPredInputIndex);
+ auto pred_node = PassUtils::GetInNodeCrossSubgraphByIndex(node, kPredInputIndex);
  if (pred_node == nullptr) {
    GELOGD("[%s] Pred input is null.", node->GetName().c_str());
    return SUCCESS;
@@ -143,7 +149,7 @@ Status SwitchDeadBranchElimination::Run(NodePtr &node) {
    return SUCCESS;
  }
- auto input_node = PassUtils::GetInDataNode(node, kDataInputIndex);
+ auto input_node = PassUtils::GetInNodeCrossSubgraphByIndex(node, kDataInputIndex);
  if (input_node == nullptr) {
    GELOGD("[%s] Data input is null.", node->GetName().c_str());
    return SUCCESS;
@@ -448,6 +448,8 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph)
  // select first stream_switch
  NodePtr stream_switch = switch_list.front();
+ // set stream_label
+ GE_CHK_STATUS_RET(SetStreamLabel(stream_switch, cast_node->GetName()), "Set stream label failed.");
  OpDescPtr switch_desc = stream_switch->GetOpDesc();
  GE_CHECK_NOTNULL(switch_desc);
  switch_desc->SetName(CheckDuplicateName(cond_group + "/" + STREAMSWITCH + (true_branch_flag ? "_t" : "_f")));
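Note that the SetStreamLabel call added here appears to pair with the one removed from AttachStreamLabelPass::AttachFlag above: the combined StreamSwitch now gets its stream label at the point where the switch nodes are merged, rather than later when flags are attached.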
@@ -1772,8 +1772,8 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) {
  if (dim < UNKNOWN_DIM_NUM) {
    std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(dim) + "]" ;
    std::string reason = "it need >= -2";
-   REPORT_INPUT_ERROR(
-     "E19025", std::vector<std::string>({"situation", "reason"}),std::vector<std::string>({situation, reason}));
+   REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"situation", "reason"}),
+                      std::vector<std::string>({situation, reason}));
    GELOGE(GE_GRAPH_INIT_FAILED, "[Check][InputDim]data dim %zu is not supported, need >= -2, real:%ld.", i, dim);
    return GE_GRAPH_INIT_FAILED;
  }
@@ -428,7 +428,8 @@ Status AippOp::ConvertRelatedInputNameToRank() {
  if (!convert_flag) {
    string error_msg = "Top name " + related_input_name + "convert rank failed, Please"
                       " ensure top name in aipp config is the top name of data node.";
-   GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+   GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
+   REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
    return PARAM_INVALID;
  }
@@ -124,13 +124,15 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() {
  if (another_item->related_input_name().empty()) {
    string error_msg = "Can not both set related_input_name and related_input_rank!"
                       " Please ensure param is the same with the first aipp config(related_input_name).";
-   GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+   GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
+   REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
    return PARAM_INVALID;
  }
  if (item->related_input_name() == another_item->related_input_name()) {
    string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name"
                       " param is different in different aipp config.";
-   GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+   GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
+   REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
    return PARAM_INVALID;
  }
}
@@ -150,13 +152,15 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() {
  if (!another_item->related_input_name().empty()) {
    string error_msg = "Can not both set related_input_rank and related_input_name!"
                       " Please ensure param is the same with the first aipp config(related_input_rank).";
-   GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+   GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
+   REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
    return PARAM_INVALID;
  }
  if (item->related_input_rank() == another_item->related_input_rank()) {
    string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank"
                       " param is different in different aipp config.";
-   GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
+   GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
+   REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
    return PARAM_INVALID;
  }
}
@@ -212,7 +216,7 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) {
      }
    }
  }
- GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt),
+ GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt),
                          PARAM_INVALID,
                          "Can not config part of outputs of Data node to support AIPP, config all "
                          "of the outputs of Data to support AIPP, or config none of them");
@@ -3,6 +3,7 @@ set(PROTO_LIST
)
protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
+ protobuf_generate(ge_atcstub PROTO_ATCSTUB_SRCS PROTO_ATCSTUB_HDRS ${PROTO_LIST})
set(SRC_LIST
    "engine/host_cpu_engine.cc"
@@ -61,7 +62,7 @@ target_link_libraries(host_cpu_engine PRIVATE
)
############ atcstub/libhost_cpu_engine.so ############
- add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
+ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_ATCSTUB_HDRS})
target_compile_options(atc_host_cpu_engine PRIVATE
    -Werror
@@ -84,7 +85,7 @@ target_include_directories(atc_host_cpu_engine PRIVATE
    ${METADEF_DIR}/inc/external
    ${METADEF_DIR}/inc/external/graph
    ${CMAKE_BINARY_DIR}
-   ${CMAKE_BINARY_DIR}/proto/ge
+   ${CMAKE_BINARY_DIR}/proto/ge_atcstub
    #### yellow zone ####
    ${GE_CODE_DIR}/../inc
    #### blue zone ####
@@ -407,7 +407,8 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe
  // check input data type
  auto x_data_type = tensor0->GetTensorDesc().GetDataType();
  if (supported_type.find(x_data_type) == supported_type.end()) {
-   GELOGI("GatherV2Kernel does not support this Data type:%s.", TypeUtils::DataTypeToSerialString(x_data_type).c_str());
+   GELOGI("GatherV2Kernel does not support this Data type:%s.",
+          TypeUtils::DataTypeToSerialString(x_data_type).c_str());
    return NOT_CHANGED;
  }
  // calc output shape
@@ -68,9 +68,10 @@ struct GraphExecutionContext {
  DumpProperties dump_properties;
  bool trace_enabled = false;
  bool dump_enabled = false;
- std::atomic_bool is_eos_;
+ std::atomic_bool is_eos_{false};
  long profiling_level = 0;
  long iteration = 0;
+ void *global_step = nullptr;
 private:
  Status status = SUCCESS;
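The `is_eos_{false}` change is more than style: a default-constructed std::atomic_bool holds an indeterminate value (value-initialization of atomics only became the default in C++20), so the brace initializer is what guarantees the flag starts out false. In miniature:

```cpp
#include <atomic>

std::atomic_bool flag_a;         // default-constructed: indeterminate value before C++20
std::atomic_bool flag_b{false};  // value-initialized: guaranteed to start as false
```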
@@ -46,10 +46,6 @@ void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) {
  model_id_ = model_id;
}
- void HybridModelAsyncExecutor::SetModelName(const string &model_name) {
-   om_name_ = model_name;
- }
Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper> &data) {
  GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL,
                     "Data queue is full, please call again later, model_id %u ", model_id_);
@@ -67,6 +63,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &lis
  future_ = std::async(std::launch::async, [&]() -> Status {
    GetThreadLocalContext() = *executor_->GetContext()->ge_context;
    GetContext().SetSessionId(executor_->GetContext()->session_id);
+   GetContext().SetContextId(executor_->GetContext()->context_id);
    return RunInternal();
  });
@@ -105,7 +102,7 @@ Status HybridModelAsyncExecutor::Init() {
  executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
  GE_CHECK_NOTNULL(executor_);
  GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
- GE_CHK_STATUS_RET(DumpOpDebug(),"Dump op debug failed in hybrid engine");
+ GE_CHK_STATUS_RET(DumpOpDebug(), "Dump op debug failed in hybrid engine");
  GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups());
  if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) {
@@ -136,8 +133,12 @@ Status HybridModelAsyncExecutor::RunInternal() {
  GE_MAKE_GUARD(not_used_var, [&] { GE_CHK_RT(rtDeviceReset(device_id)); });
  while (run_flag_) {
+   // The model has not actually started running before data is received.
+   SetRunningFlag(false);
    std::shared_ptr<InputDataWrapper> data_wrapper;
    Status ret = data_inputer_->Pop(data_wrapper);
+   // The model actually starts running now.
+   SetRunningFlag(true);
    if (data_wrapper == nullptr || ret != SUCCESS) {
      GELOGI("data_wrapper is null!, ret = %u", ret);
      continue;
@@ -166,6 +167,7 @@ Status HybridModelAsyncExecutor::RunInternal() {
    } else {
      GELOGI("HybridModel will execute in singleline mode");
      ge::GetContext().SetSessionId(executor_->GetContext()->session_id);
+     ge::GetContext().SetContextId(executor_->GetContext()->context_id);
      ret = executor_->Execute(args);
    }
    ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput());
@@ -176,7 +178,8 @@ Status HybridModelAsyncExecutor::RunInternal() {
    RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] End", iterator_count_);
    iterator_count_++;
-   GELOGI("run iterator count is %lu", iterator_count_);
+   SetRunningFlag(false);
+   GELOGI("run iterator count is %lu, model_id:%u", iterator_count_, model_id_);
  }
  CsaInteract::GetInstance().WriteInternalErrorCode();
@@ -51,12 +51,16 @@ class HybridModelAsyncExecutor {
  void SetModelId(uint32_t model_id);
- void SetModelName(const string &model_name);
  Status Stop();
  Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data);
+ uint32_t GetDataInputerSize() { return data_inputer_->Size(); }
+ bool GetRunningFlag() const { return running_flag_; }
+ void SetRunningFlag(bool flag) { running_flag_ = flag; }
 private:
  Status InitInputDesc();
@@ -86,6 +90,8 @@ class HybridModelAsyncExecutor {
  uint32_t device_id_ = 0U;
  uint32_t model_id_ = 0U;
  std::atomic_bool run_flag_;
+ // check whether model is running with data
+ bool running_flag_ = false;
  std::unique_ptr<DataInputer> data_inputer_;
  std::unique_ptr<HybridModelExecutor> executor_;
  std::unique_ptr<HybridModelPipelineExecutor> pipe_executor_;
@@ -97,7 +103,6 @@ class HybridModelAsyncExecutor {
  std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_;
  std::vector<bool> is_input_dynamic_;
  std::shared_ptr<ModelListener> listener_;
- string om_name_;
  DataDumper data_dumper_;
  bool is_op_debug_reg_ = false;
  OpdebugRegister op_debug_register_;
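One caveat worth noting: unlike run_flag_, the new running_flag_ is a plain bool written by the executor thread and read from other threads via GetRunningFlag(), so callers get a best-effort snapshot rather than a synchronized value.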
@@ -46,7 +46,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
  GELOGD("Start to execute model.");
  auto root_graph_item = model_->GetRootGraphItem();
  GE_CHECK_NOTNULL(root_graph_item);
+ if (context_.global_step != nullptr) {
+   GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
+                               sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
+ }
  SubgraphExecutor executor(model_->GetRootGraphItem(), &context_);
  auto ret = ExecuteGraphInternal(executor, args);
  Cleanup();
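With this change, every execution begins by asynchronously copying the host-side iteration counter into the device buffer held in context_.global_step (populated from model_->GetGlobalStep() in the InitExecutionContext hunk below), which is the same pointer the dynamic-node dump path now reads from the execution context.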
@@ -98,6 +101,7 @@ Status HybridModelExecutor::InitExecutionContext() {
  GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0));
  GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context));
+ context_.global_step = model_->GetGlobalStep();
  context_.stream = stream_;
  context_.model = model_;
  context_.is_eos_ = false;
@@ -130,6 +134,16 @@ Status HybridModelExecutor::ResetExecutionContext(GraphExecutionContext &context
  string ctx_id = std::to_string(context.context_id);
  RuntimeInferenceContext::DestroyContext(ctx_id);
  GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
+ RuntimeInferenceContext *ctx = nullptr;
+ GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
+ for (auto &host_tensor : context.model->GetHostTensors()) {
+   auto node_id = host_tensor.first;
+   for (const auto &output_idx_and_tensor : host_tensor.second) {
+     auto output_idx = output_idx_and_tensor.first;
+     GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
+     ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone());
+   }
+ }
  return SUCCESS;
}
} // namespace hybrid
@@ -38,6 +38,16 @@ Status StageExecutor::ResetExecutionContext(GraphExecutionContext &context) {
  string ctx_id = std::to_string(context.context_id);
  RuntimeInferenceContext::DestroyContext(ctx_id);
  GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
+ RuntimeInferenceContext *ctx = nullptr;
+ GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
+ for (auto &host_tensor : context.model->GetHostTensors()) {
+   auto node_id = host_tensor.first;
+   for (const auto &output_idx_and_tensor : host_tensor.second) {
+     auto output_idx = output_idx_and_tensor.first;
+     GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
+     ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone());
+   }
+ }
  return SUCCESS;
}
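Both executors now seed the freshly created RuntimeInferenceContext with clones of the model's constant host tensors, presumably so that anything that later queries the context for those node outputs can read the values directly; cloning keeps each context's copy independent of the model's.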
@@ -35,12 +35,14 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
         node_item.NodeName().c_str(),
         this->num_pending_shapes_);
- for (int i = 0; i < node_item.num_inputs; ++i){
-   input_tensor_desc.emplace_back(*node_item.MutableInputDesc(i));
+ input_tensor_desc.resize(node_item.num_inputs);
+ for (int i = 0; i < node_item.num_inputs; ++i) {
+   node_item.GetInputDesc(i, input_tensor_desc[i]);
  }
- for (int i = 0; i < node_item.num_outputs; ++i){
-   output_tensor_desc.emplace_back(*node_item.MutableOutputDesc(i));
+ output_tensor_desc.resize(node_item.num_outputs);
+ for (int i = 0; i < node_item.num_outputs; ++i) {
+   node_item.GetOutputDesc(i, output_tensor_desc[i]);
  }
}
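The switch from emplace_back(*MutableInputDesc(i)) to resize() plus GetInputDesc(i, ...) avoids dereferencing the raw pointer returned by MutableInputDesc, presumably letting the accessor report a missing descriptor instead of risking a null dereference.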
@@ -227,6 +227,7 @@ Status SubgraphExecutor::PrepareNodes(int group) {
  if (node_item.is_dynamic) {
    auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status {
      GetContext().SetSessionId(context_->session_id);
+     GetContext().SetContextId(context_->context_id);
      GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state));
      return PrepareForExecution(context_, *p_node_state);
    });
@@ -273,10 +274,8 @@ Status SubgraphExecutor::PrepareNodes(int group) {
}
Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
- GetContext().SetSessionId(context_->context_id);
  HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
                        "[%s] Failed to InferShape.", node_state.GetName().c_str());
- GetContext().SetSessionId(context_->session_id);
  HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
                        "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
  return SUCCESS;
@@ -345,6 +344,7 @@ Status SubgraphExecutor::ScheduleTasks(int group) {
  GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str());
  auto prepare_future = std::async(std::launch::async, [&]() -> Status {
    GetContext().SetSessionId(context_->session_id);
+   GetContext().SetContextId(context_->context_id);
    auto ret = PrepareNodes(group);
    ready_queue_.Push(nullptr);
    return ret;
@@ -206,37 +206,35 @@ Status NodeDoneCallback::DumpDynamicNode() {
    return PARAM_INVALID;
  }
  auto op_desc = node->GetOpDesc();
- GE_CHECK_NOTNULL(graph_context_);
- const HybridModel *model = graph_context_->model;
- GE_CHECK_NOTNULL(model);
- std::string dynamic_model_name = model->GetModelName();
- std::string dynamic_om_name = model->GetOmName();
- uint32_t model_id = model->GetModelId();
- if (!context_->GetDumpProperties().IsLayerNeedDump(dynamic_model_name, dynamic_om_name, op_desc->GetName())) {
-   GELOGI("[%s] is not in dump list, no need dump", op_desc->GetName().c_str());
-   return SUCCESS;
- }
- dump_op_.SetDynamicModelInfo(dynamic_model_name, dynamic_om_name, model_id);
  auto stream = context_->GetStream();
  vector<uintptr_t> input_addrs;
  vector<uintptr_t> output_addrs;
  for (int i = 0; i < context_->NumInputs(); i++) {
    auto tensor_value = context_->GetInput(i);
    GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr");
-   uint64_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
+   uintptr_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
    input_addrs.emplace_back(input_addr);
  }
  for (int j = 0; j < context_->NumOutputs(); j++) {
    auto tensor_value = context_->GetOutput(j);
    GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr");
-   uint64_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
+   uintptr_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
    output_addrs.emplace_back(output_addr);
  }
  dump_op_.SetDumpInfo(context_->GetDumpProperties(), op_desc, input_addrs, output_addrs, stream);
+ GE_CHECK_NOTNULL(graph_context_);
+ const HybridModel *model = graph_context_->model;
+ GE_CHECK_NOTNULL(model);
+ std::string dynamic_model_name = model->GetModelName();
+ uint32_t model_id = model->GetModelId();
+ dump_op_.SetDynamicModelInfo(dynamic_model_name, model_id);
- void *global_step = nullptr;
- TensorValue *varible_global_step = context_->GetVariable(NODE_NAME_GLOBAL_STEP);
- if (varible_global_step != nullptr) {
-   global_step = const_cast<void *>(varible_global_step->GetData());
- }
  void *loop_per_iter = nullptr;
  TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
  if (varible_loop_per_iter != nullptr) {
@@ -248,6 +246,7 @@ Status NodeDoneCallback::DumpDynamicNode() {
  if (varible_loop_cond != nullptr) {
    loop_cond = const_cast<void *>(varible_loop_cond->GetData());
  }
+ void *global_step = context_->GetExecutionContext()->global_step;
  dump_op_.SetLoopAddr(global_step, loop_per_iter, loop_cond);
  GE_CHK_STATUS_RET(dump_op_.LaunchDumpOp(), "Failed to launch dump op in hybird model");
@@ -19,6 +19,7 @@
#include "hybrid/model/hybrid_model.h"
#include "hybrid/executor/hybrid_model_async_executor.h"
#include "hybrid/node_executor/node_executor.h"
+ #include "graph/manager/graph_manager_utils.h"
namespace ge {
namespace hybrid {
@@ -76,9 +77,8 @@ class HybridDavinciModel::Impl {
    executor_.SetDeviceId(device_id);
  }
- void SetModelName(const string &model_name) {
-   model_.SetModelName(model_name);
-   executor_.SetModelName(model_name);
+ void SetOmName(const string &model_name) {
+   model_.SetOmName(model_name);
  }
  uint64_t GetSessionId() {
@@ -108,6 +108,17 @@ class HybridDavinciModel::Impl {
    model_.SetModelDescVersion(is_new_model_desc);
  }
+ uint32_t GetDataInputerSize() { return executor_.GetDataInputerSize(); }
+ bool GetRunningFlag() const { return executor_.GetRunningFlag(); }
+ Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
+   auto listener = dynamic_cast<RunAsyncListener *>(listener_.get());
+   GE_CHECK_NOTNULL(listener);
+   listener->SetCallback(callback);
+   return SUCCESS;
+ }
 private:
  std::shared_ptr<ModelListener> listener_;
  HybridModel model_;
@@ -181,9 +192,9 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
  }
}
- void HybridDavinciModel::SetModelName(const string &model_name) {
+ void HybridDavinciModel::SetOmName(const string &om_name) {
  if (impl_ != nullptr) {
-   impl_->SetModelName(model_name);
+   impl_->SetOmName(om_name);
  }
}
@@ -222,5 +233,16 @@ uint64_t HybridDavinciModel::GetSessionId() {
  GE_CHECK_NOTNULL(impl_);
  return impl_->GetSessionId();
}
+ uint32_t HybridDavinciModel::GetDataInputerSize() {
+   GE_CHECK_NOTNULL(impl_);
+   return impl_->GetDataInputerSize();
+ }
+ bool HybridDavinciModel::GetRunningFlag() const { return impl_->GetRunningFlag(); }
+ Status HybridDavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
+   return impl_->SetRunAsyncListenerCallback(callback);
+ }
} // namespace hybrid
} // namespace ge
@@ -57,7 +57,7 @@ class HybridDavinciModel {
  void SetDeviceId(uint32_t device_id);
- void SetModelName(const string &model_name);
+ void SetOmName(const string &om_name);
  uint64_t GetSessionId();
@@ -74,6 +74,12 @@ class HybridDavinciModel {
  void SetModelDescVersion(bool is_new_model_desc);
+ uint32_t GetDataInputerSize();
+ bool GetRunningFlag() const;
+ Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);
 private:
  HybridDavinciModel() = default;
  class Impl;
@@ -61,13 +61,17 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) {
void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
}
- void HybridDavinciModel::SetModelName(const string &model_name) {
+ void HybridDavinciModel::SetOmName(const string &om_name) {
}
uint64_t HybridDavinciModel::GetSessionId() {
  return 0;
}
+ uint32_t HybridDavinciModel::GetDataInputerSize() {
+   return 0;
+ }
Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
  return UNSUPPORTED;
}
@@ -87,5 +91,13 @@ Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &i
void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
}
+ bool HybridDavinciModel::GetRunningFlag() const {
+   return false;
+ }
+ Status HybridDavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
+   return UNSUPPORTED;
+ }
} // namespace hybrid
} // namespace ge