You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

isamax_power8.S 5.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /* .file "isamax.c"
  2. .abiversion 2
  3. .section ".text"
  4. .align 2
  5. .p2align 4,,15
  6. .globl isamax_k
  7. .type isamax_k, @function
  8. */
  9. #define ASSEMBLER
  10. #include "common.h"
  /*
   * isamax_k
   *
   * Scans single-precision values and returns the 1-based position of the
   * element with the largest absolute value.
   *
   * Registers as read by the code below:
   *   r3  element count n (copied to r11 on entry)
   *   r4  address of the first float
   *   r5  stride in elements (multiplied by 4 to get the byte step)
   * Result in r3:
   *   0            when n <= 0 or stride <= 0
   *   best + 1     otherwise, where best is the 0-based index tracked in r8
   * The running maximum starts at 0.0 with best = 0, and every scalar update
   * requires a strictly greater value, so an earlier index is kept on ties.
   *
   * Three paths:
   *   stride != 1           .L31 (4 elements per iteration) then tail .L35
   *   stride == 1, n < 64   scalar loop .L61
   *   stride == 1, n >= 64  VSX loop .L5 (64 floats per iteration), a
   *                         four-lane reduction, then scalar tail .L21
   *
   * The VSX path stores r31 and v29-v31 below r1 without moving r1 and
   * reloads them at .L16.
   * NOTE(review): the instruction scheduling and the .ident string suggest
   * this is compiler output; confirm before hand-editing the order.
   */
  11. PROLOGUE /* macro supplied by common.h (definition not visible here) */
  12. isamax_k:
  13. .LCF0:
  14. 0: addis 2,12,.TOC.-.LCF0@ha /* r2 = TOC pointer derived from the entry address in r12 */
  15. addi 2,2,.TOC.-.LCF0@l
  16. .localentry isamax_k,.-isamax_k /* local entry point starts after the TOC setup */
  17. mr. 11,3 /* r11 = n; CR0 = n compared with 0 */
  18. ble 0,.L36 /* n <= 0: return 0 */
  19. cmpdi 7,5,0
  20. li 3,0 /* preload return value 0 */
  21. blelr 7 /* stride <= 0: return 0 */
  22. cmpdi 7,5,1
  23. beq 7,.L69 /* stride == 1: contiguous paths */
  24. rldicr. 7,11,0,61 /* r7 = n with the low 2 bits cleared (multiple of 4) */
  25. beq 0,.L40 /* fewer than 4 elements: tail loop only */
  26. sldi 3,5,1
  27. xxlxor 0,0,0 /* f0 = 0.0: running max |x| */
  28. sldi 6,5,2 /* r6 = 4*stride: byte step between elements */
  29. add 3,3,5
  30. sldi 0,5,4 /* r0 = 16*stride: byte step per group of 4 */
  31. sldi 3,3,2 /* r3 = 12*stride: byte offset of the 4th element in a group */
  32. sldi 5,5,3 /* r5 = 8*stride: byte offset of the 3rd element in a group */
  33. mr 9,4 /* r9 = read cursor */
  34. li 8,0 /* r8 = 0-based index of the running max */
  35. li 10,0 /* r10 = index of the first element of the current group */
  36. .p2align 4,,15
  37. .L31: /* strided loop: 4 elements per iteration */
  38. lfs 12,0(9) /* element 0 of the group */
  39. fabs 12,12
  40. fcmpu 7,12,0
  41. bng 7,.L23 /* skip unless strictly greater than the running max */
  42. fmr 0,12
  43. mr 8,10
  44. .L23:
  45. lfsx 12,9,6 /* element 1 of the group */
  46. fabs 12,12
  47. fcmpu 7,12,0
  48. bng 7,.L25
  49. fmr 0,12
  50. addi 8,10,1
  51. .L25:
  52. lfsx 12,9,5 /* element 2 of the group */
  53. fabs 12,12
  54. fcmpu 7,12,0
  55. bng 7,.L27
  56. fmr 0,12
  57. addi 8,10,2
  58. .L27:
  59. lfsx 12,9,3 /* element 3 of the group */
  60. add 9,9,0 /* advance the cursor by one group */
  61. fabs 12,12
  62. fcmpu 7,12,0
  63. bng 7,.L29
  64. fmr 0,12
  65. addi 8,10,3
  66. .L29:
  67. addi 10,10,4
  68. cmpd 7,7,10
  69. bgt 7,.L31 /* loop while r7 (n rounded down to 4) > r10 */
  70. addi 7,7,-1
  71. srdi 7,7,2
  72. addi 7,7,1 /* r7 = number of groups processed */
  73. sldi 9,7,2 /* r9 = number of elements processed */
  74. mulld 7,6,7 /* r7 = groups * 4*stride; shifted left by 2 at .L22 to get bytes consumed */
  75. cmpd 7,11,9
  76. ble 7,.L67 /* nothing left over: return */
  77. .L22: /* strided tail: r9 = next index, r7 = byte offset / 4 */
  78. addi 10,9,1
  79. sldi 7,7,2
  80. cmpd 7,10,11
  81. subf 10,9,11 /* r10 = n - r9: remaining element count */
  82. mtctr 10
  83. add 4,4,7 /* r4 = address of the first remaining element */
  84. bgt 7,.L54 /* r9 + 1 > n: force a single iteration */
  85. li 3,-1
  86. rldicr 3,3,0,0 /* r3 = 0x8000000000000000 */
  87. cmpd 7,11,3
  88. beq 7,.L54 /* n equals that value: force a single iteration */
  89. .p2align 4,,15
  90. .L35: /* one element per iteration, CTR-controlled */
  91. lfs 12,0(4)
  92. add 4,4,6
  93. fabs 12,12
  94. fcmpu 7,12,0
  95. bng 7,.L33
  96. fmr 0,12
  97. mr 8,9
  98. .L33:
  99. addi 9,9,1
  100. bdnz .L35
  101. .L67: /* common return for the paths that saved no registers */
  102. addi 3,8,1 /* convert the 0-based best index to 1-based */
  103. blr
  104. .p2align 4,,15
  105. .L36: /* n <= 0 */
  106. li 3,0
  107. blr
  108. .p2align 4,,15
  109. .L69: /* stride == 1 */
  110. rldicr. 10,11,0,57 /* r10 = n with the low 6 bits cleared (multiple of 64) */
  111. bne 0,.L70 /* at least 64 elements: VSX path */
  112. addi 7,10,1 /* r10 is 0 here, so r7 = 1 */
  113. sldi 9,10,2
  114. xxlxor 12,12,12 /* f12 = 0.0: running max |x| */
  115. cmpd 7,7,11
  116. add 4,4,9
  117. subf 9,10,11 /* r9 = n - r10: iteration count */
  118. li 8,0 /* r8 = 0-based index of the running max */
  119. mtctr 9
  120. bgt 7,.L60 /* r10 + 1 > n: force a single iteration */
  121. li 3,-1
  122. rldicr 3,3,0,0 /* r3 = 0x8000000000000000 */
  123. cmpd 7,11,3
  124. beq 7,.L60
  125. .p2align 4,,15
  126. .L61: /* contiguous scalar loop for n < 64 */
  127. lfs 0,0(4)
  128. addi 4,4,4
  129. fabs 0,0
  130. fcmpu 7,0,12
  131. bng 7,.L63 /* skip unless strictly greater than the running max */
  132. fmr 12,0
  133. mr 8,10
  134. .L63:
  135. addi 10,10,1
  136. bdnz .L61
  137. b .L67
  138. .p2align 4,,15
  139. .L70: /* VSX path setup; r10 = n rounded down to a multiple of 64 */
  140. li 0,-64
  141. std 31,-8(1) /* save r31 below the stack pointer (r1 is not adjusted) */
  142. addis 3,2,.LC2@toc@ha
  143. vspltisw 18,0 /* v18 (vs50) = 0: running per-lane best indices */
  144. vspltisw 12,0 /* v12 (vs44) = 0: running per-lane maxima */
  145. addis 5,2,.LC3@toc@ha
  146. addis 6,2,.LC6@toc@ha
  147. stvx 29,1,0 /* save v29 at r1-64 */
  148. li 0,-48
  149. addis 8,2,.LC7@toc@ha
  150. xxlor 35,50,50 /* v3 (vs35) = copy of v18 = 0: running base index */
  151. addi 3,3,.LC2@toc@l
  152. addi 5,5,.LC3@toc@l
  153. stvx 30,1,0 /* save v30 at r1-48 */
  154. addi 6,6,.LC6@toc@l
  155. li 0,-32
  156. addi 8,8,.LC7@toc@l
  157. lxvd2x 51,0,3 /* vs51 (v19) = constant .LC2 (0,1,2,3) */
  158. lxvd2x 34,0,5 /* vs34 (v2) = constant .LC3 (4,5,6,7) */
  159. addis 7,2,.LC4@toc@ha
  160. stvx 31,1,0 /* save v31 at r1-32 */
  161. lxvd2x 47,0,6 /* vs47 (v15) = constant .LC6 (32 in every word) */
  162. addis 9,2,.LC5@toc@ha
  163. addi 7,7,.LC4@toc@l
  164. lxvd2x 48,0,8 /* vs48 (v16) = constant .LC7 (64 in every word) */
  165. addi 9,9,.LC5@toc@l
  166. vspltisw 17,8
  167. vadduwm 17,17,17 /* v17 = 16 in every word */
  168. lxvd2x 36,0,7 /* vs36 (v4) = constant .LC4 (8,9,10,11) */
  169. li 7,0 /* r7 = elements consumed by the vector loop */
  170. lxvd2x 37,0,9 /* vs37 (v5) = constant .LC5 (12,13,14,15) */
  171. mr 9,4 /* r9 = read cursor */
  172. .p2align 4,,15
  173. .L5: /* vector loop: 16 loads of 16 bytes = 64 floats per iteration */
  174. addi 5,9,16
  175. addi 6,9,32
  176. lxvd2x 41,0,9 /* bytes 0..15 of the block */
  177. vadduwm 31,3,15 /* v31 = base index + 32 */
  178. addi 8,9,64
  179. addi 31,9,48
  180. addi 12,9,80
  181. addi 3,9,96
  182. lxvd2x 5,0,5
  183. lxvd2x 43,0,6
  184. addi 5,9,112
  185. addi 6,9,128
  186. lxvd2x 1,0,8
  187. lxvd2x 9,0,31
  188. addi 8,9,160
  189. addi 31,9,144
  190. lxvd2x 6,0,12
  191. lxvd2x 13,0,3
  192. addi 12,9,176
  193. addi 3,9,192
  194. lxvd2x 11,0,5
  195. lxvd2x 2,0,6
  196. xvabssp 41,41 /* per-lane absolute value (likewise for every loaded vector below) */
  197. addi 5,9,208
  198. addi 6,9,224
  199. lxvd2x 3,0,8
  200. lxvd2x 7,0,31
  201. addi 8,9,240
  202. lxvd2x 10,0,12
  203. lxvd2x 4,0,3
  204. xvabssp 43,43
  205. xvabssp 5,5
  206. addi 7,7,64 /* 64 more elements consumed */
  207. lxvd2x 8,0,5
  208. lxvd2x 0,0,6
  209. xvabssp 9,9
  210. xvabssp 1,1
  211. cmpd 7,10,7 /* CR7 = r10 vs consumed; tested by the bgt at the loop end */
  212. addi 9,9,256 /* advance the cursor by one block */
  213. lxvd2x 12,0,8 /* bytes 240..255 of the block */
  214. xvabssp 6,6
  215. xvabssp 13,13
  216. xvabssp 11,11
  217. xvabssp 2,2
  218. xvabssp 7,7
  219. xvabssp 3,3
  220. xvabssp 10,10
  221. xvabssp 4,4
  222. xvabssp 8,8
  223. xvabssp 0,0
  224. xvabssp 12,12
  225. xvcmpgtsp 32,5,41 /* first round: greater-than masks for pairs of adjacent vectors */
  226. xvcmpgtsp 61,9,43
  227. xvcmpgtsp 45,6,1
  228. xvcmpgtsp 62,11,13
  229. xvcmpgtsp 38,7,2
  230. xvcmpgtsp 46,10,3
  231. xvcmpgtsp 40,8,4
  232. xvcmpgtsp 39,12,0
  233. xxsel 5,41,5,32 /* keep the larger value of each pair ... */
  234. xxsel 32,51,34,32 /* ... and the matching index constant */
  235. xxsel 9,43,9,61
  236. xxsel 6,1,6,45
  237. xxsel 11,13,11,62
  238. xxsel 43,51,34,45
  239. xxsel 7,2,7,38
  240. xvcmpgtsp 41,9,5 /* later rounds compare the winners of earlier rounds */
  241. xxsel 10,3,10,46
  242. xvcmpgtsp 45,11,6
  243. xxsel 8,4,8,40
  244. xxsel 62,36,37,62
  245. xxsel 0,0,12,39
  246. xvcmpgtsp 42,10,7
  247. xxsel 61,36,37,61
  248. xxsel 40,51,34,40
  249. xvcmpgtsp 33,0,8
  250. xxsel 39,36,37,39
  251. xxsel 38,51,34,38
  252. xxsel 46,36,37,46
  253. xxsel 9,5,9,41
  254. xxsel 41,32,61,41
  255. xxsel 12,6,11,45
  256. xxsel 45,43,62,45
  257. xxsel 11,7,10,42
  258. xvcmpgtsp 32,12,9
  259. vadduwm 13,13,17 /* add 16 to the index vector in v13 */
  260. xxsel 42,38,46,42
  261. xxsel 0,8,0,33
  262. xxsel 33,40,39,33
  263. xvcmpgtsp 43,0,11
  264. vadduwm 1,1,17 /* add 16 to the index vector in v1 */
  265. xxsel 12,9,12,32
  266. xxsel 32,41,45,32
  267. vadduwm 0,3,0 /* add the running base index */
  268. vadduwm 3,3,16 /* advance the base index by 64 for the next block */
  269. xxsel 0,11,0,43
  270. xxsel 33,42,33,43
  271. xvcmpgtsp 45,0,12
  272. vadduwm 1,31,1 /* add (base + 32) */
  273. xxsel 0,12,0,45 /* vs0 = per-lane max of this block */
  274. xxsel 32,32,33,45 /* vs32 = matching per-lane indices */
  275. xvcmpgtsp 33,0,44 /* block max vs running max, strictly greater */
  276. xxsel 50,50,32,33 /* update the running indices */
  277. xxsel 44,44,0,33 /* update the running maxima */
  278. bgt 7,.L5 /* loop while r10 > elements consumed */
  279. xxsldwi 12,44,44,1 /* lane reduction: extract the four lane maxima as scalars ... */
  280. xscvspdp 10,44
  281. vspltw 0,18,0 /* ... and the four lane indices into GPRs */
  282. xxsldwi 0,44,44,3
  283. xscvspdp 12,12
  284. mfvsrwz 3,50
  285. mfvsrwz 6,32
  286. vspltw 0,18,3
  287. xscvspdp 0,0
  288. xxsldwi 44,44,44,2
  289. mfvsrwz 7,32
  290. vspltw 0,18,2
  291. xscvspdp 44,44
  292. mfvsrwz 9,32
  293. fcmpu 7,12,10 /* first pair: f12 (index r3) vs f10 (index r6) */
  294. rldicl 8,3,0,32 /* zero-extend the 32-bit lane indices */
  295. rldicl 31,6,0,32
  296. fmr 11,0
  297. rldicl 0,7,0,32
  298. rldicl 5,9,0,32
  299. beq 7,.L71 /* equal values: keep the smaller index */
  300. bnl 7,.L8 /* f12 is larger: keep f12 / r8 */
  301. fmr 12,10
  302. mr 8,31
  303. .L8: /* second pair: vs0 (index r7) vs vs44 (index r9) */
  304. xscmpudp 7,0,44
  305. bne 7,.L11
  306. cmplw 7,7,9 /* equal values: keep the smaller index (unsigned compare) */
  307. ble 7,.L12
  308. mr 7,9
  309. .L12:
  310. rldicl 5,7,0,32
  311. .L13: /* final: f12 / r8 vs f11 / r5 */
  312. fcmpu 7,12,11
  313. beq 7,.L72 /* equal values: keep the smaller index */
  314. bnl 7,.L17
  315. fmr 12,11
  316. mr 8,5
  317. .L17: /* f12 = max over the vectorised part, r8 = its 0-based index */
  318. cmpd 7,11,10
  319. ble 7,.L16 /* n <= r10: no scalar tail */
  320. addi 7,10,1
  321. sldi 9,10,2
  322. cmpd 7,7,11
  323. add 4,4,9 /* r4 = address of element r10 */
  324. subf 9,10,11 /* r9 = n - r10: remaining element count */
  325. mtctr 9
  326. bgt 7,.L53 /* r10 + 1 > n: force a single iteration */
  327. li 3,-1
  328. rldicr 3,3,0,0 /* r3 = 0x8000000000000000 */
  329. cmpd 7,11,3
  330. beq 7,.L53
  331. .p2align 4,,15
  332. .L21: /* scalar tail after the vector loop */
  333. lfs 0,0(4)
  334. addi 4,4,4
  335. fabs 0,0
  336. fcmpu 7,0,12
  337. bng 7,.L19 /* skip unless strictly greater than the running max */
  338. fmr 12,0
  339. mr 8,10
  340. .L19:
  341. addi 10,10,1
  342. bdnz .L21
  343. .L16: /* VSX path epilogue: reload saved registers and return */
  344. li 0,-64
  345. ld 31,-8(1)
  346. addi 3,8,1 /* convert the 0-based best index to 1-based */
  347. lvx 29,1,0
  348. li 0,-48
  349. lvx 30,1,0
  350. li 0,-32
  351. lvx 31,1,0
  352. blr
  353. .p2align 4,,15
  354. .L71: /* first pair tied: r3 = smaller of r3 and r6 (unsigned) */
  355. cmplw 7,3,6
  356. ble 7,.L7
  357. mr 3,6
  358. .L7:
  359. rldicl 8,3,0,32
  360. b .L8
  361. .p2align 4,,15
  362. .L40: /* strided path with n < 4: set up state and go straight to the tail */
  363. xxlxor 0,0,0 /* f0 = 0.0: running max |x| */
  364. sldi 6,5,2 /* r6 = 4*stride: byte step between elements */
  365. li 8,0
  366. li 9,0
  367. b .L22
  368. .p2align 4,,15
  369. .L11: /* second pair differs */
  370. blt 7,.L39
  371. mr 5,0 /* vs0 is larger: take its index (f11 already holds f0) */
  372. b .L13
  373. .p2align 4,,15
  374. .L72: /* final tie: r8 = smaller of r8 and r5 */
  375. cmpd 7,8,5
  376. ble 7,.L17
  377. mr 8,5
  378. b .L17
  379. .p2align 4,,15
  380. .L39: /* vs44 is larger: f11 = vs44 (r5 already holds its index) */
  381. xscpsgndp 11,44,44
  382. b .L13
  383. .L53: /* guard: run the tail loop .L21 exactly once */
  384. li 9,1
  385. mtctr 9
  386. b .L21
  387. .L54: /* guard: run the tail loop .L35 exactly once */
  388. li 10,1
  389. mtctr 10
  390. b .L35
  391. .L60: /* guard: run the loop .L61 exactly once */
  392. li 9,1
  393. mtctr 9
  394. b .L61
  395. .long 0 /* NOTE(review): presumably a compiler-emitted traceback table (with the next line) - confirm */
  396. .byte 0,0,0,0,0,1,0,0
  397. .size isamax_k,.-isamax_k
  398. .section .rodata.cst16,"aM",@progbits,16 /* 16-byte constants loaded by the vector path of isamax_k */
  399. .align 4
  400. .LC2: /* index constants 0..3; loaded into vs51 */
  401. .long 0
  402. .long 1
  403. .long 2
  404. .long 3
  405. .LC3: /* index constants 4..7; loaded into vs34 */
  406. .long 4
  407. .long 5
  408. .long 6
  409. .long 7
  410. .LC4: /* index constants 8..11; loaded into vs36 */
  411. .long 8
  412. .long 9
  413. .long 10
  414. .long 11
  415. .LC5: /* index constants 12..15; loaded into vs37 */
  416. .long 12
  417. .long 13
  418. .long 14
  419. .long 15
  420. .LC6: /* 32 in every word; loaded into vs47 and added to the base index */
  421. .long 32
  422. .long 32
  423. .long 32
  424. .long 32
  425. .LC7: /* 64 in every word; loaded into vs48, the per-iteration base index step */
  426. .long 64
  427. .long 64
  428. .long 64
  429. .long 64
  430. .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" /* compiler identification string */
  431. .section .note.GNU-stack,"",@progbits