You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

isamax_power9.S 5.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. /*
  2. .file "isamax.c"
  3. .abiversion 2
  4. .section ".text"
  5. .align 2
  6. .p2align 4,,15
  7. .globl isamax_k
  8. .type isamax_k, @function
  9. */
  10. #define ASSEMBLER
  11. #include "common.h"
  /*
   * isamax_k: return the 1-based position of the single-precision element
   * with the largest absolute value.  The code appears to be compiler
   * output (see the .ident directive at the end of the file).
   *
   * Registers as read by the code below:
   *   r3  = element count n          (result is 0 when n <= 0)
   *   r4  = address of the first float
   *   r5  = stride in elements       (result is 0 when stride <= 0)
   *   r12 = entry address, used only to derive the TOC pointer in r2
   * Result: r3 = (0-based index of the selected element) + 1.
   *
   * Three paths:
   *   stride == 1, n >= 64 : vector loop .L5 (64 floats per iteration),
   *                          4-lane reduction, then scalar tail .L21
   *   stride == 1, n <  64 : scalar loop .L61
   *   stride  > 1          : 4x unrolled scalar loop .L31, then tail .L35
   *
   * Every update uses a strict greater-than test (fcmpu + bng, or
   * xvcmpgtsp), and the lane reduction picks the smaller index when two
   * values compare equal.  The running maximum starts at 0.0.
   *
   * The vector path saves vs59-vs63 (v27-v31) at -80..-16(r1) and restores
   * them at .L16; r1 itself is never adjusted.
   *
   * NOTE(review): lane indices in the vector path are accumulated with
   * 32-bit word adds (vadduwm) -- confirm callers never pass an n that
   * would overflow 32 bits.
   * NOTE(review): in the scalar loops an unordered compare (NaN) never
   * replaces the maximum; inside the vector tournament a NaN lane can win
   * an intermediate select -- confirm whether NaN inputs matter.
   */
  12. PROLOGUE    /* macro from common.h; its expansion is not visible here */
  13. isamax_k:
  14. .LCF0:
  15. 0: addis 2,12,.TOC.-.LCF0@ha    /* r2 = r12 + (.TOC. - .LCF0), high part */
  16. addi 2,2,.TOC.-.LCF0@l    /* low part: r2 now holds the TOC pointer */
  17. .localentry isamax_k,.-isamax_k
  18. mr. 11,3    /* r11 = n; CR0 reflects n compared with 0 */
  19. ble 0,.L36    /* n <= 0 -> return 0 */
  20. cmpdi 7,5,0
  21. li 3,0    /* preset the return value to 0 */
  22. blelr 7    /* stride <= 0 -> return 0 */
  23. cmpdi 7,5,1
  24. beq 7,.L69    /* stride == 1 -> contiguous path */
  25. rldicr. 7,11,0,61    /* r7 = n with the low 2 bits cleared (multiple of 4) */
  26. beq 0,.L40    /* fewer than 4 elements -> tail loop only */
  27. sldi 10,5,1    /* r10 = 2*stride */
  28. sldi 6,5,2    /* r6 = 4*stride = byte step between consecutive elements */
  29. sldi 0,5,4    /* r0 = 16*stride = byte step for four elements */
  30. sldi 3,5,3    /* r3 = 8*stride = byte offset of element i+2 */
  31. mr 9,4    /* r9 = read cursor */
  32. xxlxor 0,0,0    /* vs0 = 0: f0 is the running maximum, starting at 0.0 */
  33. li 8,0    /* r8 = 0-based index of the running maximum */
  34. add 5,10,5    /* r5 = 3*stride */
  35. li 10,0    /* r10 = index of the first element of the current group */
  36. sldi 5,5,2    /* r5 = 12*stride = byte offset of element i+3 */
  37. .p2align 4,,15
  38. .L31:    /* strided main loop: four elements per iteration */
  39. lfs 12,0(9)    /* element i */
  40. fabs 12,12
  41. fcmpu 7,12,0
  42. bng 7,.L23    /* keep the old maximum unless |x| > f0 */
  43. fmr 0,12
  44. mr 8,10
  45. .L23:
  46. lfsx 12,9,6    /* element i+1 */
  47. fabs 12,12
  48. fcmpu 7,12,0
  49. bng 7,.L25
  50. fmr 0,12
  51. addi 8,10,1
  52. .L25:
  53. lfsx 12,9,3    /* element i+2 */
  54. fabs 12,12
  55. fcmpu 7,12,0
  56. bng 7,.L27
  57. fmr 0,12
  58. addi 8,10,2
  59. .L27:
  60. lfsx 12,9,5    /* element i+3 */
  61. add 9,9,0    /* cursor += r0 (register r0): skip four elements */
  62. fabs 12,12
  63. fcmpu 7,12,0
  64. bng 7,.L29
  65. fmr 0,12
  66. addi 8,10,3
  67. .L29:
  68. addi 10,10,4
  69. cmpd 7,7,10
  70. bgt 7,.L31    /* loop while r10 < r7 */
  71. addi 7,7,-1
  72. srdi 7,7,2
  73. addi 7,7,1    /* r7 = number of 4-element groups processed */
  74. sldi 9,7,2    /* r9 = number of elements processed */
  75. mulld 7,6,7    /* r7 = byte stride * groups (scaled by 4 at .L22) */
  76. cmpd 7,11,9
  77. ble 7,.L67    /* nothing left over -> return */
  78. .L22:    /* strided tail setup; r9 = elements already processed */
  79. addi 10,9,1
  80. sldi 7,7,2    /* r7 = byte offset of the first unprocessed element */
  81. subf 5,9,11    /* r5 = n - r9 = remaining elements */
  82. cmpd 7,10,11
  83. mtctr 5    /* CTR = remaining elements */
  84. add 4,4,7    /* r4 = address of the first unprocessed element */
  85. bgt 7,.L54    /* r9+1 > n -> force CTR = 1 */
  86. li 3,-1
  87. rldicr 3,3,0,0    /* r3 = 0x8000000000000000 */
  88. cmpd 7,11,3
  89. beq 7,.L54    /* n equals that value -> force CTR = 1 */
  90. .p2align 4,,15
  91. .L35:    /* strided tail loop: one element per iteration, index in r9 */
  92. lfs 12,0(4)
  93. add 4,4,6    /* advance by one stride */
  94. fabs 12,12
  95. fcmpu 7,12,0
  96. bng 7,.L33
  97. fmr 0,12
  98. mr 8,9
  99. .L33:
  100. addi 9,9,1
  101. bdnz .L35
  102. .L67:    /* exit for the paths that saved no vector registers */
  103. addi 3,8,1    /* return the 1-based index */
  104. blr
  105. .p2align 4,,15
  106. .L36:    /* n <= 0 */
  107. li 3,0
  108. blr
  109. .p2align 4,,15
  110. .L69:    /* contiguous (stride == 1) path */
  111. rldicr. 10,11,0,57    /* r10 = n with the low 6 bits cleared (multiple of 64) */
  112. bne 0,.L70    /* at least 64 elements -> vector path */
  113. addi 7,10,1
  114. sldi 9,10,2    /* byte offset of element r10 (r10 is 0 on this path) */
  115. subf 6,10,11    /* r6 = n - r10 = loop trip count */
  116. li 8,0    /* r8 = index of the running maximum */
  117. xxlxor 12,12,12    /* vs12 = 0: f12 is the running maximum here */
  118. cmpd 7,7,11
  119. mtctr 6
  120. add 4,4,9
  121. bgt 7,.L60    /* r10+1 > n -> force CTR = 1 */
  122. li 3,-1
  123. rldicr 3,3,0,0    /* r3 = 0x8000000000000000 */
  124. cmpd 7,11,3
  125. beq 7,.L60
  126. .p2align 4,,15
  127. .L61:    /* scalar contiguous loop (n < 64), index in r10 */
  128. lfs 0,0(4)
  129. addi 4,4,4
  130. fabs 0,0
  131. fcmpu 7,0,12
  132. bng 7,.L63    /* keep the old maximum unless |x| > f12 */
  133. fmr 12,0
  134. mr 8,10
  135. .L63:
  136. addi 10,10,1
  137. bdnz .L61
  138. b .L67
  139. .p2align 4,,15
  140. .L70:    /* vector path setup */
  141. addis 6,2,.LC2@toc@ha    /* high parts of the TOC-relative addresses of .LC2-.LC5 */
  142. addis 7,2,.LC3@toc@ha
  143. addis 8,2,.LC4@toc@ha
  144. addis 9,2,.LC5@toc@ha
  145. xxspltib 46,0    /* vs46 (v14) = 0: per-lane index of the running maximum */
  146. stxv 61,-48(1)    /* save vs61 (v29) below the stack pointer */
  147. stxv 62,-32(1)    /* save vs62 (v30) */
  148. addi 6,6,.LC2@toc@l    /* r6 = address of .LC2 */
  149. addi 7,7,.LC3@toc@l    /* r7 = address of .LC3 */
  150. stxv 63,-16(1)    /* save vs63 (v31) */
  151. xxspltib 61,32    /* v29 = bytes of 32 (widened to words below) */
  152. xxspltib 63,16    /* v31 = bytes of 16 */
  153. xxspltib 62,64    /* v30 = bytes of 64 */
  154. addi 8,8,.LC4@toc@l    /* r8 = address of .LC4 */
  155. addi 9,9,.LC5@toc@l    /* r9 = address of .LC5 */
  156. lxv 47,0(6)    /* vs47 = .LC2 = {0,1,2,3} */
  157. xxspltib 34,0    /* vs34 (v2) = 0: per-lane running maximum */
  158. lxv 48,0(7)    /* vs48 = .LC3 = {4,5,6,7} */
  159. xxlor 51,46,46    /* vs51 (v19) = 0: index of the first element of the current block */
  160. lxv 49,0(8)    /* vs49 = .LC4 = {8,9,10,11} */
  161. lxv 50,0(9)    /* vs50 = .LC5 = {12,13,14,15} */
  162. li 8,0    /* r8 = elements consumed so far */
  163. mr 9,4    /* r9 = read cursor; r4 keeps the original base */
  164. vextsb2w 29,29    /* v29 = words of 32 */
  165. vextsb2w 31,31    /* v31 = words of 16 */
  166. vextsb2w 30,30    /* v30 = words of 64 */
  167. stxv 59,-80(1)    /* save vs59 (v27) */
  168. stxv 60,-64(1)    /* save vs60 (v28) */
  169. .p2align 4,,15
  170. .L5:    /* vector loop: 16 vectors x 4 floats = 64 elements per iteration */
  171. lxv 0,0(9)    /* vec0 */
  172. vadduwm 27,19,29    /* v27 = block base index + 32 */
  173. lxv 12,240(9)    /* vec15 */
  174. addi 8,8,64
  175. addi 9,9,256    /* cursor += 256 bytes; later loads use negative offsets */
  176. cmpd 7,10,8    /* loop condition, consumed by the bgt at the bottom */
  177. xvabssp 44,0
  178. lxv 0,-240(9)    /* vec1 */
  179. xvabssp 12,12
  180. xvabssp 5,0
  181. lxv 0,-224(9)    /* vec2 */
  182. xvabssp 32,0
  183. lxv 0,-208(9)    /* vec3 */
  184. xvcmpgtsp 35,5,44    /* mask: |vec1| > |vec0| */
  185. xvabssp 9,0
  186. lxv 0,-192(9)    /* vec4 */
  187. xxsel 5,44,5,35    /* vs5 = lane-wise max of vec0, vec1 */
  188. xxsel 35,47,48,35    /* offsets: {4..7} where vec1 won, else {0..3} */
  189. xvabssp 1,0
  190. lxv 0,-176(9)    /* vec5 */
  191. xvcmpgtsp 60,9,32    /* mask: |vec3| > |vec2| */
  192. xvabssp 6,0
  193. lxv 0,-160(9)    /* vec6 */
  194. xxsel 9,32,9,60    /* vs9 = lane-wise max of vec2, vec3 */
  195. xxsel 60,49,50,60    /* offsets: {12..15} where vec3 won, else {8..11} */
  196. xvabssp 13,0
  197. lxv 0,-144(9)    /* vec7 */
  198. xvcmpgtsp 42,9,5    /* max(vec2,vec3) > max(vec0,vec1) ? */
  199. xvcmpgtsp 37,6,1    /* mask: |vec5| > |vec4| */
  200. xvabssp 11,0
  201. lxv 0,-128(9)    /* vec8 */
  202. xxsel 9,5,9,42    /* vs9 = max of vec0..vec3 */
  203. xxsel 42,35,60,42    /* vs42 = its offset 0..15 */
  204. xxsel 6,1,6,37    /* vs6 = max of vec4, vec5 */
  205. xxsel 37,47,48,37
  206. xvabssp 2,0
  207. lxv 0,-112(9)    /* vec9 */
  208. xvcmpgtsp 36,11,13    /* mask: |vec7| > |vec6| */
  209. xvabssp 7,0
  210. lxv 0,-96(9)    /* vec10 */
  211. xxsel 11,13,11,36    /* vs11 = max of vec6, vec7 */
  212. xxsel 36,49,50,36
  213. xvabssp 3,0
  214. lxv 0,-80(9)    /* vec11 */
  215. xvcmpgtsp 45,11,6    /* max(vec6,vec7) > max(vec4,vec5) ? */
  216. xvcmpgtsp 39,7,2    /* mask: |vec9| > |vec8| */
  217. xvabssp 10,0
  218. lxv 0,-64(9)    /* vec12 */
  219. xxsel 7,2,7,39    /* vs7 = max of vec8, vec9 */
  220. xxsel 39,47,48,39
  221. xvabssp 4,0
  222. lxv 0,-48(9)    /* vec13 */
  223. xvcmpgtsp 38,10,3    /* mask: |vec11| > |vec10| */
  224. xvabssp 8,0
  225. lxv 0,-32(9)    /* vec14 */
  226. xxsel 10,3,10,38    /* vs10 = max of vec10, vec11 */
  227. xxsel 38,49,50,38
  228. xvabssp 0,0
  229. xvcmpgtsp 43,10,7    /* max(vec10,vec11) > max(vec8,vec9) ? */
  230. xvcmpgtsp 41,8,4    /* mask: |vec13| > |vec12| */
  231. xvcmpgtsp 40,12,0    /* mask: |vec15| > |vec14| (vs12 still holds |vec15|) */
  232. xxsel 8,4,8,41    /* vs8 = max of vec12, vec13 */
  233. xxsel 41,47,48,41
  234. xxsel 0,0,12,40    /* vs0 = max of vec14, vec15 */
  235. xxsel 12,6,11,45    /* vs12 reused: max of vec4..vec7 */
  236. xxsel 11,7,10,43    /* vs11 reused: max of vec8..vec11 */
  237. xxsel 45,37,36,45    /* vs45 = offset 0..15 within vec4..vec7 */
  238. xvcmpgtsp 33,0,8    /* max(vec14,vec15) > max(vec12,vec13) ? */
  239. xvcmpgtsp 32,12,9    /* max(vec4..7) > max(vec0..3) ? */
  240. vadduwm 13,13,31    /* vs45 += 16: offsets for vec4..vec7 become 16..31 */
  241. xxsel 40,49,50,40
  242. xxsel 43,39,38,43    /* vs43 = offset 0..15 within vec8..vec11 */
  243. xxsel 0,8,0,33    /* vs0 = max of vec12..vec15 */
  244. xxsel 12,9,12,32    /* vs12 = max of the first 32 elements */
  245. xxsel 33,41,40,33    /* vs33 = offset 0..15 within vec12..vec15 */
  246. xxsel 32,42,45,32    /* vs32 = offset 0..31 of that maximum */
  247. xvcmpgtsp 44,0,11    /* max(vec12..15) > max(vec8..11) ? */
  248. vadduwm 1,1,31    /* vs33 += 16: offsets relative to vec8 */
  249. vadduwm 0,19,0    /* vs32 = block base + offset (first 32 elements) */
  250. vadduwm 19,19,30    /* block base += 64 for the next iteration */
  251. xxsel 0,11,0,44    /* vs0 = max of the last 32 elements */
  252. xxsel 33,43,33,44    /* vs33 = its offset 0..31 */
  253. xvcmpgtsp 45,0,12    /* last 32 > first 32 ? */
  254. vadduwm 1,27,1    /* vs33 = block base + 32 + offset (last 32 elements) */
  255. xxsel 0,12,0,45    /* vs0 = per-lane maximum of the whole block */
  256. xxsel 32,32,33,45    /* vs32 = its element index */
  257. xvcmpgtsp 33,0,34    /* block maximum > running maximum ? */
  258. xxsel 46,46,32,33    /* update the per-lane best index */
  259. xxsel 34,34,0,33    /* update the per-lane running maximum */
  260. bgt 7,.L5    /* loop while r10 > r8 */
  261. xxsldwi 12,34,34,3    /* reduction: rotate vs34 so each lane can be converted as a scalar */
  262. xxsldwi 11,34,34,2
  263. li 9,0    /* byte offsets 0/4/8/12 for the vextuwrx extracts below */
  264. li 8,12
  265. xxsldwi 0,34,34,1
  266. xscvspdp 34,34    /* single -> double for the scalar compares */
  267. vextuwrx 3,9,14    /* r3 = index word of v14 at right-indexed byte offset 0 */
  268. li 9,4
  269. xscvspdp 12,12
  270. xscvspdp 11,11
  271. xscvspdp 0,0
  272. vextuwrx 6,9,14    /* r6 = index word at offset 4 */
  273. li 9,8
  274. vextuwrx 7,9,14    /* r7 = index word at offset 8 */
  275. vextuwrx 9,8,14    /* r9 = index word at offset 12 */
  276. rldicl 12,6,0,32    /* r12 = r6 zero-extended to 64 bits */
  277. rldicl 8,3,0,32    /* r8 = r3 zero-extended */
  278. rldicl 0,7,0,32    /* r0 = r7 zero-extended */
  279. rldicl 5,9,0,32    /* r5 = r9 zero-extended */
  280. fcmpu 7,12,11    /* pair one: f12 (index r3) against f11 (index r6) */
  281. fmr 10,0    /* f10 = f0, provisional winner of pair two */
  282. beq 7,.L71    /* equal values -> keep the smaller index */
  283. bnl 7,.L8    /* f12 not less -> keep f12 and r8 */
  284. mr 8,12    /* f11 is larger: take its index ... */
  285. fmr 12,11    /* ... and its value */
  286. .L8:
  287. xscmpudp 7,0,34    /* pair two: f0 (index r7) against vs34 (index r9) */
  288. bne 7,.L11
  289. cmplw 7,7,9    /* equal values: pick the smaller index */
  290. ble 7,.L12
  291. mr 7,9
  292. .L12:
  293. rldicl 5,7,0,32    /* r5 = winning index of pair two */
  294. .L13:
  295. fcmpu 7,12,10    /* final: winner of pair one against winner of pair two */
  296. beq 7,.L72    /* equal values -> keep the smaller index */
  297. bnl 7,.L17
  298. mr 8,5
  299. fmr 12,10
  300. .L17:    /* f12 / r8 = maximum and index over the first r10 elements */
  301. cmpd 7,11,10
  302. ble 7,.L16    /* no leftover elements */
  303. addi 7,10,1
  304. sldi 9,10,2    /* byte offset of the first leftover element */
  305. subf 6,10,11    /* r6 = leftover element count */
  306. cmpd 7,7,11
  307. mtctr 6
  308. add 4,4,9    /* r4 = address of the first leftover element */
  309. bgt 7,.L53    /* r10+1 > n -> force CTR = 1 */
  310. li 3,-1
  311. rldicr 3,3,0,0    /* r3 = 0x8000000000000000 */
  312. cmpd 7,11,3
  313. beq 7,.L53
  314. .p2align 4,,15
  315. .L21:    /* scalar tail after the vector loop, index in r10 */
  316. lfs 0,0(4)
  317. addi 4,4,4
  318. fabs 0,0
  319. fcmpu 7,0,12
  320. bng 7,.L19    /* keep the old maximum unless |x| > f12 */
  321. fmr 12,0
  322. mr 8,10
  323. .L19:
  324. addi 10,10,1
  325. bdnz .L21
  326. .L16:    /* vector-path exit: restore the saved registers */
  327. lxv 59,-80(1)
  328. lxv 60,-64(1)
  329. addi 3,8,1    /* return the 1-based index */
  330. lxv 61,-48(1)
  331. lxv 62,-32(1)
  332. lxv 63,-16(1)
  333. blr
  334. .p2align 4,,15
  335. .L71:    /* pair one tied: r8 = smaller of r3 and r6 (unsigned word compare) */
  336. cmplw 7,3,6
  337. ble 7,.L7
  338. mr 3,6
  339. .L7:
  340. rldicl 8,3,0,32
  341. b .L8
  342. .p2align 4,,15
  343. .L40:    /* stride > 1 and n < 4: initialise for the tail loop only */
  344. sldi 6,5,2    /* r6 = byte step between consecutive elements */
  345. li 8,0    /* best index */
  346. li 9,0    /* no elements processed yet */
  347. xxlxor 0,0,0    /* running maximum f0 = 0.0 */
  348. b .L22
  349. .p2align 4,,15
  350. .L11:    /* pair two values differ */
  351. blt 7,.L39    /* f0 < vs34 */
  352. mr 5,0    /* otherwise r5 = r0 (index r7); f10 already equals f0 */
  353. b .L13
  354. .p2align 4,,15
  355. .L72:    /* final tie: r8 = smaller of r8 and r5 */
  356. cmpd 7,8,5
  357. ble 7,.L17
  358. mr 8,5
  359. b .L17
  360. .p2align 4,,15
  361. .L39:
  362. xscpsgndp 10,34,34    /* f10 = vs34 (register copy); r5 already holds index r9 */
  363. b .L13
  364. .L53:    /* force a single iteration of .L21 */
  365. li 9,1
  366. mtctr 9
  367. b .L21
  368. .L54:    /* force a single iteration of .L35 */
  369. li 10,1
  370. mtctr 10
  371. b .L35
  372. .L60:    /* force a single iteration of .L61 */
  373. li 9,1
  374. mtctr 9
  375. b .L61
  376. .long 0    /* NOTE(review): this word and the 8 bytes below look like the compiler's traceback data -- confirm */
  377. .byte 0,0,0,0,0,0,0,0
  378. .size isamax_k,.-isamax_k
  /*
   * Lane-offset tables for the vector path of isamax_k.  Each table is one
   * 16-byte vector of four 32-bit words; the code at .L70 loads .LC2-.LC5
   * with lxv into vs47-vs50 and the loop at .L5 selects between them with
   * xxsel to record which of four consecutive vectors held the larger lane.
   */
  379. .section .rodata.cst16,"aM",@progbits,16
  380. .align 4
  381. .LC2:    /* element offsets 0..3 (first vector of a group of four) */
  382. .long 0
  383. .long 1
  384. .long 2
  385. .long 3
  386. .LC3:    /* element offsets 4..7 (second vector) */
  387. .long 4
  388. .long 5
  389. .long 6
  390. .long 7
  391. .LC4:    /* element offsets 8..11 (third vector) */
  392. .long 8
  393. .long 9
  394. .long 10
  395. .long 11
  396. .LC5:    /* element offsets 12..15 (fourth vector) */
  397. .long 12
  398. .long 13
  399. .long 14
  400. .long 15
  401. .ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]"
  402. .section .note.GNU-stack,"",@progbits