You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

izamin_ppc440.S 10 kB


  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  #define ASSEMBLER
  #include "common.h"

  /*
   * Index-of-minimum kernel for a strided vector whose elements each
   * consist of two adjacent SIZE-wide floating-point components
   * (presumably the real and imaginary parts of a complex number --
   * confirm against common.h / the caller).
   *
   * For every element the magnitude |c0| + |c1| is formed.  The routine
   * returns, in RET (r3), the 1-based position of the first element whose
   * magnitude compares equal to the smallest magnitude found.
   *
   * Inputs (as read by the code below):
   *   without F_INTERFACE : r3 = n, r4 = x, r5 = incx (in elements)
   *   with    F_INTERFACE : r3 -> n, r4 = x, r5 -> incx
   * Output:
   *   r3 = 1-based index, or 0 when n <= 0 or incx <= 0.
   *
   * Method: two passes over the data.
   *   Pass 1 (pointer X)  : reduce to the minimum magnitude, left in f1.
   *   Pass 2 (pointer XX) : rescan from the start, counting elements in
   *                         RET, and leave as soon as a magnitude equals f1.
   *
   * Selection idiom used throughout:
   *   fsub  t, cur, cand
   *   fsel  cur, t, cand, cur      -> cur = (cur - cand >= 0) ? cand : cur
   *
   * f14-f31 are saved on entry and restored on exit; r0 and CTR are used
   * as scratch.  PROLOGUE, PROFCODE, EPILOGUE, LL(), LFDUX, LFDX, LDINT,
   * SIZE and ZBASE_SHIFT come from common.h.
   */

  /* Integer register roles */
  #define RET     r3      /* result: running / final 1-based index        */
  #define X       r4      /* pass-1 element pointer                       */
  #define INCX    r5      /* stride, converted to bytes below             */
  #define N       r6      /* pass-1 element count                         */
  #define NN      r7      /* copy of the full count for pass 2            */
  #define XX      r8      /* pass-2 element pointer                       */
  #define PRE     r9      /* byte offset used by the dcbt prefetch hints  */
  #define INC1    r10     /* byte offset of the second component (SIZE)   */

  #define FZERO   f1

  /* 18 saved FPRs occupy 0..143(SP); 144(SP) is a 4-byte scratch slot. */
  #define STACKSIZE 160

          PROLOGUE
          PROFCODE

          addi    SP, SP, -STACKSIZE
          li      r0, 0

          /* Save the FPRs f14-f31 that this routine overwrites. */
          stfd    f14,   0(SP)
          stfd    f15,   8(SP)
          stfd    f16,  16(SP)
          stfd    f17,  24(SP)
          stfd    f18,  32(SP)
          stfd    f19,  40(SP)
          stfd    f20,  48(SP)
          stfd    f21,  56(SP)
          stfd    f22,  64(SP)
          stfd    f23,  72(SP)
          stfd    f24,  80(SP)
          stfd    f25,  88(SP)
          stfd    f26,  96(SP)
          stfd    f27, 104(SP)
          stfd    f28, 112(SP)
          stfd    f29, 120(SP)
          stfd    f30, 128(SP)
          stfd    f31, 136(SP)

          /*
           * Materialise 0.0 in FZERO via the scratch slot.
           * NOTE(review): FZERO is f1, which is overwritten by the first
           * element load below before anything reads it, so this value is
           * never consumed in this routine.
           */
          stw     r0,   144(SP)
          lfs     FZERO,144(SP)

  #ifdef F_INTERFACE
          /* n and incx are passed by reference. */
          LDINT   N,    0(r3)
          LDINT   INCX, 0(INCX)
  #else
          mr      N, r3
  #endif

          li      RET, 0                  /* default result for the early exits */

          slwi    INCX, INCX, ZBASE_SHIFT /* element stride -> byte stride      */
          sub     X, X, INCX              /* pre-bias: every LFDUX first adds INCX */
          li      INC1, SIZE

          li      PRE, 3 * 16 * SIZE

          mr      NN, N                   /* keep count and biased pointer ...  */
          mr      XX, X                   /* ... for the second pass            */

          cmpwi   cr0, N, 0
          ble-    LL(9999)
          cmpwi   cr0, INCX, 0
          ble-    LL(9999)

          /* ---------------- Pass 1: find the minimum magnitude ---------------- */

          /* Seed all four running minima (f0-f3) with the first element. */
          LFDUX   f1, X, INCX
          LFDX    f2, X, INC1

          fabs    f1, f1
          fabs    f2, f2
          fadd    f1, f1, f2

          subi    N, N, 1                 /* first element already consumed */

          fmr     f0, f1
          srawi.  r0, N, 3                /* number of 8-element groups     */
          fmr     f2, f1
          mtspr   CTR, r0
          fmr     f3, f1
          beq-    LL(150)                 /* fewer than 8 left: tail only   */

          /* Software-pipeline prologue: load 4 elements, take |.|, load 4 more. */
          LFDUX   f24, X, INCX
          LFDX    f25, X, INC1
          LFDUX   f26, X, INCX
          LFDX    f27, X, INC1
          LFDUX   f28, X, INCX
          LFDX    f29, X, INC1
          LFDUX   f30, X, INCX
          LFDX    f31, X, INC1

          fabs    f8,  f24
          fabs    f9,  f25
          fabs    f10, f26
          fabs    f11, f27
          fabs    f12, f28
          fabs    f13, f29
          fabs    f14, f30
          fabs    f15, f31

          LFDUX   f24, X, INCX
          LFDX    f25, X, INC1
          LFDUX   f26, X, INCX
          LFDX    f27, X, INC1
          LFDUX   f28, X, INCX
          LFDX    f29, X, INC1
          LFDUX   f30, X, INCX
          LFDX    f31, X, INC1
          bdz     LL(120)                 /* single group: skip the steady state */
          .align 4

  /*
   * Steady state: each trip folds 8 magnitudes into f0-f3 (two batches of
   * four, in f4-f7 and f20-f23) while loading the next 8 elements.
   */
  LL(110):
          fadd    f4,  f8,  f9
  #ifdef PPCG4
          dcbt    X, PRE
  #endif
          fadd    f5,  f10, f11
          fadd    f6,  f12, f13
          fadd    f7,  f14, f15

          fabs    f8,  f24
          LFDUX   f24, X, INCX
          fabs    f9,  f25
          LFDX    f25, X, INC1
          fabs    f10, f26
          LFDUX   f26, X, INCX
          fabs    f11, f27
          LFDX    f27, X, INC1

          fabs    f12, f28
  #if defined(PPCG4) && defined(DOUBLE)
          dcbt    X, PRE
  #endif
          fabs    f13, f29
          LFDUX   f28, X, INCX
          fabs    f14, f30
          LFDX    f29, X, INC1
          fabs    f15, f31
          LFDUX   f30, X, INCX

          fsub    f16, f0, f4
          LFDX    f31, X, INC1
          fsub    f17, f1, f5
          fsub    f18, f2, f6
          fsub    f19, f3, f7

          fadd    f20, f8,  f9
  #ifdef PPCG4
          dcbt    X, PRE
  #endif
          fadd    f21, f10, f11
          fadd    f22, f12, f13
          fadd    f23, f14, f15

          fabs    f8,  f24
          LFDUX   f24, X, INCX
          fabs    f9,  f25
          LFDX    f25, X, INC1
          fabs    f10, f26
          LFDUX   f26, X, INCX
          fabs    f11, f27
          LFDX    f27, X, INC1

          fsel    f0, f16, f4, f0
  #if defined(PPCG4) && defined(DOUBLE)
          dcbt    X, PRE
  #endif
          fsel    f1, f17, f5, f1
          fsel    f2, f18, f6, f2
          fsel    f3, f19, f7, f3

          fabs    f12, f28
          LFDUX   f28, X, INCX
          fabs    f13, f29
          LFDX    f29, X, INC1
          fabs    f14, f30
          LFDUX   f30, X, INCX
          fabs    f15, f31
          LFDX    f31, X, INC1

          fsub    f16, f0, f20
          fsub    f17, f1, f21
          fsub    f18, f2, f22
          fsub    f19, f3, f23

          fsel    f0, f16, f20, f0
          fsel    f1, f17, f21, f1
          fsel    f2, f18, f22, f2
          fsel    f3, f19, f23, f3
          bdnz    LL(110)
          .align 4

  /* Pipeline drain: fold the last 8 elements already held in registers. */
  LL(120):
          fadd    f4,  f8,  f9
          fadd    f5,  f10, f11
          fadd    f6,  f12, f13
          fadd    f7,  f14, f15

          fabs    f8,  f24
          fabs    f9,  f25
          fabs    f10, f26
          fabs    f11, f27
          fabs    f12, f28
          fabs    f13, f29
          fabs    f14, f30
          fabs    f15, f31

          fsub    f16, f0, f4
          fsub    f17, f1, f5
          fsub    f18, f2, f6
          fsub    f19, f3, f7

          fadd    f20, f8,  f9
          fadd    f21, f10, f11
          fadd    f22, f12, f13
          fadd    f23, f14, f15

          fsel    f0, f16, f4, f0
          fsel    f1, f17, f5, f1
          fsel    f2, f18, f6, f2
          fsel    f3, f19, f7, f3

          fsub    f16, f0, f20
          fsub    f17, f1, f21
          fsub    f18, f2, f22
          fsub    f19, f3, f23

          fsel    f0, f16, f20, f0
          fsel    f1, f17, f21, f1
          fsel    f2, f18, f22, f2
          fsel    f3, f19, f23, f3
          .align 4

  /* Tail: the remaining (N & 7) elements, one at a time, folded into f1. */
  LL(150):
          andi.   r0, N, 7
          mtspr   CTR, r0
          beq     LL(999)
          .align 4

  LL(160):
          LFDUX   f8, X, INCX
          LFDX    f9, X, INC1
          fabs    f8, f8
          fabs    f9, f9
          fadd    f8, f8, f9
          fsub    f16, f1, f8
          fsel    f1, f16, f8, f1
          bdnz    LL(160)
          .align 4

  /* Combine the four partial minima: f1 = min(f0, f1, f2, f3). */
  LL(999):
          fsub    f8, f0, f1
          fsub    f9, f2, f3

          fsel    f0, f8, f1, f0
          fsel    f2, f9, f3, f2
          fsub    f8, f0, f2
          fsel    f1, f8, f2, f0
          .align 4

          /* -------- Pass 2: locate the first element equal to f1 -------- */

  LL(1000):
          srawi.  r0, NN, 3               /* number of 8-element groups */
          mtspr   CTR, r0
          beq-    LL(1150)

          /* Pipeline prologue: the first 4 elements. */
          LFDUX   f24, XX, INCX
          LFDX    f25, XX, INC1
          LFDUX   f26, XX, INCX
          LFDX    f27, XX, INC1
          LFDUX   f28, XX, INCX
          LFDX    f29, XX, INC1
          LFDUX   f30, XX, INCX
          LFDX    f31, XX, INC1
          bdz     LL(1120)
          .align 4

  /*
   * Steady state: two batches of 4 per trip.  RET is bumped before each
   * compare so that it already holds the 1-based index on a hit.
   */
  LL(1110):
          fabs    f8,  f24
          LFDUX   f24, XX, INCX
          fabs    f9,  f25
          LFDX    f25, XX, INC1
          fabs    f10, f26
          LFDUX   f26, XX, INCX
          fabs    f11, f27
          LFDX    f27, XX, INC1

  #ifdef PPCG4
          dcbt    XX, PRE
  #endif

          fabs    f12, f28
          LFDUX   f28, XX, INCX
          fabs    f13, f29
          LFDX    f29, XX, INC1
          fabs    f14, f30
          LFDUX   f30, XX, INCX
          fabs    f15, f31
          LFDX    f31, XX, INC1

          fadd    f4, f8, f9
  #if defined(PPCG4) && defined(DOUBLE)
          /* Prefetch ahead of XX, the pointer this pass advances; X already
             sits at the last element consumed by pass 1. */
          dcbt    XX, PRE
  #endif
          fadd    f5, f10, f11
          fadd    f6, f12, f13
          fadd    f7, f14, f15

          addi    RET, RET, 1
          fcmpu   cr0, f1, f4
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f5
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f6
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f7
          beq     cr0, LL(9999)

          fabs    f8,  f24
          LFDUX   f24, XX, INCX
          fabs    f9,  f25
          LFDX    f25, XX, INC1
          fabs    f10, f26
          LFDUX   f26, XX, INCX
          fabs    f11, f27
          LFDX    f27, XX, INC1

  #ifdef PPCG4
          dcbt    XX, PRE
  #endif

          fabs    f12, f28
          LFDUX   f28, XX, INCX
          fabs    f13, f29
          LFDX    f29, XX, INC1
          fabs    f14, f30
          LFDUX   f30, XX, INCX
          fabs    f15, f31
          LFDX    f31, XX, INC1

          fadd    f4, f8, f9
  #if defined(PPCG4) && defined(DOUBLE)
          /* Same as above: the hint must follow XX, not X. */
          dcbt    XX, PRE
  #endif
          fadd    f5, f10, f11
          fadd    f6, f12, f13
          fadd    f7, f14, f15

          addi    RET, RET, 1
          fcmpu   cr0, f1, f4
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f5
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f6
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f7
          beq     cr0, LL(9999)

          bdnz    LL(1110)
          .align 4

  /* Pipeline drain: test the 4 held elements, load and test the final 4. */
  LL(1120):
          fabs    f8,  f24
          LFDUX   f24, XX, INCX
          fabs    f9,  f25
          LFDX    f25, XX, INC1
          fabs    f10, f26
          LFDUX   f26, XX, INCX
          fabs    f11, f27
          LFDX    f27, XX, INC1

          fabs    f12, f28
          LFDUX   f28, XX, INCX
          fabs    f13, f29
          LFDX    f29, XX, INC1
          fabs    f14, f30
          LFDUX   f30, XX, INCX
          fabs    f15, f31
          LFDX    f31, XX, INC1

          fadd    f4, f8,  f9
          fadd    f5, f10, f11
          fadd    f6, f12, f13
          fadd    f7, f14, f15

          addi    RET, RET, 1
          fcmpu   cr0, f1, f4
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f5
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f6
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f7
          beq     cr0, LL(9999)

          fabs    f8,  f24
          fabs    f9,  f25
          fabs    f10, f26
          fabs    f11, f27

          fabs    f12, f28
          fabs    f13, f29
          fabs    f14, f30
          fabs    f15, f31

          fadd    f4, f8,  f9
          fadd    f5, f10, f11
          fadd    f6, f12, f13
          fadd    f7, f14, f15

          addi    RET, RET, 1
          fcmpu   cr0, f1, f4
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f5
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f6
          beq     cr0, LL(9999)

          addi    RET, RET, 1
          fcmpu   cr0, f1, f7
          beq     cr0, LL(9999)
          .align 4

  /* Tail: the remaining (NN & 7) elements, one at a time. */
  LL(1150):
          andi.   r0, NN, 7
          mtspr   CTR, r0
          beq     LL(9999)
          .align 4

  LL(1160):
          LFDUX   f8, XX, INCX
          LFDX    f9, XX, INC1

          fabs    f8, f8
          fabs    f9, f9
          fadd    f8, f8, f9

          addi    RET, RET, 1
          fcmpu   cr0, f1, f8
          beq     cr0, LL(9999)
          bdnz    LL(1160)
          .align 4

  /* Common exit: restore f14-f31, release the frame, return RET in r3. */
  LL(9999):
          lfd     f14,   0(SP)
          lfd     f15,   8(SP)
          lfd     f16,  16(SP)
          lfd     f17,  24(SP)
          lfd     f18,  32(SP)
          lfd     f19,  40(SP)
          lfd     f20,  48(SP)
          lfd     f21,  56(SP)
          lfd     f22,  64(SP)
          lfd     f23,  72(SP)
          lfd     f24,  80(SP)
          lfd     f25,  88(SP)
          lfd     f26,  96(SP)
          lfd     f27, 104(SP)
          lfd     f28, 112(SP)
          lfd     f29, 120(SP)
          lfd     f30, 128(SP)
          lfd     f31, 136(SP)

          addi    SP, SP, STACKSIZE
          blr

          EPILOGUE