You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

KERNEL.POWER8 8.4 kB

5 years ago

  # GEMM / TRMM kernel sources and GEMM copy-routine sources and objects.
  #
  # Two mutually exclusive configurations are selected by concatenating
  # $(__BYTE_ORDER__) and $(BINARY32) and comparing the result against
  # "__ORDER_BIG_ENDIAN__1" (i.e. big-endian byte order with BINARY32 = 1).
  #
  # Big-endian 32bit (AIX) is supported through the POWER6 GEMM kernels, no separate TRMM
  ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
    # Real single/double precision: only the ON/OT copy routines are set;
    # the IN/IT copy sources and objects are deliberately left empty.
    SGEMMKERNEL = gemm_kernel_power6.S
    SGEMMINCOPY =
    SGEMMITCOPY =
    SGEMMONCOPY = gemm_ncopy_4.S
    SGEMMOTCOPY = gemm_tcopy_4.S
    SGEMMINCOPYOBJ =
    SGEMMITCOPYOBJ =
    SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
    SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
    DGEMMKERNEL = gemm_kernel_power6.S
    DGEMMINCOPY =
    DGEMMITCOPY =
    DGEMMONCOPY = gemm_ncopy_4.S
    DGEMMOTCOPY = gemm_tcopy_4.S
    DGEMMINCOPYOBJ =
    DGEMMITCOPYOBJ =
    DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
    DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
    # Complex single/double precision: both share the zgemm POWER6 kernel and
    # the generic C copy routines; all four copy objects are set.
    CGEMMKERNEL = zgemm_kernel_power6.S
    CGEMMINCOPY = ../generic/zgemm_ncopy_2.c
    CGEMMITCOPY = ../generic/zgemm_tcopy_2.c
    CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
    CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
    CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
    CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
    CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
    CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
    ZGEMMKERNEL = zgemm_kernel_power6.S
    ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c
    ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c
    ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
    ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
    ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
    ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
    ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
    ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
  else
    # Every other configuration: POWER8-specific kernels, including dedicated
    # TRMM kernels. The *_BETA overrides below are disabled and kept for reference.
    #SGEMM_BETA = ../generic/gemm_beta.c
    #DGEMM_BETA = ../generic/gemm_beta.c
    #CGEMM_BETA = ../generic/zgemm_beta.c
    #ZGEMM_BETA = ../generic/zgemm_beta.c
    STRMMKERNEL = strmm_kernel_16x8_power8.S
    DTRMMKERNEL = dtrmm_kernel_16x4_power8.S
    CTRMMKERNEL = ctrmm_kernel_8x4_power8.S
    ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S
    SGEMMKERNEL = sgemm_kernel_16x8_power8.S
    SGEMMINCOPY = ../generic/gemm_ncopy_16.c
    SGEMMITCOPY = sgemm_tcopy_16_power8.S
    SGEMMONCOPY = ../generic/gemm_ncopy_8.c
    SGEMMOTCOPY = sgemm_tcopy_8_power8.S
    SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
    SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
    SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
    SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
    DGEMMKERNEL = dgemm_kernel_16x4_power8.S
    DGEMMINCOPY = ../generic/gemm_ncopy_16.c
    DGEMMITCOPY = dgemm_tcopy_16_power8.S
    DGEMMONCOPY = dgemm_ncopy_4_power8.S
    DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
    DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
    DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
    DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
    DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
    CGEMMKERNEL = cgemm_kernel_8x4_power8.S
    CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
    CGEMMITCOPY = cgemm_tcopy_8_power8.S
    CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
    CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
    CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
    CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
    CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
    CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
    ZGEMMKERNEL = zgemm_kernel_8x2_power8.S
    ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
    ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
    ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
    ZGEMMITCOPY = zgemm_tcopy_8_power8.S
    ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
    ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
    ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
    ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
  endif
  # TRSM kernel sources for the four variants (LN, LT, RN, RT) of each precision.
  #
  # Single precision and both complex precisions always use the generic C
  # implementations; only double precision is configuration dependent.
  STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
  STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
  STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
  STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
  # Double precision: big-endian with BINARY32 = 1 takes the POWER6 assembly
  # kernels; everything else uses the POWER8 assembly kernel for LT and the
  # generic C code for LN/RN/RT.
  ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
    DTRSMKERNEL_LN = trsm_kernel_power6_LN.S
    DTRSMKERNEL_LT = trsm_kernel_power6_LT.S
    # NOTE(review): RN is assigned the LT source file here; presumably
    # intentional (same source built for both variants) - confirm.
    DTRSMKERNEL_RN = trsm_kernel_power6_LT.S
    DTRSMKERNEL_RT = trsm_kernel_power6_RT.S
  else
    DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
    DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
    DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
    DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
  endif
  CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
  CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
  CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
  CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
  ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
  ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
  ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
  ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
  # Disabled GEMM3M and AMAX/AMIN/MAX/MIN kernel assignments, kept for
  # reference, followed by the two active ISMIN/ISMAX assignments.
  #Todo: CGEMM3MKERNEL should be 4x4 blocksizes.
  #CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S
  #ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S
  #Pure C for other kernels
  #SAMAXKERNEL = ../arm/amax.c
  #DAMAXKERNEL = ../arm/amax.c
  #CAMAXKERNEL = ../arm/zamax.c
  #ZAMAXKERNEL = ../arm/zamax.c
  #
  #SAMINKERNEL = ../arm/amin.c
  #DAMINKERNEL = ../arm/amin.c
  #CAMINKERNEL = ../arm/zamin.c
  #ZAMINKERNEL = ../arm/zamin.c
  #
  #SMAXKERNEL = ../arm/max.c
  #DMAXKERNEL = ../arm/max.c
  #
  #SMINKERNEL = ../arm/min.c
  #DMINKERNEL = ../arm/min.c
  # Active: ISMIN / ISMAX use the assembly sources unconditionally.
  ISMINKERNEL = imin.S
  ISMAXKERNEL = imax.S
  # I?AMAX / I?AMIN kernel selection.
  #
  # For the single-precision real and complex variants (ISAMAX, ICAMAX, ISAMIN,
  # ICAMIN) the POWER8 assembly source is chosen only when BOTH hold:
  #   - $(__BYTE_ORDER__) is not __ORDER_BIG_ENDIAN__, and
  #   - $(GCCVERSIONGTEQ9) is not 1 (presumably "GCC version >= 9" - confirm
  #     where that variable is set).
  # In every other case the C source is used. The double-precision variants
  # (IDAMAX, IZAMAX, IDAMIN, IZAMIN) always use the C source here.
  ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
    ifneq ($(GCCVERSIONGTEQ9),1)
      ISAMAXKERNEL = isamax_power8.S
    else
      ISAMAXKERNEL = isamax.c
    endif
  else
    ISAMAXKERNEL = isamax.c
  endif
  #
  IDAMAXKERNEL = idamax.c
  #
  ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
    ifneq ($(GCCVERSIONGTEQ9),1)
      ICAMAXKERNEL = icamax_power8.S
    else
      ICAMAXKERNEL = icamax.c
    endif
  else
    ICAMAXKERNEL = icamax.c
  endif
  #
  IZAMAXKERNEL = izamax.c
  #
  ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
    ifneq ($(GCCVERSIONGTEQ9),1)
      ISAMINKERNEL = isamin_power8.S
    else
      ISAMINKERNEL = isamin.c
    endif
  else
    ISAMINKERNEL = isamin.c
  endif
  #
  IDAMINKERNEL = idamin.c
  #
  ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
    ifneq ($(GCCVERSIONGTEQ9),1)
      ICAMINKERNEL = icamin_power8.S
    else
      ICAMINKERNEL = icamin.c
    endif
  else
    ICAMINKERNEL = icamin.c
  endif
  #
  IZAMINKERNEL = izamin.c
  #
  # Disabled alternative I?MAX / I?MIN assignments, kept for reference.
  #ISMAXKERNEL = ../arm/imax.c
  #IDMAXKERNEL = ../arm/imax.c
  #
  #ISMINKERNEL = ../arm/imin.c
  #IDMINKERNEL = ../arm/imin.c
  #
  # ASUM and AXPY kernel sources.
  SASUMKERNEL = sasum.c
  DASUMKERNEL = dasum.c
  CASUMKERNEL = casum.c
  ZASUMKERNEL = zasum.c
  #
  SAXPYKERNEL = saxpy.c
  DAXPYKERNEL = daxpy.c
  #
  # CAXPY is the only configuration-dependent entry in this group:
  #   - big-endian with BINARY32 = 1             -> zaxpy.S
  #   - not big-endian and GCCVERSIONGTEQ9 != 1  -> caxpy_power8.S
  #   - anything else                            -> caxpy.c
  ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
    CAXPYKERNEL = zaxpy.S
  else
    ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
      ifneq ($(GCCVERSIONGTEQ9),1)
        CAXPYKERNEL = caxpy_power8.S
      else
        CAXPYKERNEL = caxpy.c
      endif
    else
      CAXPYKERNEL = caxpy.c
    endif
  endif
  #
  ZAXPYKERNEL = zaxpy.c
  # COPY, DOT, NRM2, ROT, SCAL and SWAP kernel sources.
  #
  SCOPYKERNEL = scopy.c
  DCOPYKERNEL = dcopy.c
  CCOPYKERNEL = ccopy.c
  ZCOPYKERNEL = zcopy.c
  #
  SDOTKERNEL = sdot.c
  DDOTKERNEL = ddot.c
  # DSDOT is built from the same source file as SDOT.
  DSDOTKERNEL = sdot.c
  CDOTKERNEL = cdot.c
  ZDOTKERNEL = zdot.c
  #
  # NRM2 uses the C implementations from the ../arm directory.
  SNRM2KERNEL = ../arm/nrm2.c
  DNRM2KERNEL = ../arm/nrm2.c
  CNRM2KERNEL = ../arm/znrm2.c
  ZNRM2KERNEL = ../arm/znrm2.c
  #
  SROTKERNEL = srot.c
  DROTKERNEL = drot.c
  CROTKERNEL = crot.c
  ZROTKERNEL = zrot.c
  #
  SSCALKERNEL = sscal.c
  DSCALKERNEL = dscal.c
  # Complex SCAL: when $(C_COMPILER) is PGI, the ../arm C source is used
  # instead of the local zscal.c. In both branches CSCAL and ZSCAL share one file.
  ifeq ($(C_COMPILER), PGI)
    CSCALKERNEL = ../arm/zscal.c
    ZSCALKERNEL = ../arm/zscal.c
  else
    CSCALKERNEL = zscal.c
    ZSCALKERNEL = zscal.c
  endif
  #
  SSWAPKERNEL = sswap.c
  DSWAPKERNEL = dswap.c
  CSWAPKERNEL = cswap.c
  ZSWAPKERNEL = zswap.c
  #
  # GEMV kernel sources, non-transposed (N) and transposed (T) variants.
  SGEMVNKERNEL = sgemv_n.c
  DGEMVNKERNEL = dgemv_n.c
  CGEMVNKERNEL = cgemv_n.c
  ZGEMVNKERNEL = zgemv_n_4.c
  #
  SGEMVTKERNEL = sgemv_t.c
  DGEMVTKERNEL = dgemv_t.c
  CGEMVTKERNEL = cgemv_t.c
  ZGEMVTKERNEL = zgemv_t_4.c
  # Disabled SYMV / HEMV kernel assignments, kept for reference.
  #SSYMV_U_KERNEL = ../generic/symv_k.c
  #SSYMV_L_KERNEL = ../generic/symv_k.c
  #DSYMV_U_KERNEL = ../generic/symv_k.c
  #DSYMV_L_KERNEL = ../generic/symv_k.c
  #QSYMV_U_KERNEL = ../generic/symv_k.c
  #QSYMV_L_KERNEL = ../generic/symv_k.c
  #CSYMV_U_KERNEL = ../generic/zsymv_k.c
  #CSYMV_L_KERNEL = ../generic/zsymv_k.c
  #ZSYMV_U_KERNEL = ../generic/zsymv_k.c
  #ZSYMV_L_KERNEL = ../generic/zsymv_k.c
  #XSYMV_U_KERNEL = ../generic/zsymv_k.c
  #XSYMV_L_KERNEL = ../generic/zsymv_k.c
  #ZHEMV_U_KERNEL = ../generic/zhemv_k.c
  #ZHEMV_L_KERNEL = ../generic/zhemv_k.c
  # LSAME and CABS helper kernels: generic C sources.
  LSAME_KERNEL = ../generic/lsame.c
  SCABS_KERNEL = ../generic/cabs.c
  DCABS_KERNEL = ../generic/cabs.c
  QCABS_KERNEL = ../generic/cabs.c
  #Dump kernel
  CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
  ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
  # When $(__BYTE_ORDER__)$(ELF_VERSION) equals "__ORDER_BIG_ENDIAN__2"
  # (big-endian byte order with ELF_VERSION = 2), the four double-precision
  # I?AMAX / I?AMIN kernels are pointed at the ../arm C sources. Being plain
  # `=` assignments placed here, they replace whatever value these variables
  # were given earlier in the file.
  ifeq ($(__BYTE_ORDER__)$(ELF_VERSION),__ORDER_BIG_ENDIAN__2)
    IDAMAXKERNEL = ../arm/iamax.c
    IDAMINKERNEL = ../arm/iamin.c
    IZAMAXKERNEL = ../arm/izamax.c
    IZAMINKERNEL = ../arm/izamin.c
  endif