You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

l2param.h 4.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. #ifndef GEMV_PARAM_H
  2. #define GEMV_PARAM_H
  3. #ifdef movsd
  4. #undef movsd
  5. #endif
  6. #undef movapd
  7. #define movapd movaps
  8. #ifdef ATHLON
  9. #define ALIGNED_ACCESS
  10. #define MOVUPS_A movaps
  11. #define MOVUPS_XL movaps
  12. #define MOVUPS_XS movaps
  13. #define MOVUPS_YL movaps
  14. #define MOVUPS_YS movaps
  15. #define PREFETCH prefetcht0
  16. #define PREFETCHSIZE 64 * 3
  17. #endif
  18. #ifdef PENTIUM4
  19. #define ALIGNED_ACCESS
  20. #define MOVUPS_A movaps
  21. #define MOVUPS_XL movaps
  22. #define MOVUPS_XS movaps
  23. #define MOVUPS_YL movaps
  24. #define MOVUPS_YS movaps
  25. #define PREFETCH prefetcht0
  26. #define PREFETCHSIZE 64 * 2
  27. #endif
  28. #ifdef CORE2
  29. #define ALIGNED_ACCESS
  30. #define MOVUPS_A movaps
  31. #define MOVUPS_XL movaps
  32. #define MOVUPS_XS movaps
  33. #define MOVUPS_YL movaps
  34. #define MOVUPS_YS movaps
  35. #define PREFETCH prefetcht0
  36. #define PREFETCHSIZE 64 * 4
  37. #endif
  38. #ifdef PENRYN
  39. #define ALIGNED_ACCESS
  40. #define MOVUPS_A movaps
  41. #define MOVUPS_XL movaps
  42. #define MOVUPS_XS movaps
  43. #define MOVUPS_YL movaps
  44. #define MOVUPS_YS movaps
  45. #define PREFETCH prefetcht0
  46. #define PREFETCHSIZE 64 * 4
  47. #endif
  48. #ifdef NEHALEM
  49. #define MOVUPS_A movups
  50. #define MOVUPS_XL movups
  51. #define MOVUPS_XS movups
  52. #define MOVUPS_YL movups
  53. #define MOVUPS_YS movups
  54. #define PREFETCH prefetcht0
  55. #define PREFETCHW prefetcht0
  56. #define PREFETCHSIZE 64 * 3
  57. #endif
  58. #ifdef SANDYBRIDGE
  59. #define MOVUPS_A movups
  60. #define MOVUPS_XL movups
  61. #define MOVUPS_XS movups
  62. #define MOVUPS_YL movups
  63. #define MOVUPS_YS movups
  64. #define PREFETCH prefetcht0
  65. #define PREFETCHW prefetcht0
  66. #define PREFETCHSIZE 64 * 3
  67. #endif
  68. #ifdef OPTERON
  69. #define PREFETCH prefetch
  70. #define PREFETCHW prefetchw
  71. #ifndef COMPLEX
  72. #define PREFETCHSIZE 64 * 1
  73. #else
  74. #define PREFETCHSIZE 64 * 1
  75. #endif
  76. #define movsd movlps
  77. #endif
  78. #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BARCELONA_OPTIMIZATION)
  79. #define ALIGNED_ACCESS
  80. #define MOVUPS_A movaps
  81. #define MOVUPS_XL movaps
  82. #define MOVUPS_XS movaps
  83. #define MOVUPS_YL movaps
  84. #define MOVUPS_YS movaps
  85. #define PREFETCH prefetch
  86. #define PREFETCHW prefetchw
  87. #ifndef COMPLEX
  88. #define PREFETCHSIZE 64 * 2
  89. #else
  90. #define PREFETCHSIZE 64 * 4
  91. #endif
  92. #endif
  93. #ifdef NANO
  94. #define ALIGNED_ACCESS
  95. #define MOVUPS_A movaps
  96. #define MOVUPS_XL movaps
  97. #define MOVUPS_XS movaps
  98. #define MOVUPS_YL movaps
  99. #define MOVUPS_YS movaps
  100. #define PREFETCH prefetcht0
  101. #ifndef COMPLEX
  102. #define PREFETCHSIZE 64 * 1
  103. #else
  104. #define PREFETCHSIZE 64 * 2
  105. #endif
  106. #endif
  107. #ifndef PREOFFSET
  108. #ifdef L1_DATA_LINESIZE
  109. #define PREOFFSET (L1_DATA_LINESIZE >> 1)
  110. #else
  111. #define PREOFFSET 32
  112. #endif
  113. #endif
  114. #ifndef GEMV_UNROLL
  115. #define GEMV_UNROLL 4
  116. #endif
  117. #ifndef ZGEMV_UNROLL
  118. #define ZGEMV_UNROLL 4
  119. #endif
  120. /* #define COPY_FORCE */ /* Always copy X or Y to the buffer */
  121. /* #define NOCOPY_UNALIGNED */ /* Not copy if X or Y is not aligned */
  122. #ifdef MOVUPS_A
  123. #define MOVUPS_A1(OFF, ADDR, REGS) MOVUPS_A OFF(ADDR), REGS
  124. #define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) MOVUPS_A OFF(ADDR, BASE, SCALE), REGS
  125. #else
  126. #define MOVUPS_A1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
  127. #define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) movsd OFF(ADDR, BASE, SCALE), REGS; movhps OFF + 8(ADDR, BASE, SCALE), REGS
  128. #endif
  129. #define MOVRPS_A1(OFF, ADDR, REGS) movsd OFF + 8(ADDR), REGS; movhps OFF(ADDR), REGS
  130. #define MOVRPS_A2(OFF, ADDR, BASE, SCALE, REGS) movsd OFF + 8(ADDR, BASE, SCALE), REGS; movhps OFF(ADDR, BASE, SCALE), REGS
  131. #ifdef MOVUPS_XL
  132. #define MOVUPS_XL1(OFF, ADDR, REGS) MOVUPS_XL OFF(ADDR), REGS
  133. #else
  134. #define MOVUPS_XL1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
  135. #endif
  136. #ifdef MOVUPS_XS
  137. #define MOVUPS_XS1(OFF, ADDR, REGS) MOVUPS_XS REGS, OFF(ADDR)
  138. #else
  139. #define MOVUPS_XS1(OFF, ADDR, REGS) movsd REGS, OFF(ADDR); movhps REGS, OFF + 8(ADDR)
  140. #endif
  141. #ifdef MOVUPS_YL
  142. #define MOVUPS_YL1(OFF, ADDR, REGS) MOVUPS_YL OFF(ADDR), REGS
  143. #else
  144. #define MOVUPS_YL1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
  145. #endif
  146. #ifdef MOVUPS_YS
  147. #define MOVUPS_YS1(OFF, ADDR, REGS) MOVUPS_YS REGS, OFF(ADDR)
  148. #else
  149. #define MOVUPS_YS1(OFF, ADDR, REGS) movsd REGS, OFF(ADDR); movhps REGS, OFF + 8(ADDR)
  150. #endif
  151. #endif