@@ -29,6 +29,7 @@ BARCELONA | |||||
SHANGHAI | SHANGHAI | ||||
ISTANBUL | ISTANBUL | ||||
BOBCAT | BOBCAT | ||||
BULLDOZER | |||||
c)VIA CPU: | c)VIA CPU: | ||||
SSE_GENERIC | SSE_GENERIC | ||||
@@ -163,7 +163,7 @@ int get_L2_size(void){ | |||||
int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || \ | |||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | |||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | ||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) | defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) | ||||
@@ -385,12 +385,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define FORCE_INTEL | #define FORCE_INTEL | ||||
#define ARCHITECTURE "X86" | #define ARCHITECTURE "X86" | ||||
#define SUBARCHITECTURE "BULLDOZER" | #define SUBARCHITECTURE "BULLDOZER" | ||||
#define ARCHCONFIG "-DBARCELONA " \ | |||||
#define ARCHCONFIG "-DBULLDOZER " \ | |||||
"-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \ | "-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \ | ||||
"-DL2_SIZE=1024000 -DL2_LINESIZE=64 -DL3_SIZE=16777216 " \ | "-DL2_SIZE=1024000 -DL2_LINESIZE=64 -DL3_SIZE=16777216 " \ | ||||
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \ | "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \ | ||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ | "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ | ||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU" \ | |||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \ | |||||
"-DHAVE_AVX -DHAVE_FMA4" | "-DHAVE_AVX -DHAVE_FMA4" | ||||
#define LIBNAME "bulldozer" | #define LIBNAME "bulldozer" | ||||
#define CORENAME "BULLDOZER" | #define CORENAME "BULLDOZER" | ||||
@@ -810,6 +810,22 @@ static void init_parameter(void) { | |||||
#endif | #endif | ||||
#endif | #endif | ||||
#ifdef BULLDOZER | |||||
#ifdef DEBUG | |||||
fprintf(stderr, "Bulldozer\n"); | |||||
#endif | |||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||||
#ifdef EXPRECISION | |||||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||||
#endif | |||||
#endif | |||||
#ifdef NANO | #ifdef NANO | ||||
#ifdef DEBUG | #ifdef DEBUG | ||||
@@ -596,7 +596,7 @@ | |||||
.L22: | .L22: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
addps %xmm2, %xmm4 | addps %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movsd 4 * SIZE(BB), %xmm2 | movsd 4 * SIZE(BB), %xmm2 | ||||
@@ -842,7 +842,7 @@ | |||||
.L32: | .L32: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
addss %xmm2, %xmm4 | addss %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movss 4 * SIZE(BB), %xmm2 | movss 4 * SIZE(BB), %xmm2 | ||||
@@ -1168,7 +1168,7 @@ | |||||
.L52: | .L52: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps 4 * SIZE(BB), %xmm0 | mulps 4 * SIZE(BB), %xmm0 | ||||
@@ -1198,7 +1198,7 @@ | |||||
addps %xmm0, %xmm5 | addps %xmm0, %xmm5 | ||||
movaps 32 * SIZE(AA), %xmm0 | movaps 32 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps %xmm1, %xmm2 | mulps %xmm1, %xmm2 | ||||
@@ -1347,7 +1347,7 @@ | |||||
ALIGN_4 | ALIGN_4 | ||||
.L62: | .L62: | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
@@ -1531,7 +1531,7 @@ | |||||
.L72: | .L72: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulss 4 * SIZE(BB), %xmm0 | mulss 4 * SIZE(BB), %xmm0 | ||||
@@ -1778,7 +1778,7 @@ | |||||
.L92: | .L92: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movaps 4 * SIZE(AA), %xmm0 | movaps 4 * SIZE(AA), %xmm0 | ||||
@@ -1793,7 +1793,7 @@ | |||||
mulps 12 * SIZE(BB), %xmm0 | mulps 12 * SIZE(BB), %xmm0 | ||||
addps %xmm0, %xmm7 | addps %xmm0, %xmm7 | ||||
movaps 32 * SIZE(AA), %xmm0 | movaps 32 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps %xmm1, %xmm3 | mulps %xmm1, %xmm3 | ||||
@@ -1924,7 +1924,7 @@ | |||||
.L102: | .L102: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movsd 2 * SIZE(AA), %xmm0 | movsd 2 * SIZE(AA), %xmm0 | ||||
@@ -2069,7 +2069,7 @@ | |||||
.L112: | .L112: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movss 1 * SIZE(AA), %xmm0 | movss 1 * SIZE(AA), %xmm0 | ||||
@@ -269,7 +269,7 @@ | |||||
sarl $5, I | sarl $5, I | ||||
jle .L113 | jle .L113 | ||||
#if defined(BARCELONA) | |||||
#if defined(BARCELONA) || defined(BULLDOZER) | |||||
movaps %xmm0, %xmm1 | movaps %xmm0, %xmm1 | ||||
mulps -32 * SIZE(X), %xmm1 | mulps -32 * SIZE(X), %xmm1 | ||||
@@ -253,7 +253,7 @@ | |||||
sarl $4, I | sarl $4, I | ||||
jle .L113 | jle .L113 | ||||
#if defined(BARCELONA) | |||||
#if defined(BARCELONA) || defined(BULLDOZER) | |||||
movaps %xmm0, %xmm1 | movaps %xmm0, %xmm1 | ||||
mulpd -16 * SIZE(X), %xmm1 | mulpd -16 * SIZE(X), %xmm1 | ||||
@@ -69,7 +69,7 @@ | |||||
#define STACK_ALIGN 4096 | #define STACK_ALIGN 4096 | ||||
#define STACK_OFFSET 1024 | #define STACK_OFFSET 1024 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHSIZE (8 * 10 + 4) | #define PREFETCHSIZE (8 * 10 + 4) | ||||
#endif | #endif | ||||
@@ -439,7 +439,7 @@ | |||||
.L22: | .L22: | ||||
mulsd %xmm0, %xmm2 | mulsd %xmm0, %xmm2 | ||||
addsd %xmm2, %xmm4 | addsd %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movlpd 2 * SIZE(BB), %xmm2 | movlpd 2 * SIZE(BB), %xmm2 | ||||
@@ -488,7 +488,7 @@ | |||||
movlpd 40 * SIZE(BB), %xmm3 | movlpd 40 * SIZE(BB), %xmm3 | ||||
addsd %xmm0, %xmm7 | addsd %xmm0, %xmm7 | ||||
movlpd 8 * SIZE(AA), %xmm0 | movlpd 8 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) | PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) | ||||
#endif | #endif | ||||
mulsd %xmm1, %xmm2 | mulsd %xmm1, %xmm2 | ||||
@@ -1697,7 +1697,7 @@ | |||||
.L42: | .L42: | ||||
mulpd %xmm0, %xmm2 | mulpd %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulpd 2 * SIZE(BB), %xmm0 | mulpd 2 * SIZE(BB), %xmm0 | ||||
@@ -1727,7 +1727,7 @@ | |||||
addpd %xmm0, %xmm7 | addpd %xmm0, %xmm7 | ||||
movapd 16 * SIZE(AA), %xmm0 | movapd 16 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) | ||||
#endif | #endif | ||||
mulpd %xmm1, %xmm2 | mulpd %xmm1, %xmm2 | ||||
@@ -64,7 +64,7 @@ | |||||
#define BORIG 60(%esp) | #define BORIG 60(%esp) | ||||
#define BUFFER 128(%esp) | #define BUFFER 128(%esp) | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 10 + 8) | #define PREFETCHSIZE (16 * 10 + 8) | ||||
@@ -437,7 +437,7 @@ | |||||
.L32: | .L32: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
addss %xmm2, %xmm4 | addss %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movss 4 * SIZE(BB), %xmm2 | movss 4 * SIZE(BB), %xmm2 | ||||
@@ -833,7 +833,7 @@ | |||||
.L22: | .L22: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
addps %xmm2, %xmm4 | addps %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movaps 4 * SIZE(BB), %xmm2 | movaps 4 * SIZE(BB), %xmm2 | ||||
@@ -1848,7 +1848,7 @@ | |||||
.L72: | .L72: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulss 4 * SIZE(BB), %xmm0 | mulss 4 * SIZE(BB), %xmm0 | ||||
@@ -2109,7 +2109,7 @@ | |||||
ALIGN_4 | ALIGN_4 | ||||
.L62: | .L62: | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
@@ -2429,7 +2429,7 @@ | |||||
.L52: | .L52: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps 4 * SIZE(BB), %xmm0 | mulps 4 * SIZE(BB), %xmm0 | ||||
@@ -2459,7 +2459,7 @@ | |||||
addps %xmm0, %xmm5 | addps %xmm0, %xmm5 | ||||
movaps 32 * SIZE(AA), %xmm0 | movaps 32 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps %xmm1, %xmm2 | mulps %xmm1, %xmm2 | ||||
@@ -2952,7 +2952,7 @@ | |||||
.L112: | .L112: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movss 1 * SIZE(AA), %xmm0 | movss 1 * SIZE(AA), %xmm0 | ||||
@@ -3148,7 +3148,7 @@ | |||||
.L102: | .L102: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movsd 2 * SIZE(AA), %xmm0 | movsd 2 * SIZE(AA), %xmm0 | ||||
@@ -3389,7 +3389,7 @@ | |||||
.L92: | .L92: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movaps 4 * SIZE(AA), %xmm0 | movaps 4 * SIZE(AA), %xmm0 | ||||
@@ -3404,7 +3404,7 @@ | |||||
mulps 12 * SIZE(BB), %xmm0 | mulps 12 * SIZE(BB), %xmm0 | ||||
addps %xmm0, %xmm7 | addps %xmm0, %xmm7 | ||||
movaps 32 * SIZE(AA), %xmm0 | movaps 32 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps %xmm1, %xmm3 | mulps %xmm1, %xmm3 | ||||
@@ -69,7 +69,7 @@ | |||||
#define STACK_ALIGN 4096 | #define STACK_ALIGN 4096 | ||||
#define STACK_OFFSET 1024 | #define STACK_OFFSET 1024 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHSIZE (8 * 10 + 4) | #define PREFETCHSIZE (8 * 10 + 4) | ||||
#endif | #endif | ||||
@@ -910,7 +910,7 @@ | |||||
.L22: | .L22: | ||||
mulsd %xmm0, %xmm2 | mulsd %xmm0, %xmm2 | ||||
addsd %xmm2, %xmm4 | addsd %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movlpd 2 * SIZE(BB), %xmm2 | movlpd 2 * SIZE(BB), %xmm2 | ||||
@@ -959,7 +959,7 @@ | |||||
movlpd 40 * SIZE(BB), %xmm3 | movlpd 40 * SIZE(BB), %xmm3 | ||||
addsd %xmm0, %xmm7 | addsd %xmm0, %xmm7 | ||||
movlpd 8 * SIZE(AA), %xmm0 | movlpd 8 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) | PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) | ||||
#endif | #endif | ||||
mulsd %xmm1, %xmm2 | mulsd %xmm1, %xmm2 | ||||
@@ -1439,7 +1439,7 @@ | |||||
.L42: | .L42: | ||||
mulpd %xmm0, %xmm2 | mulpd %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulpd 2 * SIZE(BB), %xmm0 | mulpd 2 * SIZE(BB), %xmm0 | ||||
@@ -1469,7 +1469,7 @@ | |||||
addpd %xmm0, %xmm7 | addpd %xmm0, %xmm7 | ||||
movapd 16 * SIZE(AA), %xmm0 | movapd 16 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) | ||||
#endif | #endif | ||||
mulpd %xmm1, %xmm2 | mulpd %xmm1, %xmm2 | ||||
@@ -64,7 +64,7 @@ | |||||
#define BORIG 60(%esp) | #define BORIG 60(%esp) | ||||
#define BUFFER 128(%esp) | #define BUFFER 128(%esp) | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 10 + 8) | #define PREFETCHSIZE (16 * 10 + 8) | ||||
@@ -872,7 +872,7 @@ | |||||
.L22: | .L22: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
addps %xmm2, %xmm4 | addps %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movaps 4 * SIZE(BB), %xmm2 | movaps 4 * SIZE(BB), %xmm2 | ||||
@@ -1316,7 +1316,7 @@ | |||||
.L32: | .L32: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
addss %xmm2, %xmm4 | addss %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movss 4 * SIZE(BB), %xmm2 | movss 4 * SIZE(BB), %xmm2 | ||||
@@ -1855,7 +1855,7 @@ | |||||
.L52: | .L52: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps 4 * SIZE(BB), %xmm0 | mulps 4 * SIZE(BB), %xmm0 | ||||
@@ -1885,7 +1885,7 @@ | |||||
addps %xmm0, %xmm5 | addps %xmm0, %xmm5 | ||||
movaps 32 * SIZE(AA), %xmm0 | movaps 32 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps %xmm1, %xmm2 | mulps %xmm1, %xmm2 | ||||
@@ -2249,7 +2249,7 @@ | |||||
ALIGN_4 | ALIGN_4 | ||||
.L62: | .L62: | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
@@ -2562,7 +2562,7 @@ | |||||
.L72: | .L72: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulss 4 * SIZE(BB), %xmm0 | mulss 4 * SIZE(BB), %xmm0 | ||||
@@ -2957,7 +2957,7 @@ | |||||
.L92: | .L92: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movaps 4 * SIZE(AA), %xmm0 | movaps 4 * SIZE(AA), %xmm0 | ||||
@@ -2972,7 +2972,7 @@ | |||||
mulps 12 * SIZE(BB), %xmm0 | mulps 12 * SIZE(BB), %xmm0 | ||||
addps %xmm0, %xmm7 | addps %xmm0, %xmm7 | ||||
movaps 32 * SIZE(AA), %xmm0 | movaps 32 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps %xmm1, %xmm3 | mulps %xmm1, %xmm3 | ||||
@@ -3280,7 +3280,7 @@ | |||||
.L102: | .L102: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movsd 2 * SIZE(AA), %xmm0 | movsd 2 * SIZE(AA), %xmm0 | ||||
@@ -3515,7 +3515,7 @@ | |||||
.L112: | .L112: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movss 1 * SIZE(AA), %xmm0 | movss 1 * SIZE(AA), %xmm0 | ||||
@@ -69,7 +69,7 @@ | |||||
#define STACK_ALIGN 4096 | #define STACK_ALIGN 4096 | ||||
#define STACK_OFFSET 1024 | #define STACK_OFFSET 1024 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHSIZE (8 * 10 + 4) | #define PREFETCHSIZE (8 * 10 + 4) | ||||
#endif | #endif | ||||
@@ -1036,7 +1036,7 @@ | |||||
.L42: | .L42: | ||||
mulpd %xmm0, %xmm2 | mulpd %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulpd 2 * SIZE(BB), %xmm0 | mulpd 2 * SIZE(BB), %xmm0 | ||||
@@ -1066,7 +1066,7 @@ | |||||
addpd %xmm0, %xmm7 | addpd %xmm0, %xmm7 | ||||
movapd 16 * SIZE(AA), %xmm0 | movapd 16 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) | ||||
#endif | #endif | ||||
mulpd %xmm1, %xmm2 | mulpd %xmm1, %xmm2 | ||||
@@ -2224,7 +2224,7 @@ | |||||
.L22: | .L22: | ||||
mulsd %xmm0, %xmm2 | mulsd %xmm0, %xmm2 | ||||
addsd %xmm2, %xmm4 | addsd %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movlpd 2 * SIZE(BB), %xmm2 | movlpd 2 * SIZE(BB), %xmm2 | ||||
@@ -2273,7 +2273,7 @@ | |||||
movlpd 40 * SIZE(BB), %xmm3 | movlpd 40 * SIZE(BB), %xmm3 | ||||
addsd %xmm0, %xmm7 | addsd %xmm0, %xmm7 | ||||
movlpd 8 * SIZE(AA), %xmm0 | movlpd 8 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) | PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) | ||||
#endif | #endif | ||||
mulsd %xmm1, %xmm2 | mulsd %xmm1, %xmm2 | ||||
@@ -64,7 +64,7 @@ | |||||
#define BORIG 60(%esp) | #define BORIG 60(%esp) | ||||
#define BUFFER 128(%esp) | #define BUFFER 128(%esp) | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 10 + 8) | #define PREFETCHSIZE (16 * 10 + 8) | ||||
@@ -439,7 +439,7 @@ | |||||
.L92: | .L92: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movaps 4 * SIZE(AA), %xmm0 | movaps 4 * SIZE(AA), %xmm0 | ||||
@@ -454,7 +454,7 @@ | |||||
mulps 12 * SIZE(BB), %xmm0 | mulps 12 * SIZE(BB), %xmm0 | ||||
addps %xmm0, %xmm7 | addps %xmm0, %xmm7 | ||||
movaps 32 * SIZE(AA), %xmm0 | movaps 32 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps %xmm1, %xmm3 | mulps %xmm1, %xmm3 | ||||
@@ -758,7 +758,7 @@ | |||||
.L102: | .L102: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movsd 2 * SIZE(AA), %xmm0 | movsd 2 * SIZE(AA), %xmm0 | ||||
@@ -993,7 +993,7 @@ | |||||
.L112: | .L112: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movss 1 * SIZE(AA), %xmm0 | movss 1 * SIZE(AA), %xmm0 | ||||
@@ -1324,7 +1324,7 @@ | |||||
.L52: | .L52: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps 4 * SIZE(BB), %xmm0 | mulps 4 * SIZE(BB), %xmm0 | ||||
@@ -1354,7 +1354,7 @@ | |||||
addps %xmm0, %xmm5 | addps %xmm0, %xmm5 | ||||
movaps 32 * SIZE(AA), %xmm0 | movaps 32 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps %xmm1, %xmm2 | mulps %xmm1, %xmm2 | ||||
@@ -1718,7 +1718,7 @@ | |||||
ALIGN_4 | ALIGN_4 | ||||
.L62: | .L62: | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
@@ -2031,7 +2031,7 @@ | |||||
.L72: | .L72: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulss 4 * SIZE(BB), %xmm0 | mulss 4 * SIZE(BB), %xmm0 | ||||
@@ -2859,7 +2859,7 @@ | |||||
.L22: | .L22: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
addps %xmm2, %xmm4 | addps %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movaps 4 * SIZE(BB), %xmm2 | movaps 4 * SIZE(BB), %xmm2 | ||||
@@ -3303,7 +3303,7 @@ | |||||
.L32: | .L32: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
addss %xmm2, %xmm4 | addss %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movss 4 * SIZE(BB), %xmm2 | movss 4 * SIZE(BB), %xmm2 | ||||
@@ -74,7 +74,7 @@ | |||||
#define BB %ecx | #define BB %ecx | ||||
#define LDC %ebp | #define LDC %ebp | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
#define movsd movlps | #define movsd movlps | ||||
#endif | #endif | ||||
@@ -625,7 +625,7 @@ | |||||
.L22: | .L22: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
addps %xmm2, %xmm4 | addps %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movsd 4 * SIZE(BB), %xmm2 | movsd 4 * SIZE(BB), %xmm2 | ||||
@@ -870,7 +870,7 @@ | |||||
.L32: | .L32: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
addss %xmm2, %xmm4 | addss %xmm2, %xmm4 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movss 4 * SIZE(BB), %xmm2 | movss 4 * SIZE(BB), %xmm2 | ||||
@@ -1173,7 +1173,7 @@ | |||||
.L52: | .L52: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps 4 * SIZE(BB), %xmm0 | mulps 4 * SIZE(BB), %xmm0 | ||||
@@ -1203,7 +1203,7 @@ | |||||
addps %xmm0, %xmm5 | addps %xmm0, %xmm5 | ||||
movaps 32 * SIZE(AA), %xmm0 | movaps 32 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps %xmm1, %xmm2 | mulps %xmm1, %xmm2 | ||||
@@ -1359,7 +1359,7 @@ | |||||
ALIGN_4 | ALIGN_4 | ||||
.L62: | .L62: | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
@@ -1536,7 +1536,7 @@ | |||||
.L72: | .L72: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
mulss 4 * SIZE(BB), %xmm0 | mulss 4 * SIZE(BB), %xmm0 | ||||
@@ -1794,7 +1794,7 @@ | |||||
.L92: | .L92: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movaps 4 * SIZE(AA), %xmm0 | movaps 4 * SIZE(AA), %xmm0 | ||||
@@ -1809,7 +1809,7 @@ | |||||
mulps 12 * SIZE(BB), %xmm0 | mulps 12 * SIZE(BB), %xmm0 | ||||
addps %xmm0, %xmm7 | addps %xmm0, %xmm7 | ||||
movaps 32 * SIZE(AA), %xmm0 | movaps 32 * SIZE(AA), %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
mulps %xmm1, %xmm3 | mulps %xmm1, %xmm3 | ||||
@@ -1936,7 +1936,7 @@ | |||||
.L102: | .L102: | ||||
mulps %xmm0, %xmm2 | mulps %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movsd 2 * SIZE(AA), %xmm0 | movsd 2 * SIZE(AA), %xmm0 | ||||
@@ -2069,7 +2069,7 @@ | |||||
.L112: | .L112: | ||||
mulss %xmm0, %xmm2 | mulss %xmm0, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | ||||
#endif | #endif | ||||
movss 1 * SIZE(AA), %xmm0 | movss 1 * SIZE(AA), %xmm0 | ||||
@@ -71,7 +71,7 @@ | |||||
#define movsd movlps | #define movsd movlps | ||||
#endif | #endif | ||||
#ifdef BARCELONA | |||||
#if defined(BARCELONA) || defined(BULLDOZER) | |||||
#define PREFETCH prefetchnta | #define PREFETCH prefetchnta | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 5) | #define PREFETCHSIZE (16 * 5) | ||||
@@ -58,7 +58,7 @@ | |||||
#define movsd movlps | #define movsd movlps | ||||
#endif | #endif | ||||
#ifdef BARCELONA | |||||
#if defined(BARCELONA) || defined(BULLDOZER) | |||||
#define PREFETCH prefetchnta | #define PREFETCH prefetchnta | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (8 * 5) | #define PREFETCHSIZE (8 * 5) | ||||
@@ -71,7 +71,7 @@ | |||||
#define movsd movlps | #define movsd movlps | ||||
#endif | #endif | ||||
#ifdef BARCELONA | |||||
#if defined(BARCELONA) || defined(BULLDOZER) | |||||
#define PREFETCH prefetchnta | #define PREFETCH prefetchnta | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 5) | #define PREFETCHSIZE (16 * 5) | ||||
@@ -58,7 +58,7 @@ | |||||
#define movsd movlps | #define movsd movlps | ||||
#endif | #endif | ||||
#ifdef BARCELONA | |||||
#if defined(BARCELONA) || defined(BULLDOZER) | |||||
#define PREFETCH prefetchnta | #define PREFETCH prefetchnta | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (8 * 5) | #define PREFETCHSIZE (8 * 5) | ||||
@@ -75,7 +75,7 @@ | |||||
#define STACK_ALIGN 4096 | #define STACK_ALIGN 4096 | ||||
#define STACK_OFFSET 1024 | #define STACK_OFFSET 1024 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCHSIZE (16 * 10 + 8) | #define PREFETCHSIZE (16 * 10 + 8) | ||||
#define WPREFETCHSIZE 112 | #define WPREFETCHSIZE 112 | ||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
@@ -533,7 +533,7 @@ | |||||
addps %xmm0, %xmm7 | addps %xmm0, %xmm7 | ||||
movsd 16 * SIZE(AA), %xmm0 | movsd 16 * SIZE(AA), %xmm0 | ||||
mulps %xmm1, %xmm2 | mulps %xmm1, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
addps %xmm2, %xmm4 | addps %xmm2, %xmm4 | ||||
@@ -75,7 +75,7 @@ | |||||
#define STACK_ALIGN 4096 | #define STACK_ALIGN 4096 | ||||
#define STACK_OFFSET 1024 | #define STACK_OFFSET 1024 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCHSIZE (16 * 10 + 8) | #define PREFETCHSIZE (16 * 10 + 8) | ||||
#define WPREFETCHSIZE 112 | #define WPREFETCHSIZE 112 | ||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
@@ -994,7 +994,7 @@ | |||||
addps %xmm0, %xmm7 | addps %xmm0, %xmm7 | ||||
movsd 16 * SIZE(AA), %xmm0 | movsd 16 * SIZE(AA), %xmm0 | ||||
mulps %xmm1, %xmm2 | mulps %xmm1, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
addps %xmm2, %xmm4 | addps %xmm2, %xmm4 | ||||
@@ -75,7 +75,7 @@ | |||||
#define STACK_ALIGN 4096 | #define STACK_ALIGN 4096 | ||||
#define STACK_OFFSET 1024 | #define STACK_OFFSET 1024 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCHSIZE (16 * 10 + 8) | #define PREFETCHSIZE (16 * 10 + 8) | ||||
#define WPREFETCHSIZE 112 | #define WPREFETCHSIZE 112 | ||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
@@ -1820,7 +1820,7 @@ | |||||
addps %xmm0, %xmm7 | addps %xmm0, %xmm7 | ||||
movsd 16 * SIZE(AA), %xmm0 | movsd 16 * SIZE(AA), %xmm0 | ||||
mulps %xmm1, %xmm2 | mulps %xmm1, %xmm2 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||||
prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) | prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) | ||||
#endif | #endif | ||||
addps %xmm2, %xmm4 | addps %xmm2, %xmm4 | ||||
@@ -930,7 +930,7 @@ | |||||
.L22: | .L22: | ||||
mulps %xmm8, %xmm9 | mulps %xmm8, %xmm9 | ||||
addps %xmm9, %xmm0 | addps %xmm9, %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
movaps 4 * SIZE(BO), %xmm9 | movaps 4 * SIZE(BO), %xmm9 | ||||
@@ -983,7 +983,7 @@ | |||||
addps %xmm8, %xmm3 | addps %xmm8, %xmm3 | ||||
movaps 0 * SIZE(AO), %xmm8 | movaps 0 * SIZE(AO), %xmm8 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | ||||
#endif | #endif | ||||
mulps %xmm10, %xmm9 | mulps %xmm10, %xmm9 | ||||
@@ -1178,7 +1178,7 @@ | |||||
.L32: | .L32: | ||||
mulps %xmm8, %xmm9 | mulps %xmm8, %xmm9 | ||||
addps %xmm9, %xmm0 | addps %xmm9, %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
movsd 4 * SIZE(BO), %xmm9 | movsd 4 * SIZE(BO), %xmm9 | ||||
@@ -1423,7 +1423,7 @@ | |||||
.L42: | .L42: | ||||
mulss %xmm8, %xmm9 | mulss %xmm8, %xmm9 | ||||
addss %xmm9, %xmm0 | addss %xmm9, %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
movss 4 * SIZE(BO), %xmm9 | movss 4 * SIZE(BO), %xmm9 | ||||
@@ -1765,7 +1765,7 @@ | |||||
.L62: | .L62: | ||||
mulps %xmm8, %xmm9 | mulps %xmm8, %xmm9 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
mulps 4 * SIZE(BO), %xmm8 | mulps 4 * SIZE(BO), %xmm8 | ||||
@@ -1793,7 +1793,7 @@ | |||||
addps %xmm8, %xmm5 | addps %xmm8, %xmm5 | ||||
movaps 32 * SIZE(AO), %xmm8 | movaps 32 * SIZE(AO), %xmm8 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | ||||
#endif | #endif | ||||
mulps %xmm10, %xmm11 | mulps %xmm10, %xmm11 | ||||
@@ -1822,7 +1822,7 @@ | |||||
addps %xmm10, %xmm5 | addps %xmm10, %xmm5 | ||||
movaps 48 * SIZE(AO), %xmm10 | movaps 48 * SIZE(AO), %xmm10 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 32) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 32) * SIZE(AO) | ||||
#endif | #endif | ||||
mulps %xmm12, %xmm13 | mulps %xmm12, %xmm13 | ||||
@@ -1851,7 +1851,7 @@ | |||||
addps %xmm12, %xmm5 | addps %xmm12, %xmm5 | ||||
movaps 64 * SIZE(AO), %xmm12 | movaps 64 * SIZE(AO), %xmm12 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 48) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 48) * SIZE(AO) | ||||
#endif | #endif | ||||
mulps %xmm14, %xmm15 | mulps %xmm14, %xmm15 | ||||
@@ -2024,7 +2024,7 @@ | |||||
.L72: | .L72: | ||||
mulps %xmm8, %xmm9 | mulps %xmm8, %xmm9 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
@@ -2208,7 +2208,7 @@ | |||||
.L82: | .L82: | ||||
mulps %xmm8, %xmm9 | mulps %xmm8, %xmm9 | ||||
addps %xmm9, %xmm0 | addps %xmm9, %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
movsd 4 * SIZE(BO), %xmm9 | movsd 4 * SIZE(BO), %xmm9 | ||||
@@ -2395,7 +2395,7 @@ | |||||
.L92: | .L92: | ||||
mulps %xmm8, %xmm9 | mulps %xmm8, %xmm9 | ||||
addps %xmm9, %xmm0 | addps %xmm9, %xmm0 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
movss 4 * SIZE(BO), %xmm9 | movss 4 * SIZE(BO), %xmm9 | ||||
@@ -2670,7 +2670,7 @@ | |||||
.L112: | .L112: | ||||
mulps %xmm9, %xmm8 | mulps %xmm9, %xmm8 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
@@ -2687,7 +2687,7 @@ | |||||
addps %xmm9, %xmm4 | addps %xmm9, %xmm4 | ||||
movaps 8 * SIZE(BO), %xmm9 | movaps 8 * SIZE(BO), %xmm9 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | ||||
#endif | #endif | ||||
mulps %xmm9, %xmm10 | mulps %xmm9, %xmm10 | ||||
@@ -2704,7 +2704,7 @@ | |||||
addps %xmm9, %xmm4 | addps %xmm9, %xmm4 | ||||
movaps 32 * SIZE(BO), %xmm9 | movaps 32 * SIZE(BO), %xmm9 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 32) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 32) * SIZE(AO) | ||||
#endif | #endif | ||||
mulps %xmm11, %xmm12 | mulps %xmm11, %xmm12 | ||||
@@ -2721,7 +2721,7 @@ | |||||
addps %xmm11, %xmm4 | addps %xmm11, %xmm4 | ||||
movaps 24 * SIZE(BO), %xmm11 | movaps 24 * SIZE(BO), %xmm11 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 48) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 48) * SIZE(AO) | ||||
#endif | #endif | ||||
mulps %xmm11, %xmm14 | mulps %xmm11, %xmm14 | ||||
@@ -2857,7 +2857,7 @@ | |||||
.L122: | .L122: | ||||
mulps %xmm8, %xmm9 | mulps %xmm8, %xmm9 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
movaps -28 * SIZE(AO), %xmm8 | movaps -28 * SIZE(AO), %xmm8 | ||||
@@ -2873,7 +2873,7 @@ | |||||
addps %xmm8, %xmm3 | addps %xmm8, %xmm3 | ||||
movaps 0 * SIZE(AO), %xmm8 | movaps 0 * SIZE(AO), %xmm8 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | ||||
#endif | #endif | ||||
mulps %xmm10, %xmm11 | mulps %xmm10, %xmm11 | ||||
@@ -3003,7 +3003,7 @@ | |||||
.L132: | .L132: | ||||
mulps %xmm8, %xmm9 | mulps %xmm8, %xmm9 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
movsd -30 * SIZE(AO), %xmm8 | movsd -30 * SIZE(AO), %xmm8 | ||||
@@ -3150,7 +3150,7 @@ | |||||
.L142: | .L142: | ||||
mulss %xmm8, %xmm9 | mulss %xmm8, %xmm9 | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | ||||
#endif | #endif | ||||
movss -31 * SIZE(AO), %xmm8 | movss -31 * SIZE(AO), %xmm8 | ||||
@@ -39,7 +39,7 @@ | |||||
#define ASSEMBLER | #define ASSEMBLER | ||||
#include "common.h" | #include "common.h" | ||||
#if defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
#define RPREFETCHSIZE (12 + 4) | #define RPREFETCHSIZE (12 + 4) | ||||
#define WPREFETCHSIZE (48 + 4) | #define WPREFETCHSIZE (48 + 4) | ||||
#define MOVNTQ MOVQ | #define MOVNTQ MOVQ | ||||
@@ -79,7 +79,7 @@ | |||||
#define AO3 %r13 | #define AO3 %r13 | ||||
#define AO4 %rax | #define AO4 %rax | ||||
#if defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
#define RPREFETCH prefetch | #define RPREFETCH prefetch | ||||
#else | #else | ||||
#define RPREFETCH prefetch | #define RPREFETCH prefetch | ||||
@@ -39,7 +39,7 @@ | |||||
#define ASSEMBLER | #define ASSEMBLER | ||||
#include "common.h" | #include "common.h" | ||||
#if defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
#define RPREFETCHSIZE (12 + 4) | #define RPREFETCHSIZE (12 + 4) | ||||
#define WPREFETCHSIZE (12 + 4) | #define WPREFETCHSIZE (12 + 4) | ||||
#define MOVNTQ MOVQ | #define MOVNTQ MOVQ | ||||
@@ -96,7 +96,7 @@ | |||||
#endif | #endif | ||||
#if defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
#define RPREFETCH prefetch | #define RPREFETCH prefetch | ||||
#else | #else | ||||
#define RPREFETCH prefetch | #define RPREFETCH prefetch | ||||
@@ -469,7 +469,7 @@ | |||||
ALIGN_4 | ALIGN_4 | ||||
.L71: | .L71: | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
prefetch PREFETCHSIZE * SIZE(X) | prefetch PREFETCHSIZE * SIZE(X) | ||||
#endif | #endif | ||||
@@ -266,7 +266,7 @@ | |||||
sarq $5, I | sarq $5, I | ||||
jle .L113 | jle .L113 | ||||
#if defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
movaps %xmm0, %xmm1 | movaps %xmm0, %xmm1 | ||||
mulps -32 * SIZE(X), %xmm1 | mulps -32 * SIZE(X), %xmm1 | ||||
@@ -251,7 +251,7 @@ | |||||
sarq $4, I | sarq $4, I | ||||
jle .L113 | jle .L113 | ||||
#if defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
movaps %xmm0, %xmm1 | movaps %xmm0, %xmm1 | ||||
mulpd -16 * SIZE(X), %xmm1 | mulpd -16 * SIZE(X), %xmm1 | ||||
@@ -76,7 +76,7 @@ | |||||
#define movsd movlps | #define movsd movlps | ||||
#endif | #endif | ||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 16) | #define PREFETCHSIZE (16 * 16) | ||||
@@ -76,7 +76,7 @@ | |||||
#define movsd movlpd | #define movsd movlpd | ||||
#endif | #endif | ||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 16) | #define PREFETCHSIZE (16 * 16) | ||||
@@ -76,7 +76,7 @@ | |||||
#define movsd movlps | #define movsd movlps | ||||
#endif | #endif | ||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 16) | #define PREFETCHSIZE (16 * 16) | ||||
@@ -76,7 +76,7 @@ | |||||
#define movsd movlpd | #define movsd movlpd | ||||
#endif | #endif | ||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 16) | #define PREFETCHSIZE (16 * 16) | ||||
@@ -86,7 +86,7 @@ | |||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#endif | #endif | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define movsd movlps | #define movsd movlps | ||||
@@ -86,7 +86,7 @@ | |||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#endif | #endif | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define movsd movlps | #define movsd movlps | ||||
@@ -86,7 +86,7 @@ | |||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#endif | #endif | ||||
#if defined(OPTERON) || defined(BARCELONA) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define movsd movlps | #define movsd movlps | ||||
@@ -85,7 +85,7 @@ | |||||
#define movsd movlpd | #define movsd movlpd | ||||
#endif | #endif | ||||
#if defined(BARCELONA) || defined(SHANGHAI) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
#define RPREFETCHSIZE 32 | #define RPREFETCHSIZE 32 | ||||
#define WPREFETCHSIZE 48 | #define WPREFETCHSIZE 48 | ||||
#endif | #endif | ||||
@@ -160,7 +160,7 @@ | |||||
#define a3 %xmm14 | #define a3 %xmm14 | ||||
#define xt1 %xmm15 | #define xt1 %xmm15 | ||||
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define MOVDDUP(a, b, c) movddup a(b), c | #define MOVDDUP(a, b, c) movddup a(b), c | ||||
#define MOVDDUP2(a, b, c) movddup a##b, c | #define MOVDDUP2(a, b, c) movddup a##b, c | ||||
#else | #else | ||||
@@ -76,7 +76,7 @@ | |||||
#define movsd movlpd | #define movsd movlpd | ||||
#endif | #endif | ||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 16) | #define PREFETCHSIZE (16 * 16) | ||||
@@ -167,7 +167,7 @@ | |||||
#define a3 %xmm14 | #define a3 %xmm14 | ||||
#define xt1 %xmm15 | #define xt1 %xmm15 | ||||
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
#define MOVDDUP(a, b, c) movddup a(b), c | #define MOVDDUP(a, b, c) movddup a(b), c | ||||
#define MOVDDUP2(a, b, c) movddup a##b, c | #define MOVDDUP2(a, b, c) movddup a##b, c | ||||
#else | #else | ||||
@@ -76,7 +76,7 @@ | |||||
#define movsd movlpd | #define movsd movlpd | ||||
#endif | #endif | ||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 16) | #define PREFETCHSIZE (16 * 16) | ||||
@@ -166,7 +166,7 @@ | |||||
#define xt1 %xmm14 | #define xt1 %xmm14 | ||||
#define xt2 %xmm15 | #define xt2 %xmm15 | ||||
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
#define MOVDDUP(a, b, c) movddup a(b), c | #define MOVDDUP(a, b, c) movddup a(b), c | ||||
#define MOVDDUP2(a, b, c) movddup a##b, c | #define MOVDDUP2(a, b, c) movddup a##b, c | ||||
#else | #else | ||||
@@ -76,7 +76,7 @@ | |||||
#define movsd movlpd | #define movsd movlpd | ||||
#endif | #endif | ||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHSIZE (16 * 16) | #define PREFETCHSIZE (16 * 16) | ||||
@@ -166,7 +166,7 @@ | |||||
#define a3 %xmm14 | #define a3 %xmm14 | ||||
#define xt1 %xmm15 | #define xt1 %xmm15 | ||||
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) | |||||
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||||
#define MOVDDUP(a, b, c) movddup a(b), c | #define MOVDDUP(a, b, c) movddup a(b), c | ||||
#define MOVDDUP2(a, b, c) movddup a##b, c | #define MOVDDUP2(a, b, c) movddup a##b, c | ||||
#else | #else | ||||
@@ -86,7 +86,7 @@ | |||||
#define BORIG 72(%rsp) | #define BORIG 72(%rsp) | ||||
#define BUFFER 128(%rsp) | #define BUFFER 128(%rsp) | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHNTA prefetchnta | #define PREFETCHNTA prefetchnta | ||||
@@ -95,7 +95,7 @@ | |||||
#define PREFETCHSIZE (8 * 6 + 4) | #define PREFETCHSIZE (8 * 6 + 4) | ||||
#endif | #endif | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHNTA prefetchnta | #define PREFETCHNTA prefetchnta | ||||
@@ -86,7 +86,7 @@ | |||||
#define BORIG 72(%rsp) | #define BORIG 72(%rsp) | ||||
#define BUFFER 128(%rsp) | #define BUFFER 128(%rsp) | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHNTA prefetchnta | #define PREFETCHNTA prefetchnta | ||||
@@ -95,7 +95,7 @@ | |||||
#define PREFETCHSIZE (8 * 6 + 4) | #define PREFETCHSIZE (8 * 6 + 4) | ||||
#endif | #endif | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHNTA prefetchnta | #define PREFETCHNTA prefetchnta | ||||
@@ -86,7 +86,7 @@ | |||||
#define BORIG 72(%rsp) | #define BORIG 72(%rsp) | ||||
#define BUFFER 128(%rsp) | #define BUFFER 128(%rsp) | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHNTA prefetchnta | #define PREFETCHNTA prefetchnta | ||||
@@ -95,7 +95,7 @@ | |||||
#define PREFETCHSIZE (8 * 6 + 4) | #define PREFETCHSIZE (8 * 6 + 4) | ||||
#endif | #endif | ||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define PREFETCH prefetch | #define PREFETCH prefetch | ||||
#define PREFETCHW prefetchw | #define PREFETCHW prefetchw | ||||
#define PREFETCHNTA prefetchnta | #define PREFETCHNTA prefetchnta | ||||
@@ -74,6 +74,13 @@ | |||||
#define ALIGNED_ACCESS | #define ALIGNED_ACCESS | ||||
#endif | #endif | ||||
#ifdef BULLDOZER | |||||
#define PREFETCH prefetch | |||||
#define PREFETCHW prefetchw | |||||
#define PREFETCHSIZE (128 * 5) | |||||
#define ALIGNED_ACCESS | |||||
#endif | |||||
#ifdef NANO | #ifdef NANO | ||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
@@ -85,7 +85,7 @@ | |||||
#define movsd movlps | #define movsd movlps | ||||
#endif | #endif | ||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||||
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||||
#define ALIGNED_ACCESS | #define ALIGNED_ACCESS | ||||
#define MOVUPS_A movaps | #define MOVUPS_A movaps | ||||
#define MOVUPS_XL movaps | #define MOVUPS_XL movaps | ||||