| @@ -4,7 +4,15 @@ | |||||
| #else | #else | ||||
| #include "config_kernel.h" | #include "config_kernel.h" | ||||
| #endif | #endif | ||||
| #include "common.h" | |||||
| #if (defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64)) && defined(__64BIT__) | |||||
| typedef long long BLASLONG; | |||||
| typedef unsigned long long BLASULONG; | |||||
| #else | |||||
| typedef long BLASLONG; | |||||
| typedef unsigned long BLASULONG; | |||||
| #endif | |||||
| #include "param.h" | |||||
| int main(int argc, char **argv) { | int main(int argc, char **argv) { | ||||
| @@ -169,8 +169,13 @@ ZROTKERNEL = zrot.c | |||||
| # | # | ||||
| SSCALKERNEL = sscal.c | SSCALKERNEL = sscal.c | ||||
| DSCALKERNEL = dscal.c | DSCALKERNEL = dscal.c | ||||
| ifeq ($(C_COMPILER), PGI) | |||||
| CSCALKERNEL = ../arm/zscal.c | |||||
| ZSCALKERNEL = ../arm/zscal.c | |||||
| else | |||||
| CSCALKERNEL = zscal.c | CSCALKERNEL = zscal.c | ||||
| ZSCALKERNEL = zscal.c | ZSCALKERNEL = zscal.c | ||||
| endif | |||||
| # | # | ||||
| SSWAPKERNEL = sswap.c | SSWAPKERNEL = sswap.c | ||||
| DSWAPKERNEL = dswap.c | DSWAPKERNEL = dswap.c | ||||
| @@ -242,8 +242,13 @@ ZROTKERNEL = zrot.c | |||||
| # | # | ||||
| SSCALKERNEL = sscal.c | SSCALKERNEL = sscal.c | ||||
| DSCALKERNEL = dscal.c | DSCALKERNEL = dscal.c | ||||
| ifeq ($(C_COMPILER), PGI) | |||||
| CSCALKERNEL = ../arm/zscal.c | |||||
| ZSCALKERNEL = ../arm/zscal.c | |||||
| else | |||||
| CSCALKERNEL = zscal.c | CSCALKERNEL = zscal.c | ||||
| ZSCALKERNEL = zscal.c | ZSCALKERNEL = zscal.c | ||||
| endif | |||||
| # | # | ||||
| SSWAPKERNEL = sswap.c | SSWAPKERNEL = sswap.c | ||||
| DSWAPKERNEL = dswap.c | DSWAPKERNEL = dswap.c | ||||
| @@ -166,8 +166,13 @@ ZROTKERNEL = zrot.c | |||||
| # | # | ||||
| SSCALKERNEL = sscal.c | SSCALKERNEL = sscal.c | ||||
| DSCALKERNEL = dscal.c | DSCALKERNEL = dscal.c | ||||
| ifeq ($(C_COMPILER), PGI) | |||||
| CSCALKERNEL = ../arm/zscal.c | |||||
| ZSCALKERNEL = ../arm/zscal.c | |||||
| else | |||||
| CSCALKERNEL = zscal.c | CSCALKERNEL = zscal.c | ||||
| ZSCALKERNEL = zscal.c | ZSCALKERNEL = zscal.c | ||||
| endif | |||||
| # | # | ||||
| SSWAPKERNEL = sswap.c | SSWAPKERNEL = sswap.c | ||||
| DSWAPKERNEL = dswap.c | DSWAPKERNEL = dswap.c | ||||
| @@ -491,4 +491,3 @@ SSUMKERNEL = ../arm/sum.c | |||||
| DSUMKERNEL = ../arm/sum.c | DSUMKERNEL = ../arm/sum.c | ||||
| SOMATCOPY_RT = omatcopy_rt.c | SOMATCOPY_RT = omatcopy_rt.c | ||||
| DOMATCOPY_RT = omatcopy_rt.c | |||||
| @@ -1,10 +1,10 @@ | |||||
| /* the direct sgemm code written by Arjan van der Ven */ | /* the direct sgemm code written by Arjan van der Ven */ | ||||
| #include "common.h" | |||||
| #if defined(SKYLAKEX) || defined (COOPERLAKE) | #if defined(SKYLAKEX) || defined (COOPERLAKE) | ||||
| #include <immintrin.h> | #include <immintrin.h> | ||||
| #include "common.h" | |||||
| /* | /* | ||||
| * "Direct sgemm" code. This code operates directly on the inputs and outputs | * "Direct sgemm" code. This code operates directly on the inputs and outputs | ||||
| @@ -472,7 +472,7 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG s | |||||
| } | } | ||||
| } | } | ||||
| #else | #else | ||||
| #include "common.h" | |||||
| void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR) | void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR) | ||||
| {} | {} | ||||
| #endif | #endif | ||||
| @@ -2466,13 +2466,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define SGEMM_DEFAULT_P 512 | #define SGEMM_DEFAULT_P 512 | ||||
| #define DGEMM_DEFAULT_P 384 | #define DGEMM_DEFAULT_P 384 | ||||
| #define CGEMM_DEFAULT_P 512 | |||||
| #define CGEMM_DEFAULT_P 512 | |||||
| #define ZGEMM_DEFAULT_P 256 | #define ZGEMM_DEFAULT_P 256 | ||||
| #define SGEMM_DEFAULT_Q 512 | #define SGEMM_DEFAULT_Q 512 | ||||
| #define DGEMM_DEFAULT_Q 512 | #define DGEMM_DEFAULT_Q 512 | ||||
| #define CGEMM_DEFAULT_Q 1026 | |||||
| #define ZGEMM_DEFAULT_Q 1026 | |||||
| #define CGEMM_DEFAULT_Q 384 | |||||
| #define ZGEMM_DEFAULT_Q 384 | |||||
| #define SGEMM_DEFAULT_R 4096 | #define SGEMM_DEFAULT_R 4096 | ||||
| #define DGEMM_DEFAULT_R 4096 | #define DGEMM_DEFAULT_R 4096 | ||||