|
|
|
@@ -104,7 +104,7 @@ typedef struct { |
|
|
|
/*
 * BETA_OPERATION - invoke GEMM_BETA on the sub-block of C that spans rows
 * [M_FROM, M_TO) and columns [N_FROM, N_TO).
 *
 *   M_FROM, M_TO  first row / one-past-last row of the sub-block
 *   N_FROM, N_TO  first column / one-past-last column of the sub-block
 *   BETA          array whose elements [0] and [1] are forwarded to GEMM_BETA
 *   C, LDC        base pointer of the matrix and its leading dimension
 *
 * The third (inner-dimension) argument is 0 and the two operand slots are
 * passed as NULL / 0, so only C is handed to GEMM_BETA.
 *
 * The start address is computed from the element offset
 * (M_FROM + N_FROM * LDC), and the WHOLE offset is multiplied by COMPSIZE.
 * Scaling only the column term leaves the row offset in element units and
 * points at the wrong location whenever COMPSIZE != 1 and M_FROM != 0.
 * NOTE(review): COMPSIZE is presumably the number of FLOATs per matrix
 * element (2 for complex data) - confirm against the project headers.
 */
#define BETA_OPERATION(M_FROM, M_TO, N_FROM, N_TO, BETA, C, LDC) \
	GEMM_BETA((M_TO) - (M_FROM), (N_TO) - (N_FROM), 0, \
		  (BETA)[0], (BETA)[1], NULL, 0, NULL, 0, \
		  (FLOAT *)(C) + ((M_FROM) + (N_FROM) * (LDC)) * COMPSIZE, LDC)
|
|
|
#endif |
|
|
|
|
|
|
|
#ifndef ICOPYB_OPERATION |
|
|
|
@@ -414,7 +414,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, |
|
|
|
|
|
|
|
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){ |
|
|
|
min_jj = MIN(n_to, xxx + div_n) - jjs; |
|
|
|
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N; |
|
|
|
if (min_jj > GEMM3M_UNROLL_N*3) min_jj = GEMM3M_UNROLL_N*3; |
|
|
|
|
|
|
|
START_RPCC(); |
|
|
|
|
|
|
|
@@ -550,7 +550,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, |
|
|
|
|
|
|
|
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){ |
|
|
|
min_jj = MIN(n_to, xxx + div_n) - jjs; |
|
|
|
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N; |
|
|
|
if (min_jj > GEMM3M_UNROLL_N*3) min_jj = GEMM3M_UNROLL_N*3; |
|
|
|
|
|
|
|
START_RPCC(); |
|
|
|
|
|
|
|
@@ -687,7 +687,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, |
|
|
|
|
|
|
|
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){ |
|
|
|
min_jj = MIN(n_to, xxx + div_n) - jjs; |
|
|
|
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N; |
|
|
|
if (min_jj > GEMM3M_UNROLL_N*3) min_jj = GEMM3M_UNROLL_N*3; |
|
|
|
|
|
|
|
START_RPCC(); |
|
|
|
|
|
|
|
|