Browse Source

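Fix a bug in trmm: round min_i down to a multiple of GEMM_UNROLL_M and advance the row loops by min_i instead of GEMM_P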

tags/v0.3.11^2
y00512012 5 years ago
parent
commit
06cf73a239
1 changed file with 42 additions and 6 deletions
  1. +42
    -6
      driver/level3/trmm_L.c

+ 42
- 6
driver/level3/trmm_L.c View File

@@ -122,6 +122,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q; if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = min_l; min_i = min_l;
if (min_i > GEMM_P) min_i = GEMM_P; if (min_i > GEMM_P) min_i = GEMM_P;
if( min_i > GEMM_UNROLL_M){
min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M;
}


START_RPCC(); START_RPCC();


@@ -161,9 +164,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
} }




for(is = min_i; is < min_l; is += GEMM_P){
for(is = min_i; is < min_l; is += min_i){
min_i = min_l - is; min_i = min_l - is;
if (min_i > GEMM_P) min_i = GEMM_P; if (min_i > GEMM_P) min_i = GEMM_P;
if( min_i > GEMM_UNROLL_M){
min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M;
}


START_RPCC(); START_RPCC();


@@ -192,6 +198,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q; if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = ls; min_i = ls;
if (min_i > GEMM_P) min_i = GEMM_P; if (min_i > GEMM_P) min_i = GEMM_P;
if( min_i > GEMM_UNROLL_M){
min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M;
}



START_RPCC(); START_RPCC();


@@ -231,9 +241,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
STOP_RPCC(gemmcost); STOP_RPCC(gemmcost);
} }


for(is = min_i; is < ls; is += GEMM_P){
for(is = min_i; is < ls; is += min_i){
min_i = ls - is; min_i = ls - is;
if (min_i > GEMM_P) min_i = GEMM_P; if (min_i > GEMM_P) min_i = GEMM_P;
if( min_i > GEMM_UNROLL_M){
min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M;
}


START_RPCC(); START_RPCC();


@@ -256,9 +269,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
STOP_RPCC(gemmcost); STOP_RPCC(gemmcost);
} }


for(is = ls; is < ls + min_l; is += GEMM_P){
for(is = ls; is < ls + min_l; is += min_i){
min_i = ls + min_l - is; min_i = ls + min_l - is;
if (min_i > GEMM_P) min_i = GEMM_P; if (min_i > GEMM_P) min_i = GEMM_P;
if( min_i > GEMM_UNROLL_M){
min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M;
}


START_RPCC(); START_RPCC();


@@ -287,6 +303,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q; if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = min_l; min_i = min_l;
if (min_i > GEMM_P) min_i = GEMM_P; if (min_i > GEMM_P) min_i = GEMM_P;
if (min_i > GEMM_UNROLL_M){
min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M;
}



START_RPCC(); START_RPCC();


@@ -327,9 +347,14 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
STOP_RPCC(trmmcost); STOP_RPCC(trmmcost);
} }


for(is = m - min_l + min_i; is < m; is += GEMM_P){
for(is = m - min_l + min_i; is < m; is += min_i){
min_i = m - is; min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P; if (min_i > GEMM_P) min_i = GEMM_P;
if (min_i > GEMM_UNROLL_M){
min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M;
}




START_RPCC(); START_RPCC();


@@ -357,6 +382,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
if (min_l > GEMM_Q) min_l = GEMM_Q; if (min_l > GEMM_Q) min_l = GEMM_Q;
min_i = min_l; min_i = min_l;
if (min_i > GEMM_P) min_i = GEMM_P; if (min_i > GEMM_P) min_i = GEMM_P;
if (min_i > GEMM_UNROLL_M){
min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M;
}



START_RPCC(); START_RPCC();


@@ -397,9 +426,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
STOP_RPCC(trmmcost); STOP_RPCC(trmmcost);
} }


for(is = ls - min_l + min_i; is < ls; is += GEMM_P){
for(is = ls - min_l + min_i; is < ls; is += min_i){
min_i = ls - is; min_i = ls - is;
if (min_i > GEMM_P) min_i = GEMM_P; if (min_i > GEMM_P) min_i = GEMM_P;
if (min_i > GEMM_UNROLL_M){
min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M;
}



START_RPCC(); START_RPCC();


@@ -423,9 +456,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
} }




for(is = ls; is < m; is += GEMM_P){
for(is = ls; is < m; is += min_i){
min_i = m - is; min_i = m - is;
if (min_i > GEMM_P) min_i = GEMM_P; if (min_i > GEMM_P) min_i = GEMM_P;
if (min_i > GEMM_UNROLL_M){
min_i = (min_i / GEMM_UNROLL_M) * GEMM_UNROLL_M;
}


START_RPCC(); START_RPCC();




Loading…
Cancel
Save