Browse Source

THUNDERX2T99: Bug Fixes in D/Z NRM2 and ZGEMM

tags/v0.2.20^2
Ashwin Sekhar T K 8 years ago
parent
commit
67473d09dd
2 changed files with 9 additions and 9 deletions
  1. +1
    -1
      kernel/arm64/dznrm2_thunderx2t99.c
  2. +8
    -8
      kernel/arm64/zgemm_kernel_4x4_thunderx2t99.S

+ 1
- 1
kernel/arm64/dznrm2_thunderx2t99.c View File

@@ -301,7 +301,7 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
 : "cc",
 "memory",
 "x0", "x1", "x2", "x3", "x4", "x5",
-"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
+"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8"
 );

 }


+ 8
- 8
kernel/arm64/zgemm_kernel_4x4_thunderx2t99.S View File

@@ -474,19 +474,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ld2 {v2.2d, v3.2d}, [pA]
 add pA, pA, #32

-OP_rr v20.2d, v0.2d, v8.d[0]
-OP_ii v20.2d, v1.2d, v8.d[1]
-OP_ri v21.2d, v0.2d, v8.d[1]
-OP_ir v21.2d, v1.2d, v8.d[0]
+OP_rr v20.2d, v0.2d, v9.d[0]
+OP_ii v20.2d, v1.2d, v9.d[1]
+OP_ri v21.2d, v0.2d, v9.d[1]
+OP_ir v21.2d, v1.2d, v9.d[0]

 ldr q10, [pB]
 ldr q11, [pB, #16]
 add pB, pB, #32

-OP_rr v18.2d, v2.2d, v9.d[0]
-OP_ii v18.2d, v3.2d, v9.d[1]
-OP_ri v19.2d, v2.2d, v9.d[1]
-OP_ir v19.2d, v3.2d, v9.d[0]
+OP_rr v18.2d, v2.2d, v8.d[0]
+OP_ii v18.2d, v3.2d, v8.d[1]
+OP_ri v19.2d, v2.2d, v8.d[1]
+OP_ir v19.2d, v3.2d, v8.d[0]

 prfm PLDL1KEEP, [pB, #B_PRE_SIZE]




Loading…
Cancel
Save