Browse Source

Merge pull request #482 from jeromerobert/develop

Allow gemv and ger to allocate their work buffers on the stack
tags/v0.2.14^2
Zhang Xianyi 10 years ago
parent
commit
41aad0407f
4 changed files with 32 additions and 3 deletions
  1. +4
    -0
      Makefile.system
  2. +15
    -1
      interface/gemv.c
  3. +12
    -1
      interface/ger.c
  4. +1
    -1
      kernel/x86_64/sgemv_t_4.c

+ 4
- 0
Makefile.system View File

@@ -311,6 +311,10 @@ ifdef SANITY_CHECK
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
endif


ifdef MAX_STACK_ALLOC
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
endif

#
# Architecture dependent settings
#


+ 15
- 1
interface/gemv.c View File

@@ -208,7 +208,18 @@ void CNAME(enum CBLAS_ORDER order,
if (incx < 0) x -= (lenx - 1) * incx;
if (incy < 0) y -= (leny - 1) * incy;


#ifdef MAX_STACK_ALLOC
int stack_alloc_size = m + n;
if(stack_alloc_size < 128)
// dgemv_n.S requires a 128-byte buffer
stack_alloc_size = 128;
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
stack_alloc_size = 0;
FLOAT stack_buffer[stack_alloc_size];
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
#else
buffer = (FLOAT *)blas_memory_alloc(1);
#endif


#ifdef SMP


@@ -237,7 +248,10 @@ void CNAME(enum CBLAS_ORDER order,
}
#endif


blas_memory_free(buffer);
#ifdef MAX_STACK_ALLOC
if(!stack_alloc_size)
#endif
blas_memory_free(buffer);


FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);




+ 12
- 1
interface/ger.c View File

@@ -171,7 +171,15 @@ void CNAME(enum CBLAS_ORDER order,
if (incy < 0) y -= (n - 1) * incy;
if (incx < 0) x -= (m - 1) * incx;


#ifdef MAX_STACK_ALLOC
int stack_alloc_size = m;
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
stack_alloc_size = 0;
FLOAT stack_buffer[stack_alloc_size];
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
#else
buffer = (FLOAT *)blas_memory_alloc(1);
#endif


#ifdef SMPTEST
nthreads = num_cpu_avail(2);
@@ -190,7 +198,10 @@ void CNAME(enum CBLAS_ORDER order,
}
#endif


blas_memory_free(buffer);
#ifdef MAX_STACK_ALLOC
if(!stack_alloc_size)
#endif
blas_memory_free(buffer);


FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);




+ 1
- 1
kernel/x86_64/sgemv_t_4.c View File

@@ -306,7 +306,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
if ( n < 1 ) return(0);


xbuffer = buffer;
ytemp = buffer + NBMAX;
ytemp = buffer + (m < NBMAX ? m : NBMAX);
n0 = n / NBMAX;
n1 = (n % NBMAX) >> 2 ;


Loading…
Cancel
Save