| @@ -727,6 +727,7 @@ typedef struct { | |||||
| #endif | #endif | ||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #include "common_stackalloc.h" | |||||
| #if 0 | #if 0 | ||||
| #include "symcopy.h" | #include "symcopy.h" | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,73 @@ | |||||
| /******************************************************************************* | |||||
| Copyright (c) 2016, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *******************************************************************************/ | |||||
/*
 * Stack-smashing canary used by STACK_ALLOC / STACK_FREE.
 *
 * STACK_ALLOC_PROTECT_SET   declares a volatile sentinel local named
 *                           stack_check in the enclosing scope.
 * STACK_ALLOC_PROTECT_CHECK asserts that the sentinel still holds the value
 *                           it was initialised with.
 *
 * Both macros expand to nothing when STACK_ALLOC_PROTECT is not defined.
 * Each expansion carries its own trailing ';' because the macros are pasted
 * into other macro bodies without one.
 *
 * The sentinel is a plain int holding a 31-bit pattern.  A 64-bit literal
 * stored in a BLASLONG is truncated whenever BLASLONG is narrower than
 * 64 bits, while the comparison is still carried out against the full
 * literal, so the assertion would fire on every call on such targets.
 */
#define STACK_ALLOC_PROTECT
#ifdef STACK_ALLOC_PROTECT
#include <assert.h>
#define STACK_ALLOC_PROTECT_SET   volatile int stack_check = 0x7fc01234;
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234);
#else
#define STACK_ALLOC_PROTECT_SET
#define STACK_ALLOC_PROTECT_CHECK
#endif
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
/*
 * STACK_ALLOC(SIZE, TYPE, BUFFER)
 *
 * Point BUFFER at a work area of SIZE elements of TYPE.  The area lives on
 * the stack when SIZE elements fit in MAX_STACK_ALLOC bytes; otherwise BUFFER
 * is obtained from blas_memory_alloc(1).  Stack allocation is much faster
 * than blas_memory_alloc or malloc, particularly when OpenBLAS is used from a
 * multi-threaded application.
 *
 * SIZE must be chosen carefully.  It should be:
 *   - as small as possible, to maximize the number of stack allocations;
 *   - large enough for every architecture and kernel.
 * Choosing a SIZE that is too small leads to stack smashing.
 *
 * A SIZE of zero selects the heap path.  A negative SIZE is converted to a
 * large unsigned value by the comparison against the sizeof-based limit and
 * therefore selects the heap path as well.
 *
 * The macro declares stack_alloc_size and stack_buffer (plus the optional
 * canary) in the enclosing scope, so it can be used once per scope and must
 * be paired with STACK_FREE(BUFFER) in that same scope.
 */
#define STACK_ALLOC(SIZE, TYPE, BUFFER)                                        \
  /* volatile: some kernels (ex: dgemv_n.S) do not restore all registers */   \
  volatile int stack_alloc_size = (SIZE);                                      \
  if (stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE))                       \
    stack_alloc_size = 0;                                                      \
  STACK_ALLOC_PROTECT_SET                                                      \
  /* a variable-length array needs a non-zero size, even on the heap path */  \
  TYPE stack_buffer[stack_alloc_size ? stack_alloc_size : 1];                  \
  BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
#else
/* Stack allocation disabled: original OpenBLAS/GotoBLAS behaviour. */
#define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
#endif
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
/*
 * STACK_FREE(BUFFER)
 *
 * Counterpart of STACK_ALLOC; it must be used in the scope where STACK_ALLOC
 * declared stack_alloc_size.  It first runs the optional canary check, then
 * hands BUFFER back to blas_memory_free() only when the heap path was taken
 * (stack_alloc_size == 0).  A stack buffer needs no explicit release.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: it needs a terminating ';' exactly like the heap-only
 * variant below, and it is safe under an unbraced if / else.
 */
#define STACK_FREE(BUFFER)                                                     \
  do {                                                                         \
    STACK_ALLOC_PROTECT_CHECK                                                  \
    if (!stack_alloc_size) {                                                   \
      blas_memory_free(BUFFER);                                                \
    }                                                                          \
  } while (0)
#else
/* Stack allocation disabled: every buffer came from blas_memory_alloc(). */
#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
#endif
| @@ -37,7 +37,6 @@ | |||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include <assert.h> | |||||
| #include "common.h" | #include "common.h" | ||||
| #include "l1param.h" | #include "l1param.h" | ||||
| #ifdef FUNCTION_PROFILE | #ifdef FUNCTION_PROFILE | ||||
| @@ -80,6 +79,7 @@ void NAME(char *TRANS, blasint *M, blasint *N, | |||||
| FLOAT alpha = *ALPHA; | FLOAT alpha = *ALPHA; | ||||
| FLOAT beta = *BETA; | FLOAT beta = *BETA; | ||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| int buffer_size; | |||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| int nthreads_max; | int nthreads_max; | ||||
| @@ -135,7 +135,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| FLOAT *buffer; | FLOAT *buffer; | ||||
| blasint lenx, leny; | blasint lenx, leny; | ||||
| int trans; | |||||
| int trans, buffer_size; | |||||
| blasint info, t; | blasint info, t; | ||||
| #ifdef SMP | #ifdef SMP | ||||
| int nthreads; | int nthreads; | ||||
| @@ -216,33 +216,13 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| if (incx < 0) x -= (lenx - 1) * incx; | if (incx < 0) x -= (lenx - 1) * incx; | ||||
| if (incy < 0) y -= (leny - 1) * incy; | if (incy < 0) y -= (leny - 1) * incy; | ||||
| #ifdef MAX_STACK_ALLOC | |||||
| // make it volatile because some gemv implementation (ex: dgemv_n.S) | |||||
| // do not restore all register | |||||
| volatile int stack_alloc_size = 0; | |||||
| //for gemv_n and gemv_t, try to allocate on stack | |||||
| stack_alloc_size = m + n; | |||||
| #ifdef ALIGNED_ACCESS | |||||
| stack_alloc_size += 3; | |||||
| #endif | |||||
| // if(stack_alloc_size < 128) | |||||
| //dgemv_n.S require a 128 bytes buffer | |||||
| // increasing instead of capping 128 | |||||
| // ABI STACK for windows 288 bytes | |||||
| stack_alloc_size += 288 / sizeof(FLOAT) ; | |||||
| if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT)) | |||||
| stack_alloc_size = 0; | |||||
| // stack overflow check | |||||
| volatile double stack_check = 3.14159265358979323846; | |||||
| FLOAT stack_buffer[stack_alloc_size]; | |||||
| buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1); | |||||
| // printf("stack_alloc_size=%d\n", stack_alloc_size); | |||||
| #else | |||||
| //Original OpenBLAS/GotoBLAS codes. | |||||
| buffer = (FLOAT *)blas_memory_alloc(1); | |||||
| buffer_size = m + n + 128 / sizeof(FLOAT); | |||||
| #ifdef WINDOWS_ABI | |||||
| buffer_size += 160 / sizeof(FLOAT) ; | |||||
| #endif | #endif | ||||
| // for alignment | |||||
| buffer_size = (buffer_size + 3) & ~3; | |||||
| STACK_ALLOC(buffer_size, FLOAT, buffer); | |||||
| #ifdef SMP | #ifdef SMP | ||||
| @@ -271,17 +251,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| } | } | ||||
| #endif | #endif | ||||
| #ifdef MAX_STACK_ALLOC | |||||
| // stack overflow check | |||||
| assert(stack_check==3.14159265358979323846); | |||||
| if(!stack_alloc_size){ | |||||
| blas_memory_free(buffer); | |||||
| } | |||||
| #else | |||||
| blas_memory_free(buffer); | |||||
| #endif | |||||
| STACK_FREE(buffer); | |||||
| FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); | FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); | ||||
| IDEBUG_END; | IDEBUG_END; | ||||
| @@ -171,15 +171,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| if (incy < 0) y -= (n - 1) * incy; | if (incy < 0) y -= (n - 1) * incy; | ||||
| if (incx < 0) x -= (m - 1) * incx; | if (incx < 0) x -= (m - 1) * incx; | ||||
| #ifdef MAX_STACK_ALLOC | |||||
| volatile int stack_alloc_size = m; | |||||
| if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT)) | |||||
| stack_alloc_size = 0; | |||||
| FLOAT stack_buffer[stack_alloc_size]; | |||||
| buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1); | |||||
| #else | |||||
| buffer = (FLOAT *)blas_memory_alloc(1); | |||||
| #endif | |||||
| STACK_ALLOC(m, FLOAT, buffer); | |||||
| #ifdef SMPTEST | #ifdef SMPTEST | ||||
| nthreads = num_cpu_avail(2); | nthreads = num_cpu_avail(2); | ||||
| @@ -198,11 +190,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
| } | } | ||||
| #endif | #endif | ||||
| #ifdef MAX_STACK_ALLOC | |||||
| if(!stack_alloc_size) | |||||
| #endif | |||||
| blas_memory_free(buffer); | |||||
| STACK_FREE(buffer); | |||||
| FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); | FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); | ||||
| IDEBUG_END; | IDEBUG_END; | ||||