|
- /*****************************************************************************
- Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
- 3. Neither the name of the ISCAS nor the names of its contributors may
- be used to endorse or promote products derived from this software
- without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- **********************************************************************************/
-
- /*********************************************************************/
- /* Copyright 2009, 2010 The University of Texas at Austin. */
- /* All rights reserved. */
- /* */
- /* Redistribution and use in source and binary forms, with or */
- /* without modification, are permitted provided that the following */
- /* conditions are met: */
- /* */
- /* 1. Redistributions of source code must retain the above */
- /* copyright notice, this list of conditions and the following */
- /* disclaimer. */
- /* */
- /* 2. Redistributions in binary form must reproduce the above */
- /* copyright notice, this list of conditions and the following */
- /* disclaimer in the documentation and/or other materials */
- /* provided with the distribution. */
- /* */
- /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
- /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
- /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
- /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
- /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
- /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
- /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
- /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
- /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
- /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
- /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
- /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
- /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
- /* POSSIBILITY OF SUCH DAMAGE. */
- /* */
- /* The views and conclusions contained in the software and */
- /* documentation are those of the authors and should not be */
- /* interpreted as representing official policies, either expressed */
- /* or implied, of The University of Texas at Austin. */
- /*********************************************************************/
-
- //#undef DEBUG
-
- #include "common.h"
- #include <errno.h>
-
- #ifdef OS_WINDOWS
- #define ALLOC_WINDOWS
- #ifndef MEM_LARGE_PAGES
- #define MEM_LARGE_PAGES 0x20000000
- #endif
- #else
- #define ALLOC_MMAP
- #define ALLOC_MALLOC
- #endif
-
- #include <stdlib.h>
- #include <stdio.h>
- #include <fcntl.h>
-
- #ifndef OS_WINDOWS
- #include <sys/mman.h>
- #include <sys/shm.h>
- #include <sys/ipc.h>
- #endif
-
- #include <sys/types.h>
-
- #ifdef OS_LINUX
- #include <sys/sysinfo.h>
- #include <sched.h>
- #include <errno.h>
- #include <linux/unistd.h>
- #include <sys/syscall.h>
- #endif
-
- #if defined(OS_FreeBSD) || defined(OS_Darwin)
- #include <sys/sysctl.h>
- #endif
-
- #if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__))
- #include <conio.h>
- #undef printf
- #define printf _cprintf
- #endif
-
- #ifdef OS_LINUX
-
- #ifndef MPOL_PREFERRED
- #define MPOL_PREFERRED 1
- #endif
-
- #endif
-
- #if (defined(PPC440) || !defined(OS_LINUX) || defined(HPL)) && !defined(NO_WARMUP)
- #define NO_WARMUP
- #endif
-
- #ifdef ALLOC_HUGETLB
- #define SHM_HUGETLB 04000
- #endif
-
- #ifndef FIXED_PAGESIZE
- #define FIXED_PAGESIZE 4096
- #endif
-
/* Bit-field extraction: shift `a` right by `b` bits, then AND with mask `c`. */
#define BITMASK(a, b, c) (((a) >> (b)) & (c))

/* Compiler attributes attached to gotoblas_init() / gotoblas_quit(). */
#define CONSTRUCTOR __attribute__((constructor))
#define DESTRUCTOR  __attribute__((destructor))

#ifdef DYNAMIC_ARCH
/* Run-time selected kernel table; starts out NULL
   (presumably filled in by gotoblas_dynamic_init() -- confirm). */
gotoblas_t *gotoblas = NULL;
#endif
-
- #ifndef SMP
-
- #define blas_cpu_number 1
- #define blas_num_threads 1
-
/* Single-threaded build: the thread-control entry points are stubs. */

/* Always reports exactly one processor. */
int goto_get_num_procs(void) {
  return 1;
}

/* Accepts the request and ignores it; the thread count stays fixed at 1. */
void goto_set_num_threads(int num_threads) {
  (void)num_threads;
}
-
- #else
-
- #ifdef OS_LINUX
- #ifndef NO_AFFINITY
- int get_num_procs(void);
- #else
/* Returns the value of get_nprocs(), queried on the first call and served
   from a static cache on every later call. */
int get_num_procs(void) {
  static int cached_count = 0;

  if (cached_count == 0) {
    cached_count = get_nprocs();
  }

  return cached_count;
}
- #endif
- #endif
-
- #ifdef OS_WINDOWS
-
- int get_num_procs(void) {
-
- static int nums = 0;
-
- if (nums == 0) {
-
- SYSTEM_INFO sysinfo;
-
- GetSystemInfo(&sysinfo);
-
- nums = sysinfo.dwNumberOfProcessors;
- }
-
- return nums;
- }
-
- #endif
-
- #if defined(OS_FreeBSD) || defined(OS_Darwin)
-
/* Returns the CPU count from the { CTL_HW, HW_NCPU } sysctl.
 *
 * The result is computed on the first call and cached in a static.  When
 * sysctl() fails or yields a count below 1, the function falls back to 1
 * so that callers never see "zero processors".
 */
int get_num_procs(void) {

  static int nums = 0;

  if (nums == 0) {
    int mib[2] = { CTL_HW, HW_NCPU };
    int ncpu = 0;
    size_t len = sizeof(ncpu);

    if (sysctl(mib, 2, &ncpu, &len, NULL, 0) != 0 || ncpu < 1) {
      ncpu = 1;
    }

    nums = ncpu;
  }

  return nums;
}
-
- #endif
-
/* Thread-count state.  Both values start at 0 and are assigned by
   blas_get_cpu_number(). */
int blas_cpu_number  = 0;
int blas_num_threads = 0;

/* Returns the current value of blas_cpu_number. */
int goto_get_num_procs(void) {
  const int current = blas_cpu_number;

  return current;
}
-
- int blas_get_cpu_number(void){
- char *p;
- #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
- int max_num;
- #endif
- int blas_goto_num = 0;
- int blas_omp_num = 0;
-
- if (blas_num_threads) return blas_num_threads;
-
- #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
- max_num = get_num_procs();
- #endif
-
- blas_goto_num = 0;
- #ifndef USE_OPENMP
- p = getenv("OPENBLAS_NUM_THREADS");
- if (p) blas_goto_num = atoi(p);
- if (blas_goto_num < 0) blas_goto_num = 0;
-
- if (blas_goto_num == 0) {
- p = getenv("GOTO_NUM_THREADS");
- if (p) blas_goto_num = atoi(p);
- if (blas_goto_num < 0) blas_goto_num = 0;
- }
-
- #endif
-
- blas_omp_num = 0;
- p = getenv("OMP_NUM_THREADS");
- if (p) blas_omp_num = atoi(p);
- if (blas_omp_num < 0) blas_omp_num = 0;
-
- if (blas_goto_num > 0) blas_num_threads = blas_goto_num;
- else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
- else blas_num_threads = MAX_CPU_NUMBER;
-
- #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
- if (blas_num_threads > max_num) blas_num_threads = max_num;
- #endif
-
- if (blas_num_threads > MAX_CPU_NUMBER) blas_num_threads = MAX_CPU_NUMBER;
-
- #ifdef DEBUG
- printf( "Adjusted number of threads : %3d\n", blas_num_threads);
- #endif
-
- blas_cpu_number = blas_num_threads;
-
- return blas_num_threads;
- }
- #endif
-
- struct release_t {
- void *address;
- void (*func)(struct release_t *);
- long attr;
- };
-
- int hugetlb_allocated = 0;
-
- static struct release_t release_info[NUM_BUFFERS];
- static int release_pos = 0;
-
- #if defined(OS_LINUX) && !defined(NO_WARMUP)
- static int hot_alloc = 0;
- #endif
-
- #ifdef ALLOC_MMAP
-
- static void alloc_mmap_free(struct release_t *release){
-
- if (munmap(release -> address, BUFFER_SIZE)) {
- printf("OpenBLAS : munmap failed\n");
- }
- }
-
- #ifdef NO_WARMUP
-
- static void *alloc_mmap(void *address){
- void *map_address;
-
- if (address){
- map_address = mmap(address,
- BUFFER_SIZE,
- MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);
- } else {
- map_address = mmap(address,
- BUFFER_SIZE,
- MMAP_ACCESS, MMAP_POLICY, -1, 0);
- }
-
- if (map_address != (void *)-1) {
- release_info[release_pos].address = map_address;
- release_info[release_pos].func = alloc_mmap_free;
- release_pos ++;
- }
-
- #ifdef OS_LINUX
- my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
- #endif
-
- return map_address;
- }
-
- #else
-
- #define BENCH_ITERATION 4
- #define SCALING 2
-
- static inline BLASULONG run_bench(BLASULONG address, long size) {
-
- BLASULONG original, *p;
- BLASULONG start, stop, min;
- int iter, i, count;
-
- min = (BLASULONG)-1;
-
- original = *(BLASULONG *)(address + size - PAGESIZE);
-
- *(BLASULONG *)(address + size - PAGESIZE) = (BLASULONG)address;
-
- for (iter = 0; iter < BENCH_ITERATION; iter ++ ) {
-
- p = (BLASULONG *)address;
-
- count = size / PAGESIZE;
-
- start = rpcc();
-
- for (i = 0; i < count; i ++) {
- p = (BLASULONG *)(*p);
- }
-
- stop = rpcc();
-
- if (min > stop - start) min = stop - start;
- }
-
- *(BLASULONG *)(address + size - PAGESIZE + 0) = original;
- *(BLASULONG *)(address + size - PAGESIZE + 8) = (BLASULONG)p;
-
- return min;
- }
-
- static void *alloc_mmap(void *address){
- void *map_address, *best_address;
- BLASULONG best, start, current;
- BLASULONG allocsize;
-
- if (address){
- /* Just give up use advanced operation */
- map_address = mmap(address, BUFFER_SIZE, MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);
-
- #ifdef OS_LINUX
- my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
- #endif
-
- } else {
- #if defined(OS_LINUX) && !defined(NO_WARMUP)
- if (hot_alloc == 0) {
- map_address = mmap(NULL, BUFFER_SIZE, MMAP_ACCESS, MMAP_POLICY, -1, 0);
-
- #ifdef OS_LINUX
- my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
- #endif
-
- } else {
- #endif
-
- map_address = mmap(NULL, BUFFER_SIZE * SCALING,
- MMAP_ACCESS, MMAP_POLICY, -1, 0);
-
- if (map_address != (void *)-1) {
-
- #ifdef OS_LINUX
- #ifdef DEBUG
- int ret=0;
- ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
- if(ret==-1){
- int errsv=errno;
- perror("OpenBLAS alloc_mmap:");
- printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
- }
-
- #else
- my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
- #endif
- #endif
-
-
- allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
-
- start = (BLASULONG)map_address;
- current = (SCALING - 1) * BUFFER_SIZE;
-
- while(current > 0) {
- *(long *)start = (long)start + PAGESIZE;
- start += PAGESIZE;
- current -= PAGESIZE;
- }
-
- *(long *)(start - PAGESIZE) = (BLASULONG)map_address;
-
- start = (BLASULONG)map_address;
-
- best = (BLASULONG)-1;
- best_address = map_address;
-
- while ((start + allocsize < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) {
-
- current = run_bench(start, allocsize);
-
- if (best > current) {
- best = current;
- best_address = (void *)start;
- }
-
- start += PAGESIZE;
-
- }
-
- if ((BLASULONG)best_address > (BLASULONG)map_address)
- munmap(map_address, (BLASULONG)best_address - (BLASULONG)map_address);
-
- munmap((void *)((BLASULONG)best_address + BUFFER_SIZE), (SCALING - 1) * BUFFER_SIZE + (BLASULONG)map_address - (BLASULONG)best_address);
-
- map_address = best_address;
-
- #if defined(OS_LINUX) && !defined(NO_WARMUP)
- hot_alloc = 2;
- #endif
- }
- }
- #if defined(OS_LINUX) && !defined(NO_WARMUP)
- }
- #endif
-
- if (map_address != (void *)-1) {
- release_info[release_pos].address = map_address;
- release_info[release_pos].func = alloc_mmap_free;
- release_pos ++;
- }
-
- return map_address;
- }
-
- #endif
-
- #endif
-
-
- #ifdef ALLOC_MALLOC
-
- static void alloc_malloc_free(struct release_t *release){
-
- free(release -> address);
-
- }
-
- static void *alloc_malloc(void *address){
-
- void *map_address;
-
- map_address = (void *)malloc(BUFFER_SIZE + FIXED_PAGESIZE);
-
- if (map_address == (void *)NULL) map_address = (void *)-1;
-
- if (map_address != (void *)-1) {
- release_info[release_pos].address = map_address;
- release_info[release_pos].func = alloc_malloc_free;
- release_pos ++;
- }
-
- return map_address;
-
- }
-
- #endif
-
- #ifdef ALLOC_QALLOC
-
- void *qalloc(int flags, size_t bytes);
- void *qfree (void *address);
-
- #define QNONCACHE 0x1
- #define QCOMMS 0x2
- #define QFAST 0x4
-
- static void alloc_qalloc_free(struct release_t *release){
-
- qfree(release -> address);
-
- }
-
- static void *alloc_qalloc(void *address){
- void *map_address;
-
- map_address = (void *)qalloc(QCOMMS | QFAST, BUFFER_SIZE + FIXED_PAGESIZE);
-
- if (map_address == (void *)NULL) map_address = (void *)-1;
-
- if (map_address != (void *)-1) {
- release_info[release_pos].address = map_address;
- release_info[release_pos].func = alloc_qalloc_free;
- release_pos ++;
- }
-
- return (void *)(((BLASULONG)map_address + FIXED_PAGESIZE - 1) & ~(FIXED_PAGESIZE - 1));
- }
-
- #endif
-
- #ifdef ALLOC_WINDOWS
-
- static void alloc_windows_free(struct release_t *release){
-
- VirtualFree(release -> address, BUFFER_SIZE, MEM_DECOMMIT);
-
- }
-
- static void *alloc_windows(void *address){
- void *map_address;
-
- map_address = VirtualAlloc(address,
- BUFFER_SIZE,
- MEM_RESERVE | MEM_COMMIT,
- PAGE_READWRITE);
-
- if (map_address == (void *)NULL) map_address = (void *)-1;
-
- if (map_address != (void *)-1) {
- release_info[release_pos].address = map_address;
- release_info[release_pos].func = alloc_windows_free;
- release_pos ++;
- }
-
- return map_address;
- }
-
- #endif
-
- #ifdef ALLOC_DEVICEDRIVER
- #ifndef DEVICEDRIVER_NAME
- #define DEVICEDRIVER_NAME "/dev/mapper"
- #endif
-
- static void alloc_devicedirver_free(struct release_t *release){
-
- if (munmap(release -> address, BUFFER_SIZE)) {
- printf("OpenBLAS : Bugphysarea unmap failed.\n");
- }
-
- if (close(release -> attr)) {
- printf("OpenBLAS : Bugphysarea close failed.\n");
- }
-
- }
-
- static void *alloc_devicedirver(void *address){
-
- int fd;
- void *map_address;
-
- if ((fd = open(DEVICEDRIVER_NAME, O_RDWR | O_SYNC)) < 0) {
-
- return (void *)-1;
-
- }
-
- map_address = mmap(address, BUFFER_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_FILE | MAP_SHARED,
- fd, 0);
-
- if (map_address != (void *)-1) {
- release_info[release_pos].address = map_address;
- release_info[release_pos].attr = fd;
- release_info[release_pos].func = alloc_devicedirver_free;
- release_pos ++;
- }
-
- return map_address;
- }
-
- #endif
-
- #ifdef ALLOC_SHM
-
- static void alloc_shm_free(struct release_t *release){
-
- if (shmdt(release -> address)) {
- printf("OpenBLAS : Shared memory unmap failed.\n");
- }
- }
-
- static void *alloc_shm(void *address){
- void *map_address;
- int shmid;
-
- shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,IPC_CREAT | 0600);
-
- map_address = (void *)shmat(shmid, address, 0);
-
- if (map_address != (void *)-1){
-
- #ifdef OS_LINUX
- my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
- #endif
-
- shmctl(shmid, IPC_RMID, 0);
-
- release_info[release_pos].address = map_address;
- release_info[release_pos].attr = shmid;
- release_info[release_pos].func = alloc_shm_free;
- release_pos ++;
- }
-
- return map_address;
- }
-
- #endif
-
- #if defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS
-
/* Release callback for alloc_hugetlb().  Each platform undoes the
 * allocation the way it was made:
 *   - Linux / AIX : shmdt() of the attached huge-page segment.
 *   - __sun__     : free(); the buffer came from memalign(), so it must
 *                   not be passed to munmap().
 *   - Windows     : VirtualFree(..., 0, MEM_RELEASE).  MEM_LARGE_PAGES is
 *                   an allocation type, not a free type, and must not be
 *                   combined into the dwFreeType argument.
 */
static void alloc_hugetlb_free(struct release_t *release){

#if defined(OS_LINUX) || defined(OS_AIX)
  if (shmdt(release -> address)) {
    printf("OpenBLAS : Hugepage unmap failed.\n");
  }
#endif

#ifdef __sun__

  free(release -> address);

#endif

#ifdef OS_WINDOWS

  VirtualFree(release -> address, 0, MEM_RELEASE);

#endif

}
-
- static void *alloc_hugetlb(void *address){
-
- void *map_address = (void *)-1;
-
- #if defined(OS_LINUX) || defined(OS_AIX)
- int shmid;
-
- shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,
- #ifdef OS_LINUX
- SHM_HUGETLB |
- #endif
- #ifdef OS_AIX
- SHM_LGPAGE | SHM_PIN |
- #endif
- IPC_CREAT | SHM_R | SHM_W);
-
- if (shmid != -1) {
- map_address = (void *)shmat(shmid, address, SHM_RND);
-
- #ifdef OS_LINUX
- my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
- #endif
-
- if (map_address != (void *)-1){
- shmctl(shmid, IPC_RMID, 0);
- }
- }
- #endif
-
- #ifdef __sun__
- struct memcntl_mha mha;
-
- mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
- mha.mha_flags = 0;
- mha.mha_pagesize = HUGE_PAGESIZE;
- memcntl(NULL, 0, MC_HAT_ADVISE, (char *)&mha, 0, 0);
-
- map_address = (BLASULONG)memalign(HUGE_PAGESIZE, BUFFER_SIZE);
- #endif
-
- #ifdef OS_WINDOWS
-
- HANDLE hToken;
- TOKEN_PRIVILEGES tp;
-
- if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &hToken) != TRUE) return (void *) -1;
-
- tp.PrivilegeCount = 1;
- tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
-
- if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) return (void *) -1;
-
- if (AdjustTokenPrivileges(hToken, FALSE, (PTOKEN_PRIVILEGES)&tp, 0, NULL, NULL) != TRUE) return (void *) -1;
-
- map_address = (void *)VirtualAlloc(address,
- BUFFER_SIZE,
- MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT,
- PAGE_READWRITE);
-
- AdjustTokenPrivileges(hToken, TRUE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, NULL);
-
- if (map_address == (void *)NULL) map_address = (void *)-1;
-
- #endif
-
- if (map_address != (void *)-1){
- release_info[release_pos].address = map_address;
- release_info[release_pos].func = alloc_hugetlb_free;
- release_pos ++;
- }
-
- return map_address;
- }
- #endif
-
- #ifdef ALLOC_HUGETLBFILE
-
- static int hugetlb_pid = 0;
-
- static void alloc_hugetlbfile_free(struct release_t *release){
-
- if (munmap(release -> address, BUFFER_SIZE)) {
- printf("OpenBLAS : HugeTLBfs unmap failed.\n");
- }
-
- if (close(release -> attr)) {
- printf("OpenBLAS : HugeTLBfs close failed.\n");
- }
- }
-
- static void *alloc_hugetlbfile(void *address){
-
- void *map_address = (void *)-1;
- int fd;
- char filename[64];
-
- if (!hugetlb_pid) hugetlb_pid = getpid();
-
- sprintf(filename, "%s/gotoblas.%d", HUGETLB_FILE_NAME, hugetlb_pid);
-
- if ((fd = open(filename, O_RDWR | O_CREAT, 0700)) < 0) {
- return (void *)-1;
- }
-
- unlink(filename);
-
- map_address = mmap(address, BUFFER_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_SHARED,
- fd, 0);
-
- if (map_address != (void *)-1) {
- release_info[release_pos].address = map_address;
- release_info[release_pos].attr = fd;
- release_info[release_pos].func = alloc_hugetlbfile_free;
- release_pos ++;
- }
-
- return map_address;
- }
- #endif
-
- /* Global lock for memory allocation */
-
- #if defined(USE_PTHREAD_LOCK)
- static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;
- #elif defined(USE_PTHREAD_SPINLOCK)
- static pthread_spinlock_t alloc_lock = 0;
- #else
- static BLASULONG alloc_lock = 0UL;
- #endif
-
- #ifdef SEEK_ADDRESS
- static BLASULONG base_address = 0UL;
- #else
- static BLASULONG base_address = BASE_ADDRESS;
- #endif
-
- static volatile struct {
- BLASULONG lock;
- void *addr;
- #if defined(WHEREAMI) && !defined(USE_OPENMP)
- int pos;
- #endif
- int used;
- #ifndef __64BIT__
- char dummy[48];
- #else
- char dummy[40];
- #endif
-
- } memory[NUM_BUFFERS];
-
- static int memory_initialized = 0;
- static void gotoblas_memory_init(void);
-
- /* Memory allocation routine */
- /* procpos ... indicates where it comes from */
- /* 0 : Level 3 functions */
- /* 1 : Level 2 functions */
- /* 2 : Thread */
-
- void *blas_memory_alloc(int procpos){
-
- int position;
- #if defined(WHEREAMI) && !defined(USE_OPENMP)
- int mypos;
- #endif
-
- void *map_address;
-
- void *(*memoryalloc[])(void *address) = {
- #ifdef ALLOC_DEVICEDRIVER
- alloc_devicedirver,
- #endif
- #if defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS
- alloc_hugetlb,
- #endif
- #ifdef ALLOC_SHM
- alloc_shm,
- #endif
- #ifdef ALLOC_MMAP
- alloc_mmap,
- #endif
- #ifdef ALLOC_QALLOC
- alloc_qalloc,
- #endif
- #ifdef ALLOC_WINDOWS
- alloc_windows,
- #endif
- #ifdef ALLOC_MALLOC
- alloc_malloc,
- #endif
- NULL,
- };
- void *(**func)(void *address);
-
- if (!memory_initialized) {
-
- LOCK_COMMAND(&alloc_lock);
-
- if (!memory_initialized) {
-
- #if defined(WHEREAMI) && !defined(USE_OPENMP)
- for (position = 0; position < NUM_BUFFERS; position ++){
- memory[position].addr = (void *)0;
- memory[position].pos = -1;
- memory[position].used = 0;
- memory[position].lock = 0;
- }
- #endif
-
- #ifdef DYNAMIC_ARCH
- gotoblas_dynamic_init();
- #endif
-
- #if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
- gotoblas_affinity_init();
- #endif
-
- #ifdef SMP
- if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
- #endif
-
- #if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
- #ifndef DYNAMIC_ARCH
- blas_set_parameter();
- #endif
- #endif
-
- memory_initialized = 1;
- }
-
- UNLOCK_COMMAND(&alloc_lock);
- }
-
- #ifdef DEBUG
- printf("Alloc Start ...\n");
- #endif
-
- #if defined(WHEREAMI) && !defined(USE_OPENMP)
-
- mypos = WhereAmI();
-
- position = mypos;
- while (position > NUM_BUFFERS) position >>= 1;
-
- do {
- if (!memory[position].used && (memory[position].pos == mypos)) {
-
- blas_lock(&memory[position].lock);
-
- if (!memory[position].used) goto allocation;
-
- blas_unlock(&memory[position].lock);
- }
-
- position ++;
-
- } while (position < NUM_BUFFERS);
-
-
- #endif
-
- position = 0;
-
- do {
- if (!memory[position].used) {
-
- blas_lock(&memory[position].lock);
-
- if (!memory[position].used) goto allocation;
-
- blas_unlock(&memory[position].lock);
- }
-
- position ++;
-
- } while (position < NUM_BUFFERS);
-
- goto error;
-
- allocation :
-
- #ifdef DEBUG
- printf(" Position -> %d\n", position);
- #endif
-
- memory[position].used = 1;
-
- blas_unlock(&memory[position].lock);
-
- if (!memory[position].addr) {
- do {
- #ifdef DEBUG
- printf("Allocation Start : %lx\n", base_address);
- #endif
-
- map_address = (void *)-1;
-
- func = &memoryalloc[0];
-
- while ((func != NULL) && (map_address == (void *) -1)) {
-
- map_address = (*func)((void *)base_address);
-
- #ifdef ALLOC_DEVICEDRIVER
- if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
- fprintf(stderr, "OpenBLAS Warning ... Physically contigous allocation was failed.\n");
- }
- #endif
-
- #ifdef ALLOC_HUGETLBFILE
- if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) {
- #ifndef OS_WINDOWS
- fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n");
- #endif
- }
- #endif
-
- #if defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS
- if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
- #endif
-
- func ++;
- }
-
- #ifdef DEBUG
- printf(" Success -> %08lx\n", map_address);
- #endif
- if (((BLASLONG) map_address) == -1) base_address = 0UL;
-
- if (base_address) base_address += BUFFER_SIZE + FIXED_PAGESIZE;
-
- } while ((BLASLONG)map_address == -1);
-
- memory[position].addr = map_address;
-
- #ifdef DEBUG
- printf(" Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
- #endif
- }
-
- #if defined(WHEREAMI) && !defined(USE_OPENMP)
-
- if (memory[position].pos == -1) memory[position].pos = mypos;
-
- #endif
-
- #ifdef DYNAMIC_ARCH
-
- if (memory_initialized == 1) {
-
- LOCK_COMMAND(&alloc_lock);
-
- if (memory_initialized == 1) {
-
- if (!gotoblas) gotoblas_dynamic_init();
-
- memory_initialized = 2;
- }
-
- UNLOCK_COMMAND(&alloc_lock);
-
- }
- #endif
-
-
- #ifdef DEBUG
- printf("Mapped : %p %3d\n\n",
- (void *)memory[position].addr, position);
- #endif
-
- return (void *)memory[position].addr;
-
- error:
- printf("BLAS : Program is Terminated. Because you tried to allocate too many memory regions.\n");
-
- return NULL;
- }
-
- void blas_memory_free(void *free_area){
-
- int position;
-
- #ifdef DEBUG
- printf("Unmapped Start : %p ...\n", free_area);
- #endif
-
- position = 0;
-
- while ((memory[position].addr != free_area)
- && (position < NUM_BUFFERS)) position++;
-
- if (memory[position].addr != free_area) goto error;
-
- #ifdef DEBUG
- printf(" Position : %d\n", position);
- #endif
-
- memory[position].used = 0;
-
- #ifdef DEBUG
- printf("Unmap Succeeded.\n\n");
- #endif
-
- return;
-
- error:
- printf("BLAS : Bad memory unallocation! : %4d %p\n", position, free_area);
-
- #ifdef DEBUG
- for (position = 0; position < NUM_BUFFERS; position++)
- printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used);
- #endif
-
- return;
- }
-
- void blas_shutdown(void){
-
- int pos;
-
- #ifdef SMP
- BLASFUNC(blas_thread_shutdown)();
- #endif
-
- LOCK_COMMAND(&alloc_lock);
-
- for (pos = 0; pos < release_pos; pos ++) {
- release_info[pos].func(&release_info[pos]);
- }
-
- #ifdef SEEK_ADDRESS
- base_address = 0UL;
- #else
- base_address = BASE_ADDRESS;
- #endif
-
- for (pos = 0; pos < NUM_BUFFERS; pos ++){
- memory[pos].addr = (void *)0;
- memory[pos].used = 0;
- #if defined(WHEREAMI) && !defined(USE_OPENMP)
- memory[pos].pos = -1;
- #endif
- memory[pos].lock = 0;
- }
-
- UNLOCK_COMMAND(&alloc_lock);
-
- return;
- }
-
- #if defined(OS_LINUX) && !defined(NO_WARMUP)
-
- #ifdef SMP
- #if defined(USE_PTHREAD_LOCK)
- static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
- #elif defined(USE_PTHREAD_SPINLOCK)
- static pthread_spinlock_t init_lock = 0;
- #else
- static BLASULONG init_lock = 0UL;
- #endif
- #endif
-
- static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
- void *sa, void *sb, BLASLONG pos) {
-
- #ifndef ARCH_POWER
-
- long size;
- BLASULONG buffer;
-
- size = BUFFER_SIZE - PAGESIZE;
- buffer = (BLASULONG)sa + GEMM_OFFSET_A;
-
- #if defined(OS_LINUX) && !defined(NO_WARMUP)
- if (hot_alloc != 2) {
- #endif
-
- #ifdef SMP
- LOCK_COMMAND(&init_lock);
- #endif
-
- while (size > 0) {
- *(int *)buffer = size;
- buffer += PAGESIZE;
- size -= PAGESIZE;
- }
-
- #ifdef SMP
- UNLOCK_COMMAND(&init_lock);
- #endif
-
- size = MIN((BUFFER_SIZE - PAGESIZE), L2_SIZE);
- buffer = (BLASULONG)sa + GEMM_OFFSET_A;
-
- while (size > 0) {
- *(int *)buffer = size;
- buffer += 64;
- size -= 64;
- }
-
- #if defined(OS_LINUX) && !defined(NO_WARMUP)
- }
- #endif
-
- #endif
- }
-
- #ifdef SMP
-
- static void _init_thread_memory(void *buffer) {
-
- blas_queue_t queue[MAX_CPU_NUMBER];
- int num_cpu;
-
- for (num_cpu = 0; num_cpu < blas_num_threads; num_cpu++) {
-
- blas_queue_init(&queue[num_cpu]);
- queue[num_cpu].mode = BLAS_DOUBLE | BLAS_REAL;
- queue[num_cpu].routine = &_touch_memory;
- queue[num_cpu].args = NULL;
- queue[num_cpu].next = &queue[num_cpu + 1];
- }
-
- queue[num_cpu - 1].next = NULL;
- queue[0].sa = buffer;
-
- exec_blas(num_cpu, queue);
-
- }
- #endif
-
- static void gotoblas_memory_init(void) {
-
- void *buffer;
-
- hot_alloc = 1;
-
- buffer = (void *)blas_memory_alloc(0);
-
- #ifdef SMP
- if (blas_cpu_number == 0) blas_get_cpu_number();
- #ifdef SMP_SERVER
- if (blas_server_avail == 0) blas_thread_init();
- #endif
-
- _init_thread_memory((void *)((BLASULONG)buffer + GEMM_OFFSET_A));
-
- #else
-
- _touch_memory(NULL, NULL, NULL, (void *)((BLASULONG)buffer + GEMM_OFFSET_A), NULL, 0);
-
- #endif
-
- blas_memory_free(buffer);
- }
- #endif
-
- /* Initialization for all function; this function should be called before main */
-
- static int gotoblas_initialized = 0;
-
- void CONSTRUCTOR gotoblas_init(void) {
-
- if (gotoblas_initialized) return;
-
- #ifdef PROFILE
- moncontrol (0);
- #endif
-
- #ifdef DYNAMIC_ARCH
- gotoblas_dynamic_init();
- #endif
-
- #if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
- gotoblas_affinity_init();
- #endif
-
- #if defined(OS_LINUX) && !defined(NO_WARMUP)
- gotoblas_memory_init();
- #endif
-
- #ifdef SMP
- if (blas_cpu_number == 0) blas_get_cpu_number();
- #ifdef SMP_SERVER
- if (blas_server_avail == 0) blas_thread_init();
- #endif
- #endif
-
- #ifdef FUNCTION_PROFILE
- gotoblas_profile_init();
- #endif
-
- gotoblas_initialized = 1;
-
- #ifdef PROFILE
- moncontrol (1);
- #endif
-
- }
-
- void DESTRUCTOR gotoblas_quit(void) {
-
- if (gotoblas_initialized == 0) return;
-
- #ifdef PROFILE
- moncontrol (0);
- #endif
-
- #ifdef FUNCTION_PROFILE
- gotoblas_profile_quit();
- #endif
-
- #if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
- gotoblas_affinity_quit();
- #endif
-
- #ifdef DYNAMIC_ARCH
- gotoblas_dynamic_quit();
- #endif
-
- gotoblas_initialized = 0;
-
- #ifdef PROFILE
- moncontrol (1);
- #endif
-
- }
-
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
/* Workaround for a PGI / Sun compiler bug -- never call this function.
   It only exists to carry the asm statements below, which place calls to
   gotoblas_init and gotoblas_quit in the .init and .fini sections. */
void gotoblas_dummy_for_PGI(void) {

  gotoblas_init();
  gotoblas_quit();

#if 0
  /* Disabled alternative: register through .ctors / .dtors entries. */
  asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text");
  asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text");
#else
  asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text");
  asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text");
#endif
}
#endif
|