You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

memory_qalloc.c 9.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include "common.h"
  40. #ifdef OS_LINUX
  41. #include <sys/sysinfo.h>
  42. #include <sched.h>
  43. #include <errno.h>
  44. #include <linux/unistd.h>
  45. #include <sys/syscall.h>
  46. #include <sys/time.h>
  47. #include <sys/resource.h>
  48. #endif
  49. #ifdef OS_HAIKU
  50. #include <unistd.h>
  51. #endif
  52. #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN)
  53. #include <sys/sysctl.h>
  54. #include <sys/resource.h>
  55. #endif
/* Page-size constant; nothing in the visible part of this file references it. */
#define FIXED_PAGESIZE 4096
/* Buffer pointer returned by blas_memory_alloc(); stays NULL until that function assigns it. */
void *sa = NULL;
/* Second buffer pointer; blas_memory_alloc() points it at static_buffer. */
void *sb = NULL;
/* Statically reserved storage holding BUFFER_SIZE/sizeof(double) doubles. */
static double static_buffer[BUFFER_SIZE/sizeof(double)];
  60. void *blas_memory_alloc(int numproc){
  61. if (sa == NULL){
  62. #if 0
  63. sa = (void *)qalloc(QFAST, BUFFER_SIZE);
  64. #else
  65. sa = (void *)malloc(BUFFER_SIZE);
  66. #endif
  67. sb = (void *)&static_buffer[0];
  68. }
  69. return sa;
  70. }
  71. void blas_memory_free(void *free_area){
  72. return;
  73. }
  74. extern void openblas_warning(int verbose, const char * msg);
  75. #ifndef SMP
  76. #define blas_cpu_number 1
  77. #define blas_num_threads 1
  78. /* Dummy Function */
  79. int goto_get_num_procs (void) { return 1;};
  80. void goto_set_num_threads(int num_threads) {};
  81. #else
  82. #if defined(OS_LINUX) || defined(OS_SUNOS)
  83. #ifndef NO_AFFINITY
  84. int get_num_procs(void);
  85. #else
  86. int get_num_procs(void) {
  87. static int nums = 0;
  88. cpu_set_t cpuset,*cpusetp;
  89. size_t size;
  90. int ret;
  91. #if defined(__GLIBC_PREREQ)
  92. #if !__GLIBC_PREREQ(2, 7)
  93. int i;
  94. #if !__GLIBC_PREREQ(2, 6)
  95. int n;
  96. #endif
  97. #endif
  98. #endif
  99. if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
  100. #if !defined(OS_LINUX)
  101. return nums;
  102. #endif
  103. /*
  104. #if !defined(__GLIBC_PREREQ)
  105. return nums;
  106. #else
  107. #if !__GLIBC_PREREQ(2, 3)
  108. return nums;
  109. #endif
  110. #if !__GLIBC_PREREQ(2, 7)
  111. ret = sched_getaffinity(0,sizeof(cpuset), &cpuset);
  112. if (ret!=0) return nums;
  113. n=0;
  114. #if !__GLIBC_PREREQ(2, 6)
  115. for (i=0;i<nums;i++)
  116. if (CPU_ISSET(i,&cpuset)) n++;
  117. nums=n;
  118. #else
  119. nums = CPU_COUNT(sizeof(cpuset),&cpuset);
  120. #endif
  121. return nums;
  122. #else
  123. if (nums >= CPU_SETSIZE) {
  124. cpusetp = CPU_ALLOC(nums);
  125. if (cpusetp == NULL) {
  126. return nums;
  127. }
  128. size = CPU_ALLOC_SIZE(nums);
  129. ret = sched_getaffinity(0,size,cpusetp);
  130. if (ret!=0) {
  131. CPU_FREE(cpusetp);
  132. return nums;
  133. }
  134. ret = CPU_COUNT_S(size,cpusetp);
  135. if (ret > 0 && ret < nums) nums = ret;
  136. CPU_FREE(cpusetp);
  137. return nums;
  138. } else {
  139. ret = sched_getaffinity(0,sizeof(cpuset),&cpuset);
  140. if (ret!=0) {
  141. return nums;
  142. }
  143. ret = CPU_COUNT(&cpuset);
  144. if (ret > 0 && ret < nums) nums = ret;
  145. return nums;
  146. }
  147. #endif
  148. #endif
  149. */
  150. return 1;
  151. }
  152. #endif
  153. #endif
  154. #ifdef OS_ANDROID
  155. int get_num_procs(void) {
  156. static int nums = 0;
  157. if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
  158. return nums;
  159. }
  160. #endif
  161. #ifdef OS_HAIKU
  162. int get_num_procs(void) {
  163. static int nums = 0;
  164. if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
  165. return nums;
  166. }
  167. #endif
  168. #ifdef OS_AIX
  169. int get_num_procs(void) {
  170. static int nums = 0;
  171. if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
  172. return nums;
  173. }
  174. #endif
  175. #ifdef OS_WINDOWS
  176. int get_num_procs(void) {
  177. static int nums = 0;
  178. if (nums == 0) {
  179. SYSTEM_INFO sysinfo;
  180. GetSystemInfo(&sysinfo);
  181. nums = sysinfo.dwNumberOfProcessors;
  182. }
  183. return nums;
  184. }
  185. #endif
  186. #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY)
  187. int get_num_procs(void) {
  188. static int nums = 0;
  189. int m[2];
  190. size_t len;
  191. if (nums == 0) {
  192. m[0] = CTL_HW;
  193. m[1] = HW_NCPU;
  194. len = sizeof(int);
  195. sysctl(m, 2, &nums, &len, NULL, 0);
  196. }
  197. return nums;
  198. }
  199. #endif
  200. #if defined(OS_DARWIN)
  201. int get_num_procs(void) {
  202. static int nums = 0;
  203. size_t len;
  204. if (nums == 0){
  205. len = sizeof(int);
  206. sysctlbyname("hw.physicalcpu", &nums, &len, NULL, 0);
  207. }
  208. return nums;
  209. }
  210. /*
  211. void set_stack_limit(int limitMB){
  212. int result=0;
  213. struct rlimit rl;
  214. rlim_t StackSize;
  215. StackSize=limitMB*1024*1024;
  216. result=getrlimit(RLIMIT_STACK, &rl);
  217. if(result==0){
  218. if(rl.rlim_cur < StackSize){
  219. rl.rlim_cur=StackSize;
  220. result=setrlimit(RLIMIT_STACK, &rl);
  221. if(result !=0){
  222. fprintf(stderr, "OpenBLAS: set stack limit error =%d\n", result);
  223. }
  224. }
  225. }
  226. }
  227. */
  228. #endif
/*
 OpenBLAS uses the number of CPU cores for multithreading.
 It can be set with openblas_set_num_threads(int num_threads).
*/
  233. int blas_cpu_number = 0;
/*
 The number of threads in the thread pool.
 This value is equal to or larger than blas_cpu_number, which means some threads are sleeping.
*/
  238. int blas_num_threads = 0;
  239. int goto_get_num_procs (void) {
  240. return blas_cpu_number;
  241. }
  242. void openblas_fork_handler(void)
  243. {
  244. // This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is
  245. // built with "make USE_OPENMP=0".
  246. // Hanging can still happen when OpenBLAS is built against the libgomp
  247. // implementation of OpenMP. The problem is tracked at:
  248. // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035
  249. // In the mean time build with USE_OPENMP=0 or link against another
  250. // implementation of OpenMP.
  251. #if !((defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)) || defined(OS_ANDROID)) && defined(SMP_SERVER)
  252. int err;
  253. err = pthread_atfork ((void (*)(void)) BLASFUNC(blas_thread_shutdown), NULL, NULL);
  254. if(err != 0)
  255. openblas_warning(0, "OpenBLAS Warning ... cannot install fork handler. You may meet hang after fork.\n");
  256. #endif
  257. }
  258. extern int openblas_num_threads_env(void);
  259. extern int openblas_goto_num_threads_env(void);
  260. extern int openblas_omp_num_threads_env(void);
  261. int blas_get_cpu_number(void){
  262. #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
  263. int max_num;
  264. #endif
  265. int blas_goto_num = 0;
  266. int blas_omp_num = 0;
  267. if (blas_num_threads) return blas_num_threads;
  268. #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
  269. max_num = get_num_procs();
  270. #endif
  271. // blas_goto_num = 0;
  272. #ifndef USE_OPENMP
  273. blas_goto_num=openblas_num_threads_env();
  274. if (blas_goto_num < 0) blas_goto_num = 0;
  275. if (blas_goto_num == 0) {
  276. blas_goto_num=openblas_goto_num_threads_env();
  277. if (blas_goto_num < 0) blas_goto_num = 0;
  278. }
  279. #endif
  280. // blas_omp_num = 0;
  281. blas_omp_num=openblas_omp_num_threads_env();
  282. if (blas_omp_num < 0) blas_omp_num = 0;
  283. if (blas_goto_num > 0) blas_num_threads = blas_goto_num;
  284. else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
  285. else blas_num_threads = MAX_CPU_NUMBER;
  286. #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID)
  287. if (blas_num_threads > max_num) blas_num_threads = max_num;
  288. #endif
  289. if (blas_num_threads > MAX_CPU_NUMBER) blas_num_threads = MAX_CPU_NUMBER;
  290. #ifdef DEBUG
  291. printf( "Adjusted number of threads : %3d\n", blas_num_threads);
  292. #endif
  293. blas_cpu_number = blas_num_threads;
  294. return blas_num_threads;
  295. }
  296. #endif
  297. int openblas_get_num_procs(void) {
  298. #ifndef SMP
  299. return 1;
  300. #else
  301. return get_num_procs();
  302. #endif
  303. }
  304. int openblas_get_num_threads(void) {
  305. #ifndef SMP
  306. return 1;
  307. #else
  308. // init blas_cpu_number if needed
  309. blas_get_cpu_number();
  310. return blas_cpu_number;
  311. #endif
  312. }