You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

common.h 20 kB

13 years ago
10 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
13 years ago
13 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef COMMON_H
  39. #define COMMON_H
  40. #ifdef __cplusplus
  41. extern "C" {
  42. /* Assume C declarations for C++ */
  43. #endif /* __cplusplus */
  44. #ifndef _GNU_SOURCE
  45. #define _GNU_SOURCE
  46. #endif
  47. #ifndef __USE_XOPEN
  48. #define __USE_XOPEN
  49. #endif
  50. #ifndef __USE_SVID
  51. #define __USE_SVID
  52. #endif
  53. #ifdef BUILD_KERNEL
  54. #include "config_kernel.h"
  55. #else
  56. #include "config.h"
  57. #endif
  58. #undef ENABLE_SSE_EXCEPTION
  59. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  60. #define SMP
  61. #endif
  62. #if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
  63. #define WINDOWS_ABI
  64. #define OS_WINDOWS
  65. #ifdef DOUBLE
  66. #define DOUBLE_DEFINED DOUBLE
  67. #undef DOUBLE
  68. #endif
  69. #endif
  70. #if !defined(NOINCLUDE) && !defined(ASSEMBLER)
  71. #include <stdio.h>
  72. #include <stdlib.h>
  73. #include <string.h>
  74. #if !defined(_MSC_VER)
  75. #include <unistd.h>
  76. #endif
  77. #include <time.h>
  78. #ifdef OS_LINUX
  79. #include <malloc.h>
  80. #include <sched.h>
  81. #endif
  82. #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_ANDROID)
  83. #include <sched.h>
  84. #endif
  85. #ifdef OS_ANDROID
  86. #define NO_SYSV_IPC
  87. //Android NDK only supports complex.h since Android 5.0
  88. #if __ANDROID_API__ < 21
  89. #define FORCE_OPENBLAS_COMPLEX_STRUCT
  90. #endif
  91. #endif
  92. #ifdef OS_WINDOWS
  93. #ifdef ATOM
  94. #define GOTO_ATOM ATOM
  95. #undef ATOM
  96. #endif
  97. #include <windows.h>
  98. #include <math.h>
  99. #ifdef GOTO_ATOM
  100. #define ATOM GOTO_ATOM
  101. #undef GOTO_ATOM
  102. #endif
  103. #else
  104. #include <sys/mman.h>
  105. #ifndef NO_SYSV_IPC
  106. #include <sys/shm.h>
  107. #endif
  108. #include <sys/time.h>
  109. #include <time.h>
  110. #include <unistd.h>
  111. #include <math.h>
  112. #ifdef SMP
  113. #include <pthread.h>
  114. #endif
  115. #endif
  116. #if defined(OS_SUNOS)
  117. #include <thread.h>
  118. #endif
  119. #ifdef __DECC
  120. #include <c_asm.h>
  121. #include <machine/builtins.h>
  122. #endif
  123. #if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
  124. #include <fenv.h>
  125. #endif
  126. #endif
  127. #if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
  128. #define DOUBLE DOUBLE_DEFINED
  129. #undef DOUBLE_DEFINED
  130. #endif
  131. #undef DEBUG_INFO
  132. #define SMP_DEBUG
  133. #undef MALLOC_DEBUG
  134. #undef SMP_ALLOC_DEBUG
  135. #ifndef ZERO
  136. #ifdef XDOUBLE
  137. #define ZERO 0.e0L
  138. #elif defined DOUBLE
  139. #define ZERO 0.e0
  140. #else
  141. #define ZERO 0.e0f
  142. #endif
  143. #endif
  144. #ifndef ONE
  145. #ifdef XDOUBLE
  146. #define ONE 1.e0L
  147. #elif defined DOUBLE
  148. #define ONE 1.e0
  149. #else
  150. #define ONE 1.e0f
  151. #endif
  152. #endif
  153. #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
  154. #define ALLOCA_ALIGN 63UL
  155. #define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
  156. #ifdef NEEDBUNDERSCORE
  157. #define BLASFUNC(FUNC) FUNC##_
  158. #else
  159. #define BLASFUNC(FUNC) FUNC
  160. #endif
  161. #undef USE_PTHREAD_LOCK
  162. #undef USE_PTHREAD_SPINLOCK
  163. #if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
  164. #error "You can't specify both LOCK operation!"
  165. #endif
  166. #ifdef SMP
  167. #define USE_PTHREAD_LOCK
  168. #undef USE_PTHREAD_SPINLOCK
  169. #endif
  170. #ifdef OS_WINDOWS
  171. #undef USE_PTHREAD_LOCK
  172. #undef USE_PTHREAD_SPINLOCK
  173. #endif
  174. #if defined(USE_PTHREAD_LOCK)
  175. #define LOCK_COMMAND(x) pthread_mutex_lock(x)
  176. #define UNLOCK_COMMAND(x) pthread_mutex_unlock(x)
  177. #elif defined(USE_PTHREAD_SPINLOCK)
  178. #ifndef ASSEMBLER
  179. typedef volatile int pthread_spinlock_t;
  180. int pthread_spin_lock (pthread_spinlock_t *__lock);
  181. int pthread_spin_unlock (pthread_spinlock_t *__lock);
  182. #endif
  183. #define LOCK_COMMAND(x) pthread_spin_lock(x)
  184. #define UNLOCK_COMMAND(x) pthread_spin_unlock(x)
  185. #else
  186. #define LOCK_COMMAND(x) blas_lock(x)
  187. #define UNLOCK_COMMAND(x) blas_unlock(x)
  188. #endif
  189. #define GOTO_SHMID 0x510510
  190. #if 0
  191. #ifndef __CUDACC__
  192. #define __global__
  193. #define __device__
  194. #define __host__
  195. #define __shared__
  196. #endif
  197. #endif
  198. #ifndef ASSEMBLER
  199. #ifdef QUAD_PRECISION
  200. typedef struct {
  201. unsigned long x[2];
  202. } xdouble;
  203. #elif defined EXPRECISION
  204. #define xdouble long double
  205. #else
  206. #define xdouble double
  207. #endif
  208. #if defined(OS_WINDOWS) && defined(__64BIT__)
  209. typedef long long BLASLONG;
  210. typedef unsigned long long BLASULONG;
  211. #else
  212. typedef long BLASLONG;
  213. typedef unsigned long BLASULONG;
  214. #endif
  215. #ifdef USE64BITINT
  216. typedef BLASLONG blasint;
  217. #else
  218. typedef int blasint;
  219. #endif
  220. #else
  221. #ifdef USE64BITINT
  222. #define INTSHIFT 3
  223. #define INTSIZE 8
  224. #else
  225. #define INTSHIFT 2
  226. #define INTSIZE 4
  227. #endif
  228. #endif
  229. #ifdef XDOUBLE
  230. #define FLOAT xdouble
  231. #ifdef QUAD_PRECISION
  232. #define XFLOAT xidouble
  233. #endif
  234. #ifdef QUAD_PRECISION
  235. #define SIZE 32
  236. #define BASE_SHIFT 5
  237. #define ZBASE_SHIFT 6
  238. #else
  239. #define SIZE 16
  240. #define BASE_SHIFT 4
  241. #define ZBASE_SHIFT 5
  242. #endif
  243. #elif defined(DOUBLE)
  244. #define FLOAT double
  245. #define SIZE 8
  246. #define BASE_SHIFT 3
  247. #define ZBASE_SHIFT 4
  248. #else
  249. #define FLOAT float
  250. #define SIZE 4
  251. #define BASE_SHIFT 2
  252. #define ZBASE_SHIFT 3
  253. #endif
  254. #ifndef XFLOAT
  255. #define XFLOAT FLOAT
  256. #endif
  257. #ifndef COMPLEX
  258. #define COMPSIZE 1
  259. #else
  260. #define COMPSIZE 2
  261. #endif
  262. #define Address_H(x) (((x)+(1<<15))>>16)
  263. #define Address_L(x) ((x)-((Address_H(x))<<16))
  264. #ifndef MAX_CPU_NUMBER
  265. #define MAX_CPU_NUMBER 2
  266. #endif
  267. #if defined(OS_SUNOS)
  268. #define YIELDING thr_yield()
  269. #endif
  270. #if defined(OS_WINDOWS)
  271. #if defined(_MSC_VER) && !defined(__clang__)
  272. #define YIELDING YieldProcessor()
  273. #else
  274. #define YIELDING SwitchToThread()
  275. #endif
  276. #endif
  277. #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
  278. #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
  279. #endif
  280. #ifdef BULLDOZER
  281. #ifndef YIELDING
  282. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  283. #endif
  284. #endif
  285. #ifdef POWER8
  286. #ifndef YIELDING
  287. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  288. #endif
  289. #endif
  290. /*
  291. #ifdef PILEDRIVER
  292. #ifndef YIELDING
  293. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  294. #endif
  295. #endif
  296. */
  297. /*
  298. #ifdef STEAMROLLER
  299. #ifndef YIELDING
  300. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  301. #endif
  302. #endif
  303. */
  304. #ifndef YIELDING
  305. #define YIELDING sched_yield()
  306. #endif
  307. /***
  308. Whether to allocate job_t on the heap or on the stack.
  309. Please see https://github.com/xianyi/OpenBLAS/issues/246
  310. ***/
  311. #if defined(OS_WINDOWS)
  312. #define GETRF_MEM_ALLOC_THRESHOLD 32
  313. #define BLAS3_MEM_ALLOC_THRESHOLD 32
  314. #endif
  315. #ifndef GETRF_MEM_ALLOC_THRESHOLD
  316. #define GETRF_MEM_ALLOC_THRESHOLD 80
  317. #endif
  318. #ifndef BLAS3_MEM_ALLOC_THRESHOLD
  319. #define BLAS3_MEM_ALLOC_THRESHOLD 160
  320. #endif
  321. #ifdef QUAD_PRECISION
  322. #include "common_quad.h"
  323. #endif
  324. #ifdef ARCH_ALPHA
  325. #include "common_alpha.h"
  326. #endif
  327. #ifdef ARCH_X86
  328. #include "common_x86.h"
  329. #endif
  330. #ifdef ARCH_X86_64
  331. #include "common_x86_64.h"
  332. #endif
  333. #ifdef ARCH_IA64
  334. #include "common_ia64.h"
  335. #endif
  336. #ifdef ARCH_POWER
  337. #include "common_power.h"
  338. #endif
  339. #ifdef sparc
  340. #include "common_sparc.h"
  341. #endif
  342. #ifdef ARCH_MIPS
  343. #include "common_mips.h"
  344. #endif
  345. #ifdef ARCH_MIPS64
  346. #include "common_mips64.h"
  347. #endif
  348. #ifdef ARCH_ARM
  349. #include "common_arm.h"
  350. #endif
  351. #ifdef ARCH_ARM64
  352. #include "common_arm64.h"
  353. #endif
  354. #ifdef ARCH_ZARCH
  355. #include "common_zarch.h"
  356. #endif
  357. #ifndef ASSEMBLER
  358. #ifdef OS_WINDOWSSTORE
  359. typedef char env_var_t[MAX_PATH];
  360. #define readenv(p, n) 0
  361. #else
  362. #ifdef OS_WINDOWS
  363. typedef char env_var_t[MAX_PATH];
  364. #define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
  365. #else
  366. typedef char* env_var_t;
  367. #define readenv(p, n) ((p)=getenv(n))
  368. #endif
  369. #endif
  370. #if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
  371. #ifdef _POSIX_MONOTONIC_CLOCK
  372. #if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17)
  373. #if __GLIBC_PREREQ(2, 17) // don't require -lrt
  374. #define USE_MONOTONIC
  375. #endif
  376. #elif defined(OS_ANDROID)
  377. #define USE_MONOTONIC
  378. #endif
  379. #endif
  380. /* use similar scale as x86 rdtsc for timeouts to work correctly */
  /*
   * Fallback timestamp counter for platforms without a native rpcc().
   *
   * Returns the current time expressed in nanoseconds. When USE_MONOTONIC is
   * defined the reading comes from clock_gettime(CLOCK_MONOTONIC); otherwise
   * it comes from gettimeofday(), whose microsecond field is multiplied by
   * 1000 to reach the same nanosecond scale.
   */
  static inline unsigned long long rpcc(void){
  #ifdef USE_MONOTONIC
    struct timespec now;
    unsigned long long ticks;

    clock_gettime(CLOCK_MONOTONIC, &now);
    /* whole seconds -> nanoseconds, then add the sub-second part */
    ticks  = (unsigned long long)now.tv_sec * 1000000000ull;
    ticks += now.tv_nsec;
    return ticks;
  #else
    struct timeval now;
    unsigned long long ticks;

    gettimeofday(&now, NULL);
    /* whole seconds -> nanoseconds, then add microseconds scaled to ns */
    ticks  = (unsigned long long)now.tv_sec * 1000000000ull;
    ticks += now.tv_usec * 1000;
    return ticks;
  #endif
  }
  392. #define RPCC_DEFINED
  393. #define RPCC64BIT
  394. #endif // !RPCC_DEFINED
  395. #if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
  396. static void __inline blas_lock(volatile BLASULONG *address){
  397. do {
  398. while (*address) {YIELDING;};
  399. } while (!__sync_bool_compare_and_swap(address, 0, 1));
  400. }
  401. #define BLAS_LOCK_DEFINED
  402. #endif
  403. #ifndef RPCC_DEFINED
  404. #error "rpcc() implementation is missing for your platform"
  405. #endif
  406. #ifndef BLAS_LOCK_DEFINED
  407. #error "blas_lock() implementation is missing for your platform"
  408. #endif
  409. #endif // !ASSEMBLER
  410. #ifdef OS_LINUX
  411. #include "common_linux.h"
  412. #endif
  413. #define MMAP_ACCESS (PROT_READ | PROT_WRITE)
  414. #ifdef __NetBSD__
  415. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
  416. #else
  417. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
  418. #endif
  419. #ifndef ASSEMBLER
  420. /* C99 supports complex floating numbers natively, which GCC also offers as an
  421. extension since version 3.0. If neither are available, use a compatible
  422. structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
  423. #if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
  424. (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER)
  425. #define OPENBLAS_COMPLEX_C99
  426. #ifndef __cplusplus
  427. #include <complex.h>
  428. #endif
  429. typedef float _Complex openblas_complex_float;
  430. typedef double _Complex openblas_complex_double;
  431. typedef xdouble _Complex openblas_complex_xdouble;
  432. #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
  433. #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
  434. #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
  435. #else
  436. #define OPENBLAS_COMPLEX_STRUCT
  437. typedef struct { float real, imag; } openblas_complex_float;
  438. typedef struct { double real, imag; } openblas_complex_double;
  439. typedef struct { xdouble real, imag; } openblas_complex_xdouble;
  440. #define openblas_make_complex_float(real, imag) {(real), (imag)}
  441. #define openblas_make_complex_double(real, imag) {(real), (imag)}
  442. #define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
  443. #endif
  444. #endif
  445. #include "param.h"
  446. #include "common_param.h"
  447. #ifndef STDERR
  448. #define STDERR stderr
  449. #endif
  450. #ifndef MASK
  451. #define MASK(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
  452. #endif
  453. #if defined(XDOUBLE) || defined(DOUBLE)
  454. #define FLOATRET FLOAT
  455. #else
  456. #ifdef NEED_F2CCONV
  457. #define FLOATRET double
  458. #else
  459. #define FLOATRET float
  460. #endif
  461. #endif
  462. #ifndef ASSEMBLER
  463. #ifndef NOINCLUDE
  464. /* Inclusion of a standard header file is needed for definition of __STDC_*
  465. predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
  466. as a side effect of including either <features.h> or <stdc-predef.h>. */
  467. #include <stdio.h>
  468. #endif // NOINCLUDE
  469. #ifdef XDOUBLE
  470. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
  471. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
  472. #elif defined(DOUBLE)
  473. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
  474. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
  475. #else
  476. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
  477. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
  478. #endif
  479. #if defined(C_PGI) || defined(C_SUN)
  480. #if defined(__STDC_IEC_559_COMPLEX__)
  481. #define CREAL(X) creal(X)
  482. #define CIMAG(X) cimag(X)
  483. #else
  484. #define CREAL(X) (*((FLOAT *)&X + 0))
  485. #define CIMAG(X) (*((FLOAT *)&X + 1))
  486. #endif
  487. #else
  488. #ifdef OPENBLAS_COMPLEX_STRUCT
  489. #define CREAL(Z) ((Z).real)
  490. #define CIMAG(Z) ((Z).imag)
  491. #else
  492. #define CREAL __real__
  493. #define CIMAG __imag__
  494. #endif
  495. #endif
  496. #endif // ASSEMBLER
  497. #ifndef IFLUSH
  498. #define IFLUSH
  499. #endif
  500. #ifndef IFLUSH_HALF
  501. #define IFLUSH_HALF
  502. #endif
  503. #if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
  504. #ifdef USE_OPENMP
  505. #undef USE_OPENMP
  506. #endif
  507. #endif
  508. #if defined(C_MSVC)
  509. #define inline __inline
  510. #endif
  511. #ifndef ASSEMBLER
  /*
   * Two-argument minimum / maximum.
   *
   * Both arguments and the comparison are fully parenthesized so that operands
   * containing lower-precedence operators (for example MIN(x & mask, y) or a
   * conditional expression) are grouped as the caller wrote them.
   * Each argument can still be evaluated twice, so do not pass expressions
   * with side effects.
   */
  #ifndef MIN
  #define MIN(a,b) (((a) > (b)) ? (b) : (a))
  #endif
  #ifndef MAX
  #define MAX(a,b) (((a) < (b)) ? (b) : (a))
  #endif
  /* Modifies the lvalue `a` in place: when its value is greater than 0x60,
     0x20 is subtracted from it (which maps ASCII 'a'..'z' onto 'A'..'Z').
     There is no upper-bound check, so every value above 0x60 is shifted.
     Expands to a brace-enclosed block. */
  #define TOUPPER(a) { if (0x60 < (a)) { (a) -= 0x20; } }
  519. #if defined(__FreeBSD__) || defined(__APPLE__)
  520. #define MAP_ANONYMOUS MAP_ANON
  521. #endif
  522. /* Common Memory Management Routine */
  523. void blas_set_parameter(void);
  524. int blas_get_cpu_number(void);
  525. void *blas_memory_alloc (int);
  526. void blas_memory_free (void *);
  527. void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
  528. void blas_memory_free_nolock (void *);
  529. int get_num_procs (void);
  530. #if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
  531. int get_num_nodes (void);
  532. int get_num_proc (int);
  533. int get_node_equal (void);
  534. #endif
  535. void goto_set_num_threads(int);
  536. void gotoblas_affinity_init(void);
  537. void gotoblas_affinity_quit(void);
  538. void gotoblas_dynamic_init(void);
  539. void gotoblas_dynamic_quit(void);
  540. void gotoblas_profile_init(void);
  541. void gotoblas_profile_quit(void);
  542. #ifdef USE_OPENMP
  543. #ifndef C_MSVC
  544. int omp_in_parallel(void);
  545. int omp_get_num_procs(void);
  546. #else
  547. __declspec(dllimport) int __cdecl omp_in_parallel(void);
  548. __declspec(dllimport) int __cdecl omp_get_num_procs(void);
  549. #endif
  550. #else
  551. #ifdef __ELF__
  552. int omp_in_parallel (void) __attribute__ ((weak));
  553. int omp_get_num_procs(void) __attribute__ ((weak));
  554. #endif
  555. #endif
  556. static __inline void blas_unlock(volatile BLASULONG *address){
  557. MB;
  558. *address = 0;
  559. }
  /*
   * readenv_atoi(env)
   *
   * Returns the value of the environment variable named `env` converted with
   * atoi(), or 0 when the variable cannot be read. One implementation is
   * selected per platform.
   */
  #ifdef OS_WINDOWSSTORE
  /* readenv() is defined as the constant 0 for this target, so the result
     is always 0. */
  static __inline int readenv_atoi(char *env) {
    (void)env;
    return 0;
  }
  #else
  #ifdef OS_WINDOWS
  static __inline int readenv_atoi(char *env) {
    env_var_t p;
    /* readenv() expands to GetEnvironmentVariable(), which returns the number
       of characters stored on success, 0 on failure, and the required buffer
       size when the value does not fit. Only the first case leaves a valid
       string in p; the original code had the two outcomes swapped and parsed
       the buffer exactly when the lookup had failed. */
    unsigned long len = readenv(p, env);
    if (len == 0 || len >= sizeof(p)) {
      return 0;
    }
    return atoi(p);
  }
  #else
  static __inline int readenv_atoi(char *env) {
    char *p = getenv(env);
    /* getenv() yields NULL for an unset variable. */
    return p ? atoi(p) : 0;
  }
  #endif
  #endif
  580. #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
  581. static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
  582. #ifndef UNIT
  583. FLOAT ratio, den;
  584. if (
  585. #ifdef XDOUBLE
  586. (fabsl(ar)) >= (fabsl(ai))
  587. #elif defined DOUBLE
  588. (fabs (ar)) >= (fabs (ai))
  589. #else
  590. (fabsf(ar)) >= (fabsf(ai))
  591. #endif
  592. ) {
  593. ratio = ai / ar;
  594. den = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
  595. ar = den;
  596. ai = -ratio * den;
  597. } else {
  598. ratio = ar / ai;
  599. den = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
  600. ar = ratio * den;
  601. ai = -den;
  602. }
  603. b[0] = ar;
  604. b[1] = ai;
  605. #else
  606. b[0] = ONE;
  607. b[1] = ZERO;
  608. #endif
  609. }
  610. #endif
  611. #ifdef MALLOC_DEBUG
  612. void *blas_debug_alloc(int);
  613. void *blas_debug_free(void *);
  614. #undef malloc
  615. #undef free
  616. #define malloc(a) blas_debug_alloc(a)
  617. #define free(a) blas_debug_free (a)
  618. #endif
  619. #ifndef COPYOVERHEAD
  620. #define GEMMRETTYPE int
  621. #else
  622. typedef struct {
  623. double outercopy;
  624. double innercopy;
  625. double kernel;
  626. double mflops;
  627. } copyoverhead_t;
  628. #define GEMMRETTYPE copyoverhead_t
  629. #endif
  630. #endif
  631. #ifndef BUILD_KERNEL
  632. #define KNAME(A, B) A
  633. #else
  634. #define KNAME(A, B) A##B
  635. #endif
  636. #include "common_interface.h"
  637. #ifdef SANITY_CHECK
  638. #include "common_reference.h"
  639. #endif
  640. #include "common_macro.h"
  641. #include "common_level1.h"
  642. #include "common_level2.h"
  643. #include "common_level3.h"
  644. #include "common_lapack.h"
  645. #ifdef CBLAS
  646. # define OPENBLAS_CONST /* see comment in cblas.h */
  647. # include "cblas.h"
  648. #endif
  649. #ifndef ASSEMBLER
  650. #include "common_stackalloc.h"
  651. #if 0
  652. #include "symcopy.h"
  653. #endif
  654. #if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
  655. #error Both SMP_SERVER and SMP_ONDEMAND are specified.
  656. #endif
  657. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  658. #include "common_thread.h"
  659. #endif
  660. #endif
  661. #define INFO_NUM 99
  662. #ifndef DEFAULT_CPU_NUMBER
  663. #define DEFAULT_CPU_NUMBER 4
  664. #endif
  665. #ifndef IDEBUG_START
  666. #define IDEBUG_START
  667. #endif
  668. #ifndef IDEBUG_END
  669. #define IDEBUG_END
  670. #endif
  671. #if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
  672. typedef struct {
  673. int func;
  674. unsigned long long calls, fops, area, cycles, tcycles;
  675. } func_profile_t;
  676. extern func_profile_t function_profile_table[];
  677. extern int gotoblas_profile;
  678. #ifdef XDOUBLE
  679. #define NUMOPT QNUMOPT
  680. #elif defined DOUBLE
  681. #define NUMOPT DNUMOPT
  682. #else
  683. #define NUMOPT SNUMOPT
  684. #endif
  685. #define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
  686. #ifdef SMP
  687. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  688. if (gotoblas_profile) { \
  689. profile_end = rpcc(); \
  690. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  691. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  692. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  693. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  694. function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
  695. } \
  696. }
  697. #else
  698. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  699. if (gotoblas_profile) { \
  700. profile_end = rpcc(); \
  701. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  702. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  703. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  704. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  705. function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
  706. } \
  707. }
  708. #endif
  709. #else
  710. #define FUNCTION_PROFILE_START()
  711. #define FUNCTION_PROFILE_END(COMP, AREA, OPS)
  712. #endif
  713. #if 1
  714. #define PRINT_DEBUG_CNAME
  715. #define PRINT_DEBUG_NAME
  716. #else
  717. #define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
  718. #define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
  719. #endif
  720. #ifdef __cplusplus
  721. }
  722. #endif /* __cplusplus */
  723. #endif