You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

common.h 19 kB

13 years ago
10 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
13 years ago
13 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef COMMON_H
  39. #define COMMON_H
  40. #ifdef __cplusplus
  41. extern "C" {
  42. /* Assume C declarations for C++ */
  43. #endif /* __cplusplus */
  44. #ifndef _GNU_SOURCE
  45. #define _GNU_SOURCE
  46. #endif
  47. #ifndef __USE_XOPEN
  48. #define __USE_XOPEN
  49. #endif
  50. #ifndef __USE_SVID
  51. #define __USE_SVID
  52. #endif
  53. #ifdef BUILD_KERNEL
  54. #include "config_kernel.h"
  55. #else
  56. #include "config.h"
  57. #endif
  58. #undef ENABLE_SSE_EXCEPTION
  59. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  60. #define SMP
  61. #endif
  62. #if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
  63. #define WINDOWS_ABI
  64. #define OS_WINDOWS
  65. #ifdef DOUBLE
  66. #define DOUBLE_DEFINED DOUBLE
  67. #undef DOUBLE
  68. #endif
  69. #endif
  70. #if !defined(NOINCLUDE) && !defined(ASSEMBLER)
  71. #include <stdio.h>
  72. #include <stdlib.h>
  73. #include <string.h>
  74. #if !defined(_MSC_VER)
  75. #include <unistd.h>
  76. #endif
  77. #ifdef OS_LINUX
  78. #include <malloc.h>
  79. #include <sched.h>
  80. #endif
  81. #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
  82. #include <sched.h>
  83. #endif
  84. #ifdef OS_ANDROID
  85. #define NO_SYSV_IPC
  86. //Android NDK only supports complex.h since Android 5.0
  87. #if __ANDROID_API__ < 21
  88. #define FORCE_OPENBLAS_COMPLEX_STRUCT
  89. #endif
  90. #endif
  91. #ifdef OS_WINDOWS
  92. #ifdef ATOM
  93. #define GOTO_ATOM ATOM
  94. #undef ATOM
  95. #endif
  96. #include <windows.h>
  97. #include <math.h>
  98. #ifdef GOTO_ATOM
  99. #define ATOM GOTO_ATOM
  100. #undef GOTO_ATOM
  101. #endif
  102. #else
  103. #include <sys/mman.h>
  104. #ifndef NO_SYSV_IPC
  105. #include <sys/shm.h>
  106. #endif
  107. #include <sys/time.h>
  108. #include <time.h>
  109. #include <unistd.h>
  110. #include <math.h>
  111. #ifdef SMP
  112. #include <pthread.h>
  113. #endif
  114. #endif
  115. #if defined(OS_SUNOS)
  116. #include <thread.h>
  117. #endif
  118. #ifdef __DECC
  119. #include <c_asm.h>
  120. #include <machine/builtins.h>
  121. #endif
  122. #if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
  123. #include <fenv.h>
  124. #endif
  125. #endif
  126. #if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
  127. #define DOUBLE DOUBLE_DEFINED
  128. #undef DOUBLE_DEFINED
  129. #endif
  130. #undef DEBUG_INFO
  131. #define SMP_DEBUG
  132. #undef MALLOC_DEBUG
  133. #undef SMP_ALLOC_DEBUG
  134. #ifndef ZERO
  135. #ifdef XDOUBLE
  136. #define ZERO 0.e0L
  137. #elif defined DOUBLE
  138. #define ZERO 0.e0
  139. #else
  140. #define ZERO 0.e0f
  141. #endif
  142. #endif
  143. #ifndef ONE
  144. #ifdef XDOUBLE
  145. #define ONE 1.e0L
  146. #elif defined DOUBLE
  147. #define ONE 1.e0
  148. #else
  149. #define ONE 1.e0f
  150. #endif
  151. #endif
  152. #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
  153. #define ALLOCA_ALIGN 63UL
  154. #define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
  155. #ifdef NEEDBUNDERSCORE
  156. #define BLASFUNC(FUNC) FUNC##_
  157. #else
  158. #define BLASFUNC(FUNC) FUNC
  159. #endif
  160. #undef USE_PTHREAD_LOCK
  161. #undef USE_PTHREAD_SPINLOCK
  162. #if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
  163. #error "You can't specify both LOCK operation!"
  164. #endif
  165. #ifdef SMP
  166. #define USE_PTHREAD_LOCK
  167. #undef USE_PTHREAD_SPINLOCK
  168. #endif
  169. #ifdef OS_WINDOWS
  170. #undef USE_PTHREAD_LOCK
  171. #undef USE_PTHREAD_SPINLOCK
  172. #endif
  173. #if defined(USE_PTHREAD_LOCK)
  174. #define LOCK_COMMAND(x) pthread_mutex_lock(x)
  175. #define UNLOCK_COMMAND(x) pthread_mutex_unlock(x)
  176. #elif defined(USE_PTHREAD_SPINLOCK)
  177. #ifndef ASSEMBLER
  178. typedef volatile int pthread_spinlock_t;
  179. int pthread_spin_lock (pthread_spinlock_t *__lock);
  180. int pthread_spin_unlock (pthread_spinlock_t *__lock);
  181. #endif
  182. #define LOCK_COMMAND(x) pthread_spin_lock(x)
  183. #define UNLOCK_COMMAND(x) pthread_spin_unlock(x)
  184. #else
  185. #define LOCK_COMMAND(x) blas_lock(x)
  186. #define UNLOCK_COMMAND(x) blas_unlock(x)
  187. #endif
  188. #define GOTO_SHMID 0x510510
  189. #if 0
  190. #ifndef __CUDACC__
  191. #define __global__
  192. #define __device__
  193. #define __host__
  194. #define __shared__
  195. #endif
  196. #endif
  197. #ifndef ASSEMBLER
  198. #ifdef QUAD_PRECISION
  199. typedef struct {
  200. unsigned long x[2];
  201. } xdouble;
  202. #elif defined EXPRECISION
  203. #define xdouble long double
  204. #else
  205. #define xdouble double
  206. #endif
  207. #if defined(OS_WINDOWS) && defined(__64BIT__)
  208. typedef long long BLASLONG;
  209. typedef unsigned long long BLASULONG;
  210. #else
  211. typedef long BLASLONG;
  212. typedef unsigned long BLASULONG;
  213. #endif
  214. #ifdef USE64BITINT
  215. typedef BLASLONG blasint;
  216. #else
  217. typedef int blasint;
  218. #endif
  219. #else
  220. #ifdef USE64BITINT
  221. #define INTSHIFT 3
  222. #define INTSIZE 8
  223. #else
  224. #define INTSHIFT 2
  225. #define INTSIZE 4
  226. #endif
  227. #endif
  228. #ifdef XDOUBLE
  229. #define FLOAT xdouble
  230. #ifdef QUAD_PRECISION
  231. #define XFLOAT xidouble
  232. #endif
  233. #ifdef QUAD_PRECISION
  234. #define SIZE 32
  235. #define BASE_SHIFT 5
  236. #define ZBASE_SHIFT 6
  237. #else
  238. #define SIZE 16
  239. #define BASE_SHIFT 4
  240. #define ZBASE_SHIFT 5
  241. #endif
  242. #elif defined(DOUBLE)
  243. #define FLOAT double
  244. #define SIZE 8
  245. #define BASE_SHIFT 3
  246. #define ZBASE_SHIFT 4
  247. #else
  248. #define FLOAT float
  249. #define SIZE 4
  250. #define BASE_SHIFT 2
  251. #define ZBASE_SHIFT 3
  252. #endif
  253. #ifndef XFLOAT
  254. #define XFLOAT FLOAT
  255. #endif
  256. #ifndef COMPLEX
  257. #define COMPSIZE 1
  258. #else
  259. #define COMPSIZE 2
  260. #endif
  261. #define Address_H(x) (((x)+(1<<15))>>16)
  262. #define Address_L(x) ((x)-((Address_H(x))<<16))
  263. #ifndef MAX_CPU_NUMBER
  264. #define MAX_CPU_NUMBER 2
  265. #endif
  266. #if defined(OS_SUNOS)
  267. #define YIELDING thr_yield()
  268. #endif
  269. #if defined(OS_WINDOWS)
  270. #if defined(_MSC_VER) && !defined(__clang__)
  271. #define YIELDING YieldProcessor()
  272. #else
  273. #define YIELDING SwitchToThread()
  274. #endif
  275. #endif
  /*
   * Architecture-specific spin-wait hints.  Each definition expands to a
   * single expression-like statement WITHOUT a trailing semicolon, matching
   * the sched_yield()/thr_yield()/SwitchToThread() variants, so that
   * `YIELDING;` is exactly one statement on every platform (safe inside an
   * unbraced if/else).  The reserved-namespace spelling __asm__ __volatile__
   * is used so the header also compiles in strict ISO C modes where the
   * plain `asm` keyword is not available.
   */
  #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop; \n")
  #endif
  #ifdef BULLDOZER
  #ifndef YIELDING
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
  #endif
  #endif
  #ifdef PILEDRIVER
  #ifndef YIELDING
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
  #endif
  #endif
  289. /*
  290. #ifdef STEAMROLLER
  291. #ifndef YIELDING
  292. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  293. #endif
  294. #endif
  295. */
  296. #ifndef YIELDING
  297. #define YIELDING sched_yield()
  298. #endif
  299. /***
  To allocate job_t on the heap or the stack.
  Please see https://github.com/xianyi/OpenBLAS/issues/246
  302. ***/
  303. #if defined(OS_WINDOWS)
  304. #define GETRF_MEM_ALLOC_THRESHOLD 32
  305. #define BLAS3_MEM_ALLOC_THRESHOLD 32
  306. #endif
  307. #ifndef GETRF_MEM_ALLOC_THRESHOLD
  308. #define GETRF_MEM_ALLOC_THRESHOLD 80
  309. #endif
  310. #ifndef BLAS3_MEM_ALLOC_THRESHOLD
  311. #define BLAS3_MEM_ALLOC_THRESHOLD 160
  312. #endif
  313. #ifdef QUAD_PRECISION
  314. #include "common_quad.h"
  315. #endif
  316. #ifdef ARCH_ALPHA
  317. #include "common_alpha.h"
  318. #endif
  319. #ifdef ARCH_X86
  320. #include "common_x86.h"
  321. #endif
  322. #ifdef ARCH_X86_64
  323. #include "common_x86_64.h"
  324. #endif
  325. #ifdef ARCH_IA64
  326. #include "common_ia64.h"
  327. #endif
  328. #ifdef ARCH_POWER
  329. #include "common_power.h"
  330. #endif
  331. #ifdef sparc
  332. #include "common_sparc.h"
  333. #endif
  334. #ifdef ARCH_MIPS64
  335. #include "common_mips64.h"
  336. #endif
  337. #ifdef ARCH_ARM
  338. #include "common_arm.h"
  339. #endif
  340. #ifdef ARCH_ARM64
  341. #include "common_arm64.h"
  342. #endif
  343. #ifndef ASSEMBLER
  344. #ifdef OS_WINDOWS
  345. typedef char env_var_t[MAX_PATH];
  346. #define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
  347. #else
  348. typedef char* env_var_t;
  349. #define readenv(p, n) ((p)=getenv(n))
  350. #endif
  351. #if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
  352. #ifdef _POSIX_MONOTONIC_CLOCK
  353. #if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17)
  354. #if __GLIBC_PREREQ(2, 17) // don't require -lrt
  355. #define USE_MONOTONIC
  356. #endif
  357. #elif defined(OS_ANDROID)
  358. #define USE_MONOTONIC
  359. #endif
  360. #endif
  361. /* use similar scale as x86 rdtsc for timeouts to work correctly */
  /*
   * Return a timestamp expressed in nanoseconds: whole seconds scaled by
   * 1e9 plus the sub-second part.  The clock source is
   * clock_gettime(CLOCK_MONOTONIC) when USE_MONOTONIC is defined and
   * gettimeofday() (microseconds scaled by 1000) otherwise.
   */
  static inline unsigned long long rpcc(void){
    unsigned long long whole_seconds;
    unsigned long long sub_second_ns;
  #ifdef USE_MONOTONIC
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);
    whole_seconds = (unsigned long long)now.tv_sec;
    sub_second_ns = now.tv_nsec;
  #else
    struct timeval now;
    gettimeofday(&now, NULL);
    whole_seconds = (unsigned long long)now.tv_sec;
    sub_second_ns = now.tv_usec * 1000;
  #endif
    return whole_seconds * 1000000000ull + sub_second_ns;
  }
  373. #define RPCC_DEFINED
  374. #define RPCC64BIT
  375. #endif // !RPCC_DEFINED
  376. #if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
  377. static void __inline blas_lock(volatile BLASULONG *address){
  378. do {
  379. while (*address) {YIELDING;};
  380. } while (!__sync_bool_compare_and_swap(address, 0, 1));
  381. }
  382. #define BLAS_LOCK_DEFINED
  383. #endif
  384. #ifndef RPCC_DEFINED
  385. #error "rpcc() implementation is missing for your platform"
  386. #endif
  387. #ifndef BLAS_LOCK_DEFINED
  388. #error "blas_lock() implementation is missing for your platform"
  389. #endif
  390. #endif // !ASSEMBLER
  391. #ifdef OS_LINUX
  392. #include "common_linux.h"
  393. #endif
  394. #define MMAP_ACCESS (PROT_READ | PROT_WRITE)
  395. #ifdef __NetBSD__
  396. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
  397. #else
  398. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
  399. #endif
  400. #include "param.h"
  401. #include "common_param.h"
  402. #ifndef STDERR
  403. #define STDERR stderr
  404. #endif
  405. #ifndef MASK
  406. #define MASK(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
  407. #endif
  408. #if defined(XDOUBLE) || defined(DOUBLE)
  409. #define FLOATRET FLOAT
  410. #else
  411. #ifdef NEED_F2CCONV
  412. #define FLOATRET double
  413. #else
  414. #define FLOATRET float
  415. #endif
  416. #endif
  417. #ifndef ASSEMBLER
  418. #ifndef NOINCLUDE
  419. /* Inclusion of a standard header file is needed for definition of __STDC_*
  420. predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
  421. as a side effect of including either <features.h> or <stdc-predef.h>. */
  422. #include <stdio.h>
  423. #endif // NOINCLUDE
  424. /* C99 supports complex floating numbers natively, which GCC also offers as an
  425. extension since version 3.0. If neither are available, use a compatible
  426. structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
  427. #if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
  428. (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
  429. #define OPENBLAS_COMPLEX_C99
  430. #ifndef __cplusplus
  431. #include <complex.h>
  432. #endif
  433. typedef float _Complex openblas_complex_float;
  434. typedef double _Complex openblas_complex_double;
  435. typedef xdouble _Complex openblas_complex_xdouble;
  436. #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
  437. #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
  438. #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
  439. #else
  440. #define OPENBLAS_COMPLEX_STRUCT
  441. typedef struct { float real, imag; } openblas_complex_float;
  442. typedef struct { double real, imag; } openblas_complex_double;
  443. typedef struct { xdouble real, imag; } openblas_complex_xdouble;
  444. #define openblas_make_complex_float(real, imag) {(real), (imag)}
  445. #define openblas_make_complex_double(real, imag) {(real), (imag)}
  446. #define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
  447. #endif
  448. #ifdef XDOUBLE
  449. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
  450. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
  451. #elif defined(DOUBLE)
  452. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
  453. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
  454. #else
  455. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
  456. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
  457. #endif
  458. #if defined(C_PGI) || defined(C_SUN)
  459. #define CREAL(X) (*((FLOAT *)&X + 0))
  460. #define CIMAG(X) (*((FLOAT *)&X + 1))
  461. #else
  462. #ifdef OPENBLAS_COMPLEX_STRUCT
  463. #define CREAL(Z) ((Z).real)
  464. #define CIMAG(Z) ((Z).imag)
  465. #else
  466. #define CREAL __real__
  467. #define CIMAG __imag__
  468. #endif
  469. #endif
  470. #endif // ASSEMBLER
  471. #ifndef IFLUSH
  472. #define IFLUSH
  473. #endif
  474. #ifndef IFLUSH_HALF
  475. #define IFLUSH_HALF
  476. #endif
  477. #if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
  478. #ifdef USE_OPENMP
  479. #undef USE_OPENMP
  480. #endif
  481. #endif
  482. #if defined(C_MSVC)
  483. #define inline __inline
  484. #endif
  485. #ifndef ASSEMBLER
  /*
   * MIN / MAX: smaller / larger of two values.  Every argument and the whole
   * expansion are parenthesized so that operands containing lower-precedence
   * operators (e.g. `x & 1`, `c ? a : b`) are compared as written.
   * When the operands compare equal, the first argument is returned.
   * NOTE: each argument may be evaluated twice; do not pass expressions with
   * side effects.
   */
  #ifndef MIN
  #define MIN(a,b) (((a) > (b)) ? (b) : (a))
  #endif
  #ifndef MAX
  #define MAX(a,b) (((a) < (b)) ? (b) : (a))
  #endif
  492. #define TOUPPER(a) {if ((a) > 0x60) (a) -= 0x20;}
  493. #if defined(__FreeBSD__) || defined(__APPLE__)
  494. #define MAP_ANONYMOUS MAP_ANON
  495. #endif
  496. /* Common Memory Management Routine */
  497. void blas_set_parameter(void);
  498. int blas_get_cpu_number(void);
  499. void *blas_memory_alloc (int);
  500. void blas_memory_free (void *);
  501. void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
  502. void blas_memory_free_nolock (void *);
  503. int get_num_procs (void);
  504. #if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
  505. int get_num_nodes (void);
  506. int get_num_proc (int);
  507. int get_node_equal (void);
  508. #endif
  509. void goto_set_num_threads(int);
  510. void gotoblas_affinity_init(void);
  511. void gotoblas_affinity_quit(void);
  512. void gotoblas_dynamic_init(void);
  513. void gotoblas_dynamic_quit(void);
  514. void gotoblas_profile_init(void);
  515. void gotoblas_profile_quit(void);
  516. #ifdef USE_OPENMP
  517. int omp_in_parallel(void);
  518. int omp_get_num_procs(void);
  519. #else
  520. #ifdef __ELF__
  521. int omp_in_parallel (void) __attribute__ ((weak));
  522. int omp_get_num_procs(void) __attribute__ ((weak));
  523. #endif
  524. #endif
  /*
   * Release a lock word: expand the MB macro, then store 0 to *address.
   * 0 is the value the generic blas_lock() in this header waits for before
   * attempting its compare-and-swap.
   * NOTE(review): MB is defined outside this section (presumably a memory
   * barrier supplied by the per-architecture header) -- confirm there.
   */
  static __inline void blas_unlock(volatile BLASULONG *address){
  MB;
  *address = 0;
  }
  /*
   * readenv_atoi: look up the environment variable named `env` and return
   * its value converted with atoi().  Returns 0 when the variable is not set
   * (and, on Windows, when its value does not fit in an env_var_t buffer).
   */
  #ifdef OS_WINDOWS
  static __inline int readenv_atoi(char *env) {
    env_var_t p;
    /* readenv() expands to GetEnvironmentVariable(), which returns 0 when
       the variable is missing and, when the buffer is too small, the
       required size (leaving the buffer contents undefined).  Only parse
       the buffer when it was actually filled in. */
    unsigned long len = readenv(p,env);
    if (len == 0 || len >= sizeof(p))
      return 0;
    return atoi(p);
  }
  #else
  static __inline int readenv_atoi(char *env) {
    char *p = getenv(env);
    return p ? atoi(p) : 0;
  }
  #endif
  543. #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
  544. static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
  545. #ifndef UNIT
  546. FLOAT ratio, den;
  547. if (
  548. #ifdef XDOUBLE
  549. (fabsl(ar)) >= (fabsl(ai))
  550. #elif defined DOUBLE
  551. (fabs (ar)) >= (fabs (ai))
  552. #else
  553. (fabsf(ar)) >= (fabsf(ai))
  554. #endif
  555. ) {
  556. ratio = ai / ar;
  557. den = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
  558. ar = den;
  559. ai = -ratio * den;
  560. } else {
  561. ratio = ar / ai;
  562. den = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
  563. ar = ratio * den;
  564. ai = -den;
  565. }
  566. b[0] = ar;
  567. b[1] = ai;
  568. #else
  569. b[0] = ONE;
  570. b[1] = ZERO;
  571. #endif
  572. }
  573. #endif
  574. #ifdef MALLOC_DEBUG
  575. void *blas_debug_alloc(int);
  576. void *blas_debug_free(void *);
  577. #undef malloc
  578. #undef free
  579. #define malloc(a) blas_debug_alloc(a)
  580. #define free(a) blas_debug_free (a)
  581. #endif
  582. #ifndef COPYOVERHEAD
  583. #define GEMMRETTYPE int
  584. #else
  585. typedef struct {
  586. double outercopy;
  587. double innercopy;
  588. double kernel;
  589. double mflops;
  590. } copyoverhead_t;
  591. #define GEMMRETTYPE copyoverhead_t
  592. #endif
  593. #endif
  594. #ifndef BUILD_KERNEL
  595. #define KNAME(A, B) A
  596. #else
  597. #define KNAME(A, B) A##B
  598. #endif
  599. #include "common_interface.h"
  600. #ifdef SANITY_CHECK
  601. #include "common_reference.h"
  602. #endif
  603. #include "common_macro.h"
  604. #include "common_level1.h"
  605. #include "common_level2.h"
  606. #include "common_level3.h"
  607. #include "common_lapack.h"
  608. #ifdef CBLAS
  609. # define OPENBLAS_CONST /* see comment in cblas.h */
  610. # include "cblas.h"
  611. #endif
  612. #ifndef ASSEMBLER
  613. #if 0
  614. #include "symcopy.h"
  615. #endif
  616. #if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
  617. #error Both SMP_SERVER and SMP_ONDEMAND are specified.
  618. #endif
  619. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  620. #include "common_thread.h"
  621. #endif
  622. #endif
  623. #define INFO_NUM 99
  624. #ifndef DEFAULT_CPU_NUMBER
  625. #define DEFAULT_CPU_NUMBER 4
  626. #endif
  627. #ifndef IDEBUG_START
  628. #define IDEBUG_START
  629. #endif
  630. #ifndef IDEBUG_END
  631. #define IDEBUG_END
  632. #endif
  633. #if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
  634. typedef struct {
  635. int func;
  636. unsigned long long calls, fops, area, cycles, tcycles;
  637. } func_profile_t;
  638. extern func_profile_t function_profile_table[];
  639. extern int gotoblas_profile;
  640. #ifdef XDOUBLE
  641. #define NUMOPT QNUMOPT
  642. #elif defined DOUBLE
  643. #define NUMOPT DNUMOPT
  644. #else
  645. #define NUMOPT SNUMOPT
  646. #endif
  647. #define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
  648. #ifdef SMP
  649. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  650. if (gotoblas_profile) { \
  651. profile_end = rpcc(); \
  652. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  653. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  654. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  655. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  656. function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
  657. } \
  658. }
  659. #else
  660. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  661. if (gotoblas_profile) { \
  662. profile_end = rpcc(); \
  663. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  664. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  665. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  666. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  667. function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
  668. } \
  669. }
  670. #endif
  671. #else
  672. #define FUNCTION_PROFILE_START()
  673. #define FUNCTION_PROFILE_END(COMP, AREA, OPS)
  674. #endif
  675. #if 1
  676. #define PRINT_DEBUG_CNAME
  677. #define PRINT_DEBUG_NAME
  678. #else
  679. #define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
  680. #define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
  681. #endif
  682. #ifdef __cplusplus
  683. }
  684. #endif /* __cplusplus */
  685. #endif