You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

common.h 19 kB

13 years ago
10 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
13 years ago
13 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef COMMON_H
  39. #define COMMON_H
  40. #ifdef __cplusplus
  41. extern "C" {
  42. /* Assume C declarations for C++ */
  43. #endif /* __cplusplus */
  44. #ifndef _GNU_SOURCE
  45. #define _GNU_SOURCE
  46. #endif
  47. #ifndef __USE_XOPEN
  48. #define __USE_XOPEN
  49. #endif
  50. #ifndef __USE_SVID
  51. #define __USE_SVID
  52. #endif
  53. #ifdef BUILD_KERNEL
  54. #include "config_kernel.h"
  55. #else
  56. #include "config.h"
  57. #endif
  58. #undef ENABLE_SSE_EXCEPTION
  59. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  60. #define SMP
  61. #endif
  62. #if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
  63. #define WINDOWS_ABI
  64. #define OS_WINDOWS
  65. #ifdef DOUBLE
  66. #define DOUBLE_DEFINED DOUBLE
  67. #undef DOUBLE
  68. #endif
  69. #endif
  70. #if !defined(NOINCLUDE) && !defined(ASSEMBLER)
  71. #include <stdio.h>
  72. #include <stdlib.h>
  73. #include <string.h>
  74. #if !defined(_MSC_VER)
  75. #include <unistd.h>
  76. #endif
  77. #ifdef OS_LINUX
  78. #include <malloc.h>
  79. #include <sched.h>
  80. #endif
  81. #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
  82. #include <sched.h>
  83. #endif
  84. #ifdef OS_ANDROID
  85. #define NO_SYSV_IPC
  86. #endif
  87. #ifdef OS_WINDOWS
  88. #ifdef ATOM
  89. #define GOTO_ATOM ATOM
  90. #undef ATOM
  91. #endif
  92. #include <windows.h>
  93. #include <math.h>
  94. #ifdef GOTO_ATOM
  95. #define ATOM GOTO_ATOM
  96. #undef GOTO_ATOM
  97. #endif
  98. #else
  99. #include <sys/mman.h>
  100. #ifndef NO_SYSV_IPC
  101. #include <sys/shm.h>
  102. #endif
  103. #include <sys/time.h>
  104. #include <time.h>
  105. #include <unistd.h>
  106. #include <math.h>
  107. #ifdef SMP
  108. #include <pthread.h>
  109. #endif
  110. #endif
  111. #if defined(OS_SUNOS)
  112. #include <thread.h>
  113. #endif
  114. #ifdef __DECC
  115. #include <c_asm.h>
  116. #include <machine/builtins.h>
  117. #endif
  118. #if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
  119. #include <fenv.h>
  120. #endif
  121. #endif
  122. #if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
  123. #define DOUBLE DOUBLE_DEFINED
  124. #undef DOUBLE_DEFINED
  125. #endif
  126. #undef DEBUG_INFO
  127. #define SMP_DEBUG
  128. #undef MALLOC_DEBUG
  129. #undef SMP_ALLOC_DEBUG
  130. #ifndef ZERO
  131. #ifdef XDOUBLE
  132. #define ZERO 0.e0L
  133. #elif defined DOUBLE
  134. #define ZERO 0.e0
  135. #else
  136. #define ZERO 0.e0f
  137. #endif
  138. #endif
  139. #ifndef ONE
  140. #ifdef XDOUBLE
  141. #define ONE 1.e0L
  142. #elif defined DOUBLE
  143. #define ONE 1.e0
  144. #else
  145. #define ONE 1.e0f
  146. #endif
  147. #endif
  148. #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
  149. #define ALLOCA_ALIGN 63UL
  150. #define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
  151. #ifdef NEEDBUNDERSCORE
  152. #define BLASFUNC(FUNC) FUNC##_
  153. #else
  154. #define BLASFUNC(FUNC) FUNC
  155. #endif
  156. #undef USE_PTHREAD_LOCK
  157. #undef USE_PTHREAD_SPINLOCK
  158. #if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
  159. #error "You can't specify both LOCK operation!"
  160. #endif
  161. #ifdef SMP
  162. #define USE_PTHREAD_LOCK
  163. #undef USE_PTHREAD_SPINLOCK
  164. #endif
  165. #ifdef OS_WINDOWS
  166. #undef USE_PTHREAD_LOCK
  167. #undef USE_PTHREAD_SPINLOCK
  168. #endif
  169. #if defined(USE_PTHREAD_LOCK)
  170. #define LOCK_COMMAND(x) pthread_mutex_lock(x)
  171. #define UNLOCK_COMMAND(x) pthread_mutex_unlock(x)
  172. #elif defined(USE_PTHREAD_SPINLOCK)
  173. #ifndef ASSEMBLER
  174. typedef volatile int pthread_spinlock_t;
  175. int pthread_spin_lock (pthread_spinlock_t *__lock);
  176. int pthread_spin_unlock (pthread_spinlock_t *__lock);
  177. #endif
  178. #define LOCK_COMMAND(x) pthread_spin_lock(x)
  179. #define UNLOCK_COMMAND(x) pthread_spin_unlock(x)
  180. #else
  181. #define LOCK_COMMAND(x) blas_lock(x)
  182. #define UNLOCK_COMMAND(x) blas_unlock(x)
  183. #endif
  184. #define GOTO_SHMID 0x510510
  185. #if 0
  186. #ifndef __CUDACC__
  187. #define __global__
  188. #define __device__
  189. #define __host__
  190. #define __shared__
  191. #endif
  192. #endif
  193. #ifndef ASSEMBLER
  194. #ifdef QUAD_PRECISION
  195. typedef struct {
  196. unsigned long x[2];
  197. } xdouble;
  198. #elif defined EXPRECISION
  199. #define xdouble long double
  200. #else
  201. #define xdouble double
  202. #endif
  203. #if defined(OS_WINDOWS) && defined(__64BIT__)
  204. typedef long long BLASLONG;
  205. typedef unsigned long long BLASULONG;
  206. #else
  207. typedef long BLASLONG;
  208. typedef unsigned long BLASULONG;
  209. #endif
  210. #ifdef USE64BITINT
  211. typedef BLASLONG blasint;
  212. #else
  213. typedef int blasint;
  214. #endif
  215. #else
  216. #ifdef USE64BITINT
  217. #define INTSHIFT 3
  218. #define INTSIZE 8
  219. #else
  220. #define INTSHIFT 2
  221. #define INTSIZE 4
  222. #endif
  223. #endif
  224. #ifdef XDOUBLE
  225. #define FLOAT xdouble
  226. #ifdef QUAD_PRECISION
  227. #define XFLOAT xidouble
  228. #endif
  229. #ifdef QUAD_PRECISION
  230. #define SIZE 32
  231. #define BASE_SHIFT 5
  232. #define ZBASE_SHIFT 6
  233. #else
  234. #define SIZE 16
  235. #define BASE_SHIFT 4
  236. #define ZBASE_SHIFT 5
  237. #endif
  238. #elif defined(DOUBLE)
  239. #define FLOAT double
  240. #define SIZE 8
  241. #define BASE_SHIFT 3
  242. #define ZBASE_SHIFT 4
  243. #else
  244. #define FLOAT float
  245. #define SIZE 4
  246. #define BASE_SHIFT 2
  247. #define ZBASE_SHIFT 3
  248. #endif
  249. #ifndef XFLOAT
  250. #define XFLOAT FLOAT
  251. #endif
  252. #ifndef COMPLEX
  253. #define COMPSIZE 1
  254. #else
  255. #define COMPSIZE 2
  256. #endif
  257. #define Address_H(x) (((x)+(1<<15))>>16)
  258. #define Address_L(x) ((x)-((Address_H(x))<<16))
  259. #ifndef MAX_CPU_NUMBER
  260. #define MAX_CPU_NUMBER 2
  261. #endif
  262. #if defined(OS_SUNOS)
  263. #define YIELDING thr_yield()
  264. #endif
  265. #if defined(OS_WINDOWS)
  266. #if defined(_MSC_VER) && !defined(__clang__)
  267. #define YIELDING YieldProcessor()
  268. #else
  269. #define YIELDING SwitchToThread()
  270. #endif
  271. #endif
  272. #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
  273. #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
  274. #endif
  275. #ifdef BULLDOZER
  276. #ifndef YIELDING
  277. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  278. #endif
  279. #endif
  280. #ifdef PILEDRIVER
  281. #ifndef YIELDING
  282. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  283. #endif
  284. #endif
  285. /*
  286. #ifdef STEAMROLLER
  287. #ifndef YIELDING
  288. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  289. #endif
  290. #endif
  291. */
  292. #ifndef YIELDING
  293. #define YIELDING sched_yield()
  294. #endif
  295. /***
  296. To alloc job_t on heap or stack.
  297. Please see https://github.com/xianyi/OpenBLAS/issues/246
  298. ***/
  299. #if defined(OS_WINDOWS)
  300. #define GETRF_MEM_ALLOC_THRESHOLD 32
  301. #define BLAS3_MEM_ALLOC_THRESHOLD 32
  302. #endif
  303. #ifndef GETRF_MEM_ALLOC_THRESHOLD
  304. #define GETRF_MEM_ALLOC_THRESHOLD 80
  305. #endif
  306. #ifndef BLAS3_MEM_ALLOC_THRESHOLD
  307. #define BLAS3_MEM_ALLOC_THRESHOLD 160
  308. #endif
  309. #ifdef QUAD_PRECISION
  310. #include "common_quad.h"
  311. #endif
  312. #ifdef ARCH_ALPHA
  313. #include "common_alpha.h"
  314. #endif
  315. #ifdef ARCH_X86
  316. #include "common_x86.h"
  317. #endif
  318. #ifdef ARCH_X86_64
  319. #include "common_x86_64.h"
  320. #endif
  321. #ifdef ARCH_IA64
  322. #include "common_ia64.h"
  323. #endif
  324. #ifdef ARCH_POWER
  325. #include "common_power.h"
  326. #endif
  327. #ifdef sparc
  328. #include "common_sparc.h"
  329. #endif
  330. #ifdef ARCH_MIPS64
  331. #include "common_mips64.h"
  332. #endif
  333. #ifdef ARCH_ARM
  334. #include "common_arm.h"
  335. #endif
  336. #ifdef ARCH_ARM64
  337. #include "common_arm64.h"
  338. #endif
  339. #ifndef ASSEMBLER
  340. #ifdef OS_WINDOWS
  341. typedef char env_var_t[MAX_PATH];
  342. #define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
  343. #else
  344. typedef char* env_var_t;
  345. #define readenv(p, n) ((p)=getenv(n))
  346. #endif
  347. #if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
  348. #ifdef _POSIX_MONOTONIC_CLOCK
  349. #if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17)
  350. #if __GLIBC_PREREQ(2, 17) // don't require -lrt
  351. #define USE_MONOTONIC
  352. #endif
  353. #elif defined(OS_ANDROID)
  354. #define USE_MONOTONIC
  355. #endif
  356. #endif
  357. /* use similar scale as x86 rdtsc for timeouts to work correctly */
  /*
   * rpcc - software replacement for a hardware tick counter.
   *
   * Reads the current time and returns it expressed in nanoseconds:
   *   - with USE_MONOTONIC: clock_gettime(CLOCK_MONOTONIC), seconds scaled
   *     to nanoseconds plus the tv_nsec field;
   *   - otherwise: gettimeofday(), seconds scaled to nanoseconds plus the
   *     tv_usec field multiplied by 1000.
   * The return codes of the clock calls are not inspected.
   */
  static inline unsigned long long rpcc(void){
    unsigned long long ticks;
  #ifdef USE_MONOTONIC
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);
    ticks  = (unsigned long long)now.tv_sec * 1000000000ull;
    ticks += now.tv_nsec;
  #else
    struct timeval now;

    gettimeofday(&now, NULL);
    ticks  = (unsigned long long)now.tv_sec * 1000000000ull;
    ticks += now.tv_usec * 1000;   /* microseconds -> nanoseconds */
  #endif
    return ticks;
  }
  369. #define RPCC_DEFINED
  370. #define RPCC64BIT
  371. #endif // !RPCC_DEFINED
  372. #if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
  373. static void __inline blas_lock(volatile BLASULONG *address){
  374. do {
  375. while (*address) {YIELDING;};
  376. } while (!__sync_bool_compare_and_swap(address, 0, 1));
  377. }
  378. #define BLAS_LOCK_DEFINED
  379. #endif
  380. #ifndef RPCC_DEFINED
  381. #error "rpcc() implementation is missing for your platform"
  382. #endif
  383. #ifndef BLAS_LOCK_DEFINED
  384. #error "blas_lock() implementation is missing for your platform"
  385. #endif
  386. #endif // !ASSEMBLER
  387. #ifdef OS_LINUX
  388. #include "common_linux.h"
  389. #endif
  390. #define MMAP_ACCESS (PROT_READ | PROT_WRITE)
  391. #ifdef __NetBSD__
  392. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
  393. #else
  394. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
  395. #endif
  396. #include "param.h"
  397. #include "common_param.h"
  398. #ifndef STDERR
  399. #define STDERR stderr
  400. #endif
  401. #ifndef MASK
  402. #define MASK(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
  403. #endif
  404. #if defined(XDOUBLE) || defined(DOUBLE)
  405. #define FLOATRET FLOAT
  406. #else
  407. #ifdef NEED_F2CCONV
  408. #define FLOATRET double
  409. #else
  410. #define FLOATRET float
  411. #endif
  412. #endif
  413. #ifndef ASSEMBLER
  414. #ifndef NOINCLUDE
  415. /* Inclusion of a standard header file is needed for definition of __STDC_*
  416. predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
  417. as a side effect of including either <features.h> or <stdc-predef.h>. */
  418. #include <stdio.h>
  419. #endif // NOINCLUDE
  420. /* C99 supports complex floating numbers natively, which GCC also offers as an
  421. extension since version 3.0. If neither are available, use a compatible
  422. structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
  423. #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
  424. (__GNUC__ >= 3 && !defined(__cplusplus)) )
  425. #define OPENBLAS_COMPLEX_C99
  426. #ifndef __cplusplus
  427. #include <complex.h>
  428. #endif
  429. typedef float _Complex openblas_complex_float;
  430. typedef double _Complex openblas_complex_double;
  431. typedef xdouble _Complex openblas_complex_xdouble;
  432. #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
  433. #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
  434. #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
  435. #else
  436. #define OPENBLAS_COMPLEX_STRUCT
  437. typedef struct { float real, imag; } openblas_complex_float;
  438. typedef struct { double real, imag; } openblas_complex_double;
  439. typedef struct { xdouble real, imag; } openblas_complex_xdouble;
  440. #define openblas_make_complex_float(real, imag) {(real), (imag)}
  441. #define openblas_make_complex_double(real, imag) {(real), (imag)}
  442. #define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
  443. #endif
  444. #ifdef XDOUBLE
  445. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
  446. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
  447. #elif defined(DOUBLE)
  448. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
  449. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
  450. #else
  451. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
  452. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
  453. #endif
  454. #if defined(C_PGI) || defined(C_SUN)
  455. #define CREAL(X) (*((FLOAT *)&X + 0))
  456. #define CIMAG(X) (*((FLOAT *)&X + 1))
  457. #else
  458. #ifdef OPENBLAS_COMPLEX_STRUCT
  459. #define CREAL(Z) ((Z).real)
  460. #define CIMAG(Z) ((Z).imag)
  461. #else
  462. #define CREAL __real__
  463. #define CIMAG __imag__
  464. #endif
  465. #endif
  466. #endif // ASSEMBLER
  467. #ifndef IFLUSH
  468. #define IFLUSH
  469. #endif
  470. #ifndef IFLUSH_HALF
  471. #define IFLUSH_HALF
  472. #endif
  473. #if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
  474. #ifdef USE_OPENMP
  475. #undef USE_OPENMP
  476. #endif
  477. #endif
  478. #if defined(C_MSVC)
  479. #define inline __inline
  480. #endif
  481. #ifndef ASSEMBLER
  /*
   * MIN / MAX - smaller / larger of two values.
   *
   * Every argument is parenthesised so that expressions built from
   * low-precedence operators (?:, comparisons, bitwise operators, ...) are
   * compared as a whole instead of being re-associated by the expansion.
   * When both values compare equal, `a` is the result.
   * An argument may be evaluated twice, so do not pass expressions with
   * side effects.
   */
  #ifndef MIN
  #define MIN(a,b) (((a) > (b)) ? (b) : (a))
  #endif
  #ifndef MAX
  #define MAX(a,b) (((a) < (b)) ? (b) : (a))
  #endif
  488. #define TOUPPER(a) {if ((a) > 0x60) (a) -= 0x20;}
  489. #if defined(__FreeBSD__) || defined(__APPLE__)
  490. #define MAP_ANONYMOUS MAP_ANON
  491. #endif
  492. /* Common Memory Management Routine */
  493. void blas_set_parameter(void);
  494. int blas_get_cpu_number(void);
  495. void *blas_memory_alloc (int);
  496. void blas_memory_free (void *);
  497. void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
  498. void blas_memory_free_nolock (void *);
  499. int get_num_procs (void);
  500. #if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
  501. int get_num_nodes (void);
  502. int get_num_proc (int);
  503. int get_node_equal (void);
  504. #endif
  505. void goto_set_num_threads(int);
  506. void gotoblas_affinity_init(void);
  507. void gotoblas_affinity_quit(void);
  508. void gotoblas_dynamic_init(void);
  509. void gotoblas_dynamic_quit(void);
  510. void gotoblas_profile_init(void);
  511. void gotoblas_profile_quit(void);
  512. #ifdef USE_OPENMP
  513. int omp_in_parallel(void);
  514. int omp_get_num_procs(void);
  515. #else
  516. #ifdef __ELF__
  517. int omp_in_parallel (void) __attribute__ ((weak));
  518. int omp_get_num_procs(void) __attribute__ ((weak));
  519. #endif
  520. #endif
  521. static __inline void blas_unlock(volatile BLASULONG *address){
  522. MB;
  523. *address = 0;
  524. }
  /*
   * readenv_atoi - read the environment variable named `env` and convert
   * its value with atoi().
   *
   * Returns the converted value, or 0 when the variable cannot be read.
   */
  #ifdef OS_WINDOWS
  static __inline int readenv_atoi(char *env) {
    env_var_t p;
    /* On Windows readenv() expands to GetEnvironmentVariable(), which
       returns the number of characters stored (terminator excluded),
       0 on failure, or the required size when the buffer is too small.
       The buffer is only meaningful in the first case, so parse it only
       then; previously the two outcomes were swapped. */
    unsigned long len = (unsigned long)readenv(p, env);

    if (len == 0 || len >= sizeof(p))
      return 0;
    return atoi(p);
  }
  #else
  static __inline int readenv_atoi(char *env) {
    const char *value = getenv(env);

    /* getenv() yields NULL for a variable that is not set. */
    return value ? atoi(value) : 0;
  }
  #endif
  539. #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
  540. static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
  541. #ifndef UNIT
  542. FLOAT ratio, den;
  543. if (
  544. #ifdef XDOUBLE
  545. (fabsl(ar)) >= (fabsl(ai))
  546. #elif defined DOUBLE
  547. (fabs (ar)) >= (fabs (ai))
  548. #else
  549. (fabsf(ar)) >= (fabsf(ai))
  550. #endif
  551. ) {
  552. ratio = ai / ar;
  553. den = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
  554. ar = den;
  555. ai = -ratio * den;
  556. } else {
  557. ratio = ar / ai;
  558. den = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
  559. ar = ratio * den;
  560. ai = -den;
  561. }
  562. b[0] = ar;
  563. b[1] = ai;
  564. #else
  565. b[0] = ONE;
  566. b[1] = ZERO;
  567. #endif
  568. }
  569. #endif
  570. #ifdef MALLOC_DEBUG
  571. void *blas_debug_alloc(int);
  572. void *blas_debug_free(void *);
  573. #undef malloc
  574. #undef free
  575. #define malloc(a) blas_debug_alloc(a)
  576. #define free(a) blas_debug_free (a)
  577. #endif
  578. #ifndef COPYOVERHEAD
  579. #define GEMMRETTYPE int
  580. #else
  581. typedef struct {
  582. double outercopy;
  583. double innercopy;
  584. double kernel;
  585. double mflops;
  586. } copyoverhead_t;
  587. #define GEMMRETTYPE copyoverhead_t
  588. #endif
  589. #endif
  590. #ifndef BUILD_KERNEL
  591. #define KNAME(A, B) A
  592. #else
  593. #define KNAME(A, B) A##B
  594. #endif
  595. #include "common_interface.h"
  596. #ifdef SANITY_CHECK
  597. #include "common_reference.h"
  598. #endif
  599. #include "common_macro.h"
  600. #include "common_level1.h"
  601. #include "common_level2.h"
  602. #include "common_level3.h"
  603. #include "common_lapack.h"
  604. #ifdef CBLAS
  605. # define OPENBLAS_CONST /* see comment in cblas.h */
  606. # include "cblas.h"
  607. #endif
  608. #ifndef ASSEMBLER
  609. #if 0
  610. #include "symcopy.h"
  611. #endif
  612. #if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
  613. #error Both SMP_SERVER and SMP_ONDEMAND are specified.
  614. #endif
  615. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  616. #include "common_thread.h"
  617. #endif
  618. #endif
  619. #define INFO_NUM 99
  620. #ifndef DEFAULT_CPU_NUMBER
  621. #define DEFAULT_CPU_NUMBER 4
  622. #endif
  623. #ifndef IDEBUG_START
  624. #define IDEBUG_START
  625. #endif
  626. #ifndef IDEBUG_END
  627. #define IDEBUG_END
  628. #endif
  629. #if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
  630. typedef struct {
  631. int func;
  632. unsigned long long calls, fops, area, cycles, tcycles;
  633. } func_profile_t;
  634. extern func_profile_t function_profile_table[];
  635. extern int gotoblas_profile;
  636. #ifdef XDOUBLE
  637. #define NUMOPT QNUMOPT
  638. #elif defined DOUBLE
  639. #define NUMOPT DNUMOPT
  640. #else
  641. #define NUMOPT SNUMOPT
  642. #endif
  643. #define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
  644. #ifdef SMP
  645. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  646. if (gotoblas_profile) { \
  647. profile_end = rpcc(); \
  648. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  649. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  650. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  651. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  652. function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
  653. } \
  654. }
  655. #else
  656. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  657. if (gotoblas_profile) { \
  658. profile_end = rpcc(); \
  659. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  660. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  661. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  662. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  663. function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
  664. } \
  665. }
  666. #endif
  667. #else
  668. #define FUNCTION_PROFILE_START()
  669. #define FUNCTION_PROFILE_END(COMP, AREA, OPS)
  670. #endif
  671. #if 1
  672. #define PRINT_DEBUG_CNAME
  673. #define PRINT_DEBUG_NAME
  674. #else
  675. #define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
  676. #define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
  677. #endif
  678. #ifdef __cplusplus
  679. }
  680. #endif /* __cplusplus */
  681. #endif