You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

common.h 20 kB

13 years ago
10 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
13 years ago
13 years ago
7 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef COMMON_H
  39. #define COMMON_H
  40. #ifdef __cplusplus
  41. extern "C" {
  42. /* Assume C declarations for C++ */
  43. #endif /* __cplusplus */
  44. #ifndef _GNU_SOURCE
  45. #define _GNU_SOURCE
  46. #endif
  47. #ifndef __USE_XOPEN
  48. #define __USE_XOPEN
  49. #endif
  50. #ifndef __USE_SVID
  51. #define __USE_SVID
  52. #endif
  53. #ifdef BUILD_KERNEL
  54. #include "config_kernel.h"
  55. #else
  56. #include "config.h"
  57. #endif
  58. #undef ENABLE_SSE_EXCEPTION
  59. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  60. #define SMP
  61. #endif
  62. #if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
  63. #define WINDOWS_ABI
  64. #define OS_WINDOWS
  65. #ifdef DOUBLE
  66. #define DOUBLE_DEFINED DOUBLE
  67. #undef DOUBLE
  68. #endif
  69. #endif
  70. #if !defined(NOINCLUDE) && !defined(ASSEMBLER)
  71. #include <stdio.h>
  72. #include <stdlib.h>
  73. #include <string.h>
  74. #if !defined(_MSC_VER)
  75. #include <unistd.h>
  76. #endif
  77. #include <time.h>
  78. #ifdef OS_LINUX
  79. #include <malloc.h>
  80. #include <sched.h>
  81. #endif
  82. #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_ANDROID)
  83. #include <sched.h>
  84. #endif
  85. #ifdef OS_ANDROID
  86. #define NO_SYSV_IPC
  87. //Android NDK only supports complex.h since Android 5.0
  88. #if __ANDROID_API__ < 21
  89. #define FORCE_OPENBLAS_COMPLEX_STRUCT
  90. #endif
  91. #endif
  92. #ifdef OS_HAIKU
  93. #define NO_SYSV_IPC
  94. #endif
  95. #ifdef OS_WINDOWS
  96. #ifdef ATOM
  97. #define GOTO_ATOM ATOM
  98. #undef ATOM
  99. #endif
  100. #include <windows.h>
  101. #include <math.h>
  102. #ifdef GOTO_ATOM
  103. #define ATOM GOTO_ATOM
  104. #undef GOTO_ATOM
  105. #endif
  106. #else
  107. #include <sys/mman.h>
  108. #ifndef NO_SYSV_IPC
  109. #include <sys/shm.h>
  110. #endif
  111. #include <sys/time.h>
  112. #include <time.h>
  113. #include <unistd.h>
  114. #include <math.h>
  115. #ifdef SMP
  116. #include <pthread.h>
  117. #endif
  118. #endif
  119. #if defined(OS_SUNOS)
  120. #include <thread.h>
  121. #endif
  122. #ifdef __DECC
  123. #include <c_asm.h>
  124. #include <machine/builtins.h>
  125. #endif
  126. #if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
  127. #include <fenv.h>
  128. #endif
  129. #endif
  130. #if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
  131. #define DOUBLE DOUBLE_DEFINED
  132. #undef DOUBLE_DEFINED
  133. #endif
  134. #undef DEBUG_INFO
  135. #define SMP_DEBUG
  136. #undef MALLOC_DEBUG
  137. #undef SMP_ALLOC_DEBUG
  138. #ifndef ZERO
  139. #ifdef XDOUBLE
  140. #define ZERO 0.e0L
  141. #elif defined DOUBLE
  142. #define ZERO 0.e0
  143. #else
  144. #define ZERO 0.e0f
  145. #endif
  146. #endif
  147. #ifndef ONE
  148. #ifdef XDOUBLE
  149. #define ONE 1.e0L
  150. #elif defined DOUBLE
  151. #define ONE 1.e0
  152. #else
  153. #define ONE 1.e0f
  154. #endif
  155. #endif
  156. #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
  157. #define ALLOCA_ALIGN 63UL
  158. #define NUM_BUFFERS MAX(50,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER))
  159. #ifdef NEEDBUNDERSCORE
  160. #define BLASFUNC(FUNC) FUNC##_
  161. #else
  162. #define BLASFUNC(FUNC) FUNC
  163. #endif
  164. #undef USE_PTHREAD_LOCK
  165. #undef USE_PTHREAD_SPINLOCK
  166. #if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
  167. #error "You can't specify both LOCK operation!"
  168. #endif
  169. #ifdef SMP
  170. #define USE_PTHREAD_LOCK
  171. #undef USE_PTHREAD_SPINLOCK
  172. #endif
  173. #ifdef OS_WINDOWS
  174. #undef USE_PTHREAD_LOCK
  175. #undef USE_PTHREAD_SPINLOCK
  176. #endif
  177. #if defined(USE_PTHREAD_LOCK)
  178. #define LOCK_COMMAND(x) pthread_mutex_lock(x)
  179. #define UNLOCK_COMMAND(x) pthread_mutex_unlock(x)
  180. #elif defined(USE_PTHREAD_SPINLOCK)
  181. #ifndef ASSEMBLER
  182. typedef volatile int pthread_spinlock_t;
  183. int pthread_spin_lock (pthread_spinlock_t *__lock);
  184. int pthread_spin_unlock (pthread_spinlock_t *__lock);
  185. #endif
  186. #define LOCK_COMMAND(x) pthread_spin_lock(x)
  187. #define UNLOCK_COMMAND(x) pthread_spin_unlock(x)
  188. #else
  189. #define LOCK_COMMAND(x) blas_lock(x)
  190. #define UNLOCK_COMMAND(x) blas_unlock(x)
  191. #endif
  192. #define GOTO_SHMID 0x510510
  193. #if 0
  194. #ifndef __CUDACC__
  195. #define __global__
  196. #define __device__
  197. #define __host__
  198. #define __shared__
  199. #endif
  200. #endif
  201. #ifndef ASSEMBLER
  202. #ifdef QUAD_PRECISION
  203. typedef struct {
  204. unsigned long x[2];
  205. } xdouble;
  206. #elif defined EXPRECISION
  207. #define xdouble long double
  208. #else
  209. #define xdouble double
  210. #endif
  211. #if defined(OS_WINDOWS) && defined(__64BIT__)
  212. typedef long long BLASLONG;
  213. typedef unsigned long long BLASULONG;
  214. #else
  215. typedef long BLASLONG;
  216. typedef unsigned long BLASULONG;
  217. #endif
  218. #ifdef USE64BITINT
  219. typedef BLASLONG blasint;
  220. #if defined(OS_WINDOWS) && defined(__64BIT__)
  221. #define blasabs(x) llabs(x)
  222. #else
  223. #define blasabs(x) labs(x)
  224. #endif
  225. #else
  226. typedef int blasint;
  227. #define blasabs(x) abs(x)
  228. #endif
  229. #else
  230. #ifdef USE64BITINT
  231. #define INTSHIFT 3
  232. #define INTSIZE 8
  233. #else
  234. #define INTSHIFT 2
  235. #define INTSIZE 4
  236. #endif
  237. #endif
  238. #ifdef XDOUBLE
  239. #define FLOAT xdouble
  240. #ifdef QUAD_PRECISION
  241. #define XFLOAT xidouble
  242. #endif
  243. #ifdef QUAD_PRECISION
  244. #define SIZE 32
  245. #define BASE_SHIFT 5
  246. #define ZBASE_SHIFT 6
  247. #else
  248. #define SIZE 16
  249. #define BASE_SHIFT 4
  250. #define ZBASE_SHIFT 5
  251. #endif
  252. #elif defined(DOUBLE)
  253. #define FLOAT double
  254. #define SIZE 8
  255. #define BASE_SHIFT 3
  256. #define ZBASE_SHIFT 4
  257. #else
  258. #define FLOAT float
  259. #define SIZE 4
  260. #define BASE_SHIFT 2
  261. #define ZBASE_SHIFT 3
  262. #endif
  263. #ifndef XFLOAT
  264. #define XFLOAT FLOAT
  265. #endif
  266. #ifndef COMPLEX
  267. #define COMPSIZE 1
  268. #else
  269. #define COMPSIZE 2
  270. #endif
  271. #define Address_H(x) (((x)+(1<<15))>>16)
  272. #define Address_L(x) ((x)-((Address_H(x))<<16))
  273. #ifndef MAX_CPU_NUMBER
  274. #define MAX_CPU_NUMBER 2
  275. #endif
  276. #if defined(OS_SUNOS)
  277. #define YIELDING thr_yield()
  278. #endif
  279. #if defined(OS_WINDOWS)
  280. #if defined(_MSC_VER) && !defined(__clang__)
  281. #define YIELDING YieldProcessor()
  282. #else
  283. #define YIELDING SwitchToThread()
  284. #endif
  285. #endif
  286. #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
  287. #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
  288. #endif
  289. #ifdef BULLDOZER
  290. #ifndef YIELDING
  291. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  292. #endif
  293. #endif
  294. #ifdef POWER8
  295. #ifndef YIELDING
  296. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  297. #endif
  298. #endif
  299. #ifdef POWER9
  300. #ifndef YIELDING
  301. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  302. #endif
  303. #endif
  304. /*
  305. #ifdef PILEDRIVER
  306. #ifndef YIELDING
  307. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  308. #endif
  309. #endif
  310. */
  311. /*
  312. #ifdef STEAMROLLER
  313. #ifndef YIELDING
  314. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  315. #endif
  316. #endif
  317. */
  318. #ifndef YIELDING
  319. #define YIELDING sched_yield()
  320. #endif
  321. /***
  322. To allocate job_t on the heap or on the stack.
  323. Please see https://github.com/xianyi/OpenBLAS/issues/246
  324. ***/
  325. #if defined(OS_WINDOWS)
  326. #define GETRF_MEM_ALLOC_THRESHOLD 32
  327. #define BLAS3_MEM_ALLOC_THRESHOLD 32
  328. #endif
  329. #ifndef GETRF_MEM_ALLOC_THRESHOLD
  330. #define GETRF_MEM_ALLOC_THRESHOLD 80
  331. #endif
  332. #ifndef BLAS3_MEM_ALLOC_THRESHOLD
  333. #define BLAS3_MEM_ALLOC_THRESHOLD 160
  334. #endif
  335. #ifdef QUAD_PRECISION
  336. #include "common_quad.h"
  337. #endif
  338. #ifdef ARCH_ALPHA
  339. #include "common_alpha.h"
  340. #endif
  341. #ifdef ARCH_X86
  342. #include "common_x86.h"
  343. #endif
  344. #ifdef ARCH_X86_64
  345. #include "common_x86_64.h"
  346. #endif
  347. #ifdef ARCH_IA64
  348. #include "common_ia64.h"
  349. #endif
  350. #ifdef ARCH_POWER
  351. #include "common_power.h"
  352. #endif
  353. #ifdef sparc
  354. #include "common_sparc.h"
  355. #endif
  356. #ifdef ARCH_MIPS
  357. #include "common_mips.h"
  358. #endif
  359. #ifdef ARCH_MIPS64
  360. #include "common_mips64.h"
  361. #endif
  362. #ifdef ARCH_ARM
  363. #include "common_arm.h"
  364. #endif
  365. #ifdef ARCH_ARM64
  366. #include "common_arm64.h"
  367. #endif
  368. #ifdef ARCH_ZARCH
  369. #include "common_zarch.h"
  370. #endif
  371. #ifndef ASSEMBLER
  372. #ifdef OS_WINDOWSSTORE
  373. typedef char env_var_t[MAX_PATH];
  374. #define readenv(p, n) 0
  375. #else
  376. #if defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)
  377. typedef char env_var_t[MAX_PATH];
  378. #define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
  379. #else
  380. typedef char* env_var_t;
  381. #define readenv(p, n) ((p)=getenv(n))
  382. #endif
  383. #endif
  384. #if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
  385. #ifdef _POSIX_MONOTONIC_CLOCK
  386. #if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17)
  387. #if __GLIBC_PREREQ(2, 17) // don't require -lrt
  388. #define USE_MONOTONIC
  389. #endif
  390. #elif defined(OS_ANDROID)
  391. #define USE_MONOTONIC
  392. #endif
  393. #endif
  394. /* use similar scale as x86 rdtsc for timeouts to work correctly */
  /*
   * rpcc: return the current time as a count of nanoseconds.
   *
   * When USE_MONOTONIC is defined the value comes from
   * clock_gettime(CLOCK_MONOTONIC); otherwise it comes from gettimeofday(),
   * whose microsecond field is scaled by 1000 so both paths report the same
   * unit.
   */
  static inline unsigned long long rpcc(void){
  #ifdef USE_MONOTONIC
    struct timespec now;
    unsigned long long whole_seconds_ns;

    clock_gettime(CLOCK_MONOTONIC, &now);
    whole_seconds_ns = (unsigned long long)now.tv_sec * 1000000000ull;
    return whole_seconds_ns + now.tv_nsec;
  #else
    struct timeval now;
    unsigned long long whole_seconds_ns;

    gettimeofday(&now, NULL);
    whole_seconds_ns = (unsigned long long)now.tv_sec * 1000000000ull;
    /* microseconds -> nanoseconds */
    return whole_seconds_ns + now.tv_usec * 1000;
  #endif
  }
  406. #define RPCC_DEFINED
  407. #define RPCC64BIT
  408. #endif // !RPCC_DEFINED
  409. #if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
  410. static void __inline blas_lock(volatile BLASULONG *address){
  411. do {
  412. while (*address) {YIELDING;};
  413. } while (!__sync_bool_compare_and_swap(address, 0, 1));
  414. }
  415. #define BLAS_LOCK_DEFINED
  416. #endif
  417. #ifndef RPCC_DEFINED
  418. #error "rpcc() implementation is missing for your platform"
  419. #endif
  420. #ifndef BLAS_LOCK_DEFINED
  421. #error "blas_lock() implementation is missing for your platform"
  422. #endif
  423. #endif // !ASSEMBLER
  424. #ifdef OS_LINUX
  425. #include "common_linux.h"
  426. #endif
  427. #define MMAP_ACCESS (PROT_READ | PROT_WRITE)
  428. #ifdef __NetBSD__
  429. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
  430. #else
  431. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
  432. #endif
  433. #ifndef ASSEMBLER
  434. /* C99 supports complex floating numbers natively, which GCC also offers as an
  435. extension since version 3.0. If neither are available, use a compatible
  436. structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
  437. #if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
  438. (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER)
  439. #define OPENBLAS_COMPLEX_C99
  440. #ifndef __cplusplus
  441. #include <complex.h>
  442. #endif
  443. typedef float _Complex openblas_complex_float;
  444. typedef double _Complex openblas_complex_double;
  445. typedef xdouble _Complex openblas_complex_xdouble;
  446. #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
  447. #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
  448. #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
  449. #else
  450. #define OPENBLAS_COMPLEX_STRUCT
  451. typedef struct { float real, imag; } openblas_complex_float;
  452. typedef struct { double real, imag; } openblas_complex_double;
  453. typedef struct { xdouble real, imag; } openblas_complex_xdouble;
  454. #define openblas_make_complex_float(real, imag) {(real), (imag)}
  455. #define openblas_make_complex_double(real, imag) {(real), (imag)}
  456. #define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
  457. #endif
  458. #endif
  459. #include "param.h"
  460. #include "common_param.h"
  461. #ifndef STDERR
  462. #define STDERR stderr
  463. #endif
  464. #ifndef MASK
  465. #define MASK(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
  466. #endif
  467. #if defined(XDOUBLE) || defined(DOUBLE)
  468. #define FLOATRET FLOAT
  469. #else
  470. #ifdef NEED_F2CCONV
  471. #define FLOATRET double
  472. #else
  473. #define FLOATRET float
  474. #endif
  475. #endif
  476. #ifndef ASSEMBLER
  477. #ifndef NOINCLUDE
  478. /* Inclusion of a standard header file is needed for definition of __STDC_*
  479. predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
  480. as a side effect of including either <features.h> or <stdc-predef.h>. */
  481. #include <stdio.h>
  482. #endif // NOINCLUDE
  483. #ifdef XDOUBLE
  484. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
  485. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
  486. #elif defined(DOUBLE)
  487. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
  488. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
  489. #else
  490. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
  491. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
  492. #endif
  493. #if defined(C_PGI) || defined(C_SUN)
  494. #if defined(__STDC_IEC_559_COMPLEX__)
  495. #define CREAL(X) creal(X)
  496. #define CIMAG(X) cimag(X)
  497. #else
  498. #define CREAL(X) (*((FLOAT *)&X + 0))
  499. #define CIMAG(X) (*((FLOAT *)&X + 1))
  500. #endif
  501. #else
  502. #ifdef OPENBLAS_COMPLEX_STRUCT
  503. #define CREAL(Z) ((Z).real)
  504. #define CIMAG(Z) ((Z).imag)
  505. #else
  506. #define CREAL __real__
  507. #define CIMAG __imag__
  508. #endif
  509. #endif
  510. #endif // ASSEMBLER
  511. #ifndef IFLUSH
  512. #define IFLUSH
  513. #endif
  514. #ifndef IFLUSH_HALF
  515. #define IFLUSH_HALF
  516. #endif
  517. #if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
  518. #ifdef USE_OPENMP
  519. #undef USE_OPENMP
  520. #endif
  521. #endif
  522. #if defined(C_MSVC)
  523. #define inline __inline
  524. #endif
  525. #ifndef ASSEMBLER
  /*
   * MIN / MAX: smaller / larger of two values.
   *
   * Every argument and the whole expansion are parenthesized so that
   * arguments containing operators of lower precedence than the comparison
   * (e.g. `x & mask`, `a | b`, `c ? d : e`) are compared as a unit.
   * When the two arguments compare equal, the first argument is the result.
   * NOTE: each argument is still evaluated more than once; do not pass
   * expressions with side effects (MIN(i++, j)).
   * Both macros are only defined here if not already defined.
   */
  #ifndef MIN
  #define MIN(a,b) ((a) > (b) ? (b) : (a))
  #endif
  #ifndef MAX
  #define MAX(a,b) ((a) < (b) ? (b) : (a))
  #endif
  532. #define TOUPPER(a) {if ((a) > 0x60) (a) -= 0x20;}
  533. #if defined(__FreeBSD__) || defined(__APPLE__)
  534. #define MAP_ANONYMOUS MAP_ANON
  535. #endif
  536. /* Common Memory Management Routine */
  537. void blas_set_parameter(void);
  538. int blas_get_cpu_number(void);
  539. void *blas_memory_alloc (int);
  540. void blas_memory_free (void *);
  541. void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
  542. void blas_memory_free_nolock (void *);
  543. int get_num_procs (void);
  544. #if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
  545. int get_num_nodes (void);
  546. int get_num_proc (int);
  547. int get_node_equal (void);
  548. #endif
  549. void goto_set_num_threads(int);
  550. void gotoblas_affinity_init(void);
  551. void gotoblas_affinity_quit(void);
  552. void gotoblas_dynamic_init(void);
  553. void gotoblas_dynamic_quit(void);
  554. void gotoblas_profile_init(void);
  555. void gotoblas_profile_quit(void);
  556. #ifdef USE_OPENMP
  557. #ifndef C_MSVC
  558. int omp_in_parallel(void);
  559. int omp_get_num_procs(void);
  560. #else
  561. __declspec(dllimport) int __cdecl omp_in_parallel(void);
  562. __declspec(dllimport) int __cdecl omp_get_num_procs(void);
  563. #endif
  564. #if (__STDC_VERSION__ >= 201112L)
  565. #if defined(C_GCC) && ( __GNUC__ < 7)
  566. // workaround for GCC bug 65467
  567. #ifndef _Atomic
  568. #define _Atomic volatile
  569. #endif
  570. #endif
  571. #include <stdatomic.h>
  572. #else
  573. #ifndef _Atomic
  574. #define _Atomic volatile
  575. #endif
  576. #endif
  577. #else
  578. #ifdef __ELF__
  579. int omp_in_parallel (void) __attribute__ ((weak));
  580. int omp_get_num_procs(void) __attribute__ ((weak));
  581. #endif
  582. #endif
  583. static __inline void blas_unlock(volatile BLASULONG *address){
  584. MB;
  585. *address = 0;
  586. }
  /*
   * readenv_atoi: look up the environment variable named `env` and return
   * its value converted with atoi().
   *
   * Returns 0 when the variable cannot be read. Three variants exist:
   *   - OS_WINDOWSSTORE: always returns 0.
   *   - OS_WINDOWS:      goes through the readenv() macro and env_var_t.
   *   - otherwise:       uses getenv() directly.
   */
  #ifdef OS_WINDOWSSTORE
  static __inline int readenv_atoi(char *env) {
    return 0;
  }
  #else
  #ifdef OS_WINDOWS
  static __inline int readenv_atoi(char *env) {
    env_var_t p;
    /* readenv() evaluates to non-zero when the variable was found (character
       count from GetEnvironmentVariable, or the non-NULL getenv() pointer on
       Cygwin). Only then does `p` hold a string that may be parsed; the
       previous code had the two branches swapped, returning 0 on success
       and calling atoi() on an unset buffer / NULL pointer on failure. */
    return readenv(p,env) ? atoi(p) : 0;
  }
  #else
  static __inline int readenv_atoi(char *env) {
    char *value = getenv(env);

    if (value == NULL)
      return 0;
    return atoi(value);
  }
  #endif
  #endif
  607. #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
  608. static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
  609. #ifndef UNIT
  610. FLOAT ratio, den;
  611. if (
  612. #ifdef XDOUBLE
  613. (fabsl(ar)) >= (fabsl(ai))
  614. #elif defined DOUBLE
  615. (fabs (ar)) >= (fabs (ai))
  616. #else
  617. (fabsf(ar)) >= (fabsf(ai))
  618. #endif
  619. ) {
  620. ratio = ai / ar;
  621. den = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
  622. ar = den;
  623. ai = -ratio * den;
  624. } else {
  625. ratio = ar / ai;
  626. den = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
  627. ar = ratio * den;
  628. ai = -den;
  629. }
  630. b[0] = ar;
  631. b[1] = ai;
  632. #else
  633. b[0] = ONE;
  634. b[1] = ZERO;
  635. #endif
  636. }
  637. #endif
  638. #ifdef MALLOC_DEBUG
  639. void *blas_debug_alloc(int);
  640. void *blas_debug_free(void *);
  641. #undef malloc
  642. #undef free
  643. #define malloc(a) blas_debug_alloc(a)
  644. #define free(a) blas_debug_free (a)
  645. #endif
  646. #ifndef COPYOVERHEAD
  647. #define GEMMRETTYPE int
  648. #else
  649. typedef struct {
  650. double outercopy;
  651. double innercopy;
  652. double kernel;
  653. double mflops;
  654. } copyoverhead_t;
  655. #define GEMMRETTYPE copyoverhead_t
  656. #endif
  657. #endif
  658. #ifndef BUILD_KERNEL
  659. #define KNAME(A, B) A
  660. #else
  661. #define KNAME(A, B) A##B
  662. #endif
  663. #include "common_interface.h"
  664. #ifdef SANITY_CHECK
  665. #include "common_reference.h"
  666. #endif
  667. #include "common_macro.h"
  668. #include "common_level1.h"
  669. #include "common_level2.h"
  670. #include "common_level3.h"
  671. #include "common_lapack.h"
  672. #ifdef CBLAS
  673. # define OPENBLAS_CONST /* see comment in cblas.h */
  674. # include "cblas.h"
  675. #endif
  676. #ifndef ASSEMBLER
  677. #include "common_stackalloc.h"
  678. #if 0
  679. #include "symcopy.h"
  680. #endif
  681. #if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
  682. #error Both SMP_SERVER and SMP_ONDEMAND are specified.
  683. #endif
  684. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  685. #include "common_thread.h"
  686. #endif
  687. #endif
  688. #define INFO_NUM 99
  689. #ifndef DEFAULT_CPU_NUMBER
  690. #define DEFAULT_CPU_NUMBER 4
  691. #endif
  692. #ifndef IDEBUG_START
  693. #define IDEBUG_START
  694. #endif
  695. #ifndef IDEBUG_END
  696. #define IDEBUG_END
  697. #endif
  698. #if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
  699. typedef struct {
  700. int func;
  701. unsigned long long calls, fops, area, cycles, tcycles;
  702. } func_profile_t;
  703. extern func_profile_t function_profile_table[];
  704. extern int gotoblas_profile;
  705. #ifdef XDOUBLE
  706. #define NUMOPT QNUMOPT
  707. #elif defined DOUBLE
  708. #define NUMOPT DNUMOPT
  709. #else
  710. #define NUMOPT SNUMOPT
  711. #endif
  712. #define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
  713. #ifdef SMP
  714. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  715. if (gotoblas_profile) { \
  716. profile_end = rpcc(); \
  717. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  718. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  719. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  720. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  721. function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
  722. } \
  723. }
  724. #else
  725. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  726. if (gotoblas_profile) { \
  727. profile_end = rpcc(); \
  728. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  729. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  730. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  731. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  732. function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
  733. } \
  734. }
  735. #endif
  736. #else
  737. #define FUNCTION_PROFILE_START()
  738. #define FUNCTION_PROFILE_END(COMP, AREA, OPS)
  739. #endif
  740. #if 1
  741. #define PRINT_DEBUG_CNAME
  742. #define PRINT_DEBUG_NAME
  743. #else
  744. #define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
  745. #define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
  746. #endif
  747. #ifdef __cplusplus
  748. }
  749. #endif /* __cplusplus */
  750. #endif